Pandas library — notes
Code for reading, cleaning, and plotting wireline well-log data
import pandas as pd
import matplotlib.pyplot as plt
import re

# --- Read the raw log file ---
# NOTE(review): the PDF export truncated this line at "enc"; the open() call
# presumably passed an encoding argument — "utf-8" assumed, confirm against
# the original notebook.
with open(r"D:\Python data analysis\PVT properties\Log - G 02 - W 04.TXT",
          encoding="utf-8") as file:
    content = file.read()

# Extract the "WIRELINE WELL LOGS" section (everything from the header on).
wireline_start = content.find("WIRELINE WELL LOGS")
if wireline_start == -1:
    # Previously a missing header silently produced garbage (find() == -1);
    # fail loudly instead.
    raise ValueError("'WIRELINE WELL LOGS' section not found in log file")
wireline_text = content[wireline_start:]
lines = wireline_text.strip().splitlines()

# Keep only data rows: lines whose first non-space character is a digit
# (the depth value).
data_lines = [line for line in lines if re.match(r'^\d', line.strip())]

# Convert the text rows into a DataFrame.
# Column list reconstructed from the printed output table (11 columns);
# the PDF export truncated the original literal after "AZIM".
columns = ["DEPTH", "GR", "SP", "Rt", "Rxo", "DEN", "SONIC", "CNL",
           "DIP", "AZIM", "MUDLOG"]
# Fields are separated by runs of two or more spaces.
data = [re.split(r'\s{2,}', line.strip()) for line in data_lines]
df_log = pd.DataFrame(data, columns=columns)

# Cast every column except the textual MUDLOG description to float.
numeric_cols = df_log.columns.drop("MUDLOG")
df_log[numeric_cols] = df_log[numeric_cols].astype(float)

# --- Lithology interpretation from the gamma-ray curve ---
# NOTE(review): the PDF truncated the original lambda ("x < 25 el…"). A
# 75 API cutoff is the conventional sand/shale threshold and is the only
# value consistent with the printed output (GR ≈ 32 → SANDSTONE,
# GR ≈ 92 → SHALE) — confirm against the original notebook.
df_log["LITHOLOGY"] = df_log["GR"].apply(
    lambda x: "SANDSTONE" if x < 75 else "SHALE")

# Display the cleaned table (notebook-style echo).
df_log
# --- Plot the four interpreted log tracks side by side ---
fig, axes = plt.subplots(nrows=1, ncols=4, figsize=(12, 10), sharey=True)
depth = df_log["DEPTH"]

# Track 1 — Gamma Ray, colour-coded by interpreted lithology.
# NOTE(review): plotting each lithology subset as a single line draws
# straight segments across depth gaps between beds; switch to a scatter
# or masked plot if that matters visually.
for litho, color in zip(["SHALE", "SANDSTONE"], ["green", "orange"]):
    subset = df_log[df_log["LITHOLOGY"] == litho]
    axes[0].plot(subset["GR"], subset["DEPTH"], label=litho, color=color)
axes[0].set_xlabel("GR (API)")
axes[0].set_xlim(0, 150)
axes[0].set_title("Gamma Ray")
axes[0].grid()
axes[0].legend()

# Track 2 — true resistivity (Rt) on a logarithmic x-axis.
axes[1].semilogx(df_log["Rt"], depth, color="red")
axes[1].set_xlabel("Rt (ohm.m)")
axes[1].set_xlim(0.2, 200)
axes[1].set_title("Resistivity")
axes[1].grid()

# Track 3 — bulk density.
axes[2].plot(df_log["DEN"], depth, color="brown")
axes[2].set_xlabel("DEN (g/cc)")
axes[2].set_xlim(1.9, 2.7)
axes[2].set_title("Density")
axes[2].grid()

# Track 4 — neutron porosity.
axes[3].plot(df_log["CNL"], depth, color="blue")
axes[3].set_xlabel("CNL (%)")
axes[3].set_xlim(0, 45)
axes[3].set_title("Neutron Porosity")
axes[3].grid()

# Shared depth axis: label once and invert so depth increases downward.
axes[0].set_ylabel("Depth (ft)")
axes[0].invert_yaxis()

plt.tight_layout()
plt.suptitle("Interpreted Wireline Log with Lithology", fontsize=14, y=1.02)
plt.show()
Output of the code (the cleaned DataFrame):
DEPTH GR SP Rt Rxo DEN SONIC CNL DIP AZIM MUDLOG
0 5745.0 91.8 -23.0 3.6 6.8 2.1 104.4 37.8 15.1 312.4 SHALE
1 5745.5 94.9 -20.3 3.7 6.5 2.1 107.6 39.8 17.0 313.1 SHALE
2 5746.0 94.5 -22.3 4.9 9.3 2.1 106.6 37.3 18.0 306.3 SHALE
3 5746.5 92.8 -20.1 3.8 6.8 2.1 106.8 39.1 19.4 320.0 SHALE
4 5747.0 93.6 -21.9 2.2 4.1 2.1 105.0 39.1 17.5 300.9 SHALE
... ... ... ... ... ... ... ... ... ... ... ...
1622 6556.0 32.8 -17.7 4.1 6.8 2.3 79.7 12.3 15.6 307.4 SANDSTONE
1623 6556.5 32.7 -19.0 4.1 7.0 2.4 78.4 12.1 20.0 325.8 SANDSTONE
1624 6557.0 32.6 -22.1 3.9 7.4 2.3 82.9 11.5 20.6 303.2 SANDSTONE
1625 6557.5 35.7 -20.4 4.1 7.4 2.5 83.0 11.5 9.9 328.7 SANDSTONE
1626 6558.0 32.4 -22.2 4.0 7.4 2.4 81.1 11.9 19.9 309.0 SANDSTONE
1627 rows × 11 columns
Some useful functions of the pandas library
# 🐼 Pandas Essential Functions with Purpose
# NOTE: reference snippets only — `df`, `df1`, and `df2` must be defined
# DataFrames before any of these lines can run; return values are discarded.
# Importing pandas
import pandas as pd
# ------------------------------
# 🧾Basic DataFrame Exploration
# ------------------------------
df.head(n=5) # Shows first n rows (default is 5)
df.tail(n=5) # Shows last n rows
df.shape # Returns (rows, columns)
df.columns # Lists column names
df.index # Lists index range or values
df.info() # Summary: columns, non-nulls, datatypes
df.describe() # Descriptive stats for numeric columns
# ------------------------------
# 🧹Data Selection & Filtering
# ------------------------------
df['column'] # Select a single column
df[['col1', 'col2']] # Select multiple columns
df.loc[row, col] # Label-based selection
df.iloc[row, col] # Index-based selection
df[df['col'] > value] # Conditional filtering
# ------------------------------
# 🧼Data Cleaning
# ------------------------------
df.isnull() # Detect missing values
df.isnull().sum() # Count of missing values per column
df.dropna() # Remove missing rows
df.fillna(value) # Fill missing with value
df.duplicated() # Check for duplicate rows
df.drop_duplicates() # Remove duplicates
df.replace(a, b) # Replace values
# ------------------------------
# 🧱Data Manipulation
# ------------------------------
df.sort_values('col') # Sort by column
df.rename(columns={'old':'new'}) # Rename columns
df.set_index('col') # Set column as index
df.reset_index() # Reset index to default
df['new'] = df['col1'] + df['col2'] # Create new column
df.drop(['col1', 'col2'], axis=1) # Drop columns
# ------------------------------
# 🧮Aggregation & Grouping
# ------------------------------
df.sum() # Column-wise sum
df.mean() # Column-wise mean
df.count() # Non-NA count
df.groupby('col') # Group by column
df.groupby('col').mean() # Group and aggregate
# ------------------------------
# 🧰 Merging & Joining
# ------------------------------
pd.concat([df1, df2]) # Concatenate vertically
pd.merge(df1, df2, on='key') # Merge on common column
df1.join(df2, how='left') # Join by index
# ------------------------------
# 📤Input / Output
# ------------------------------
pd.read_csv('file.csv') # Load CSV
df.to_csv('file.csv') # Save to CSV
pd.read_excel('file.xlsx') # Load Excel file
df.to_excel('file.xlsx') # Save to Excel
# ------------------------------
# 📊 Visualization (with matplotlib)
# ------------------------------
df.plot() # Basic line plot
df['col'].hist() # Histogram
df.plot.scatter(x='col1', y='col2') # Scatter plot
# ------------------------------
# 🧪 Other Useful Functions
# ------------------------------
df.value_counts() # Frequency of unique values
df.nunique() # Count of unique values per column
df.apply(function) # Apply a function to each column/row
df.astype('int') # Change data type