### Python Cheatsheet for Data Science
#### Import Libraries:
import pandas as pd # Data analysis
import numpy as np # Numerical operations
import matplotlib.pyplot as plt # Plotting graphs
import seaborn as sns # Advanced plotting
import scipy.stats as stats # Statistical functions
#### Basic Data Operations:
# Build a small two-column DataFrame to experiment with
df = pd.DataFrame(dict(col1=[1, 2, 3], col2=[4, 5, 6]))
# Peek at the first five rows
df.head(n=5)
# Inspect the dtype of every column
df.dtypes
# Keep only the rows where col1 is greater than 1 (boolean mask)
df.loc[df['col1'] > 1]
# Group rows by col1 and average the remaining columns per group
df.groupby(by='col1').mean()
# Summary statistics for the columns
df.describe()
#### Data Cleaning:
# Replace every missing value with 0
# (the result is not stored here; assign it to keep the filled frame)
df.fillna(0)
# Discard any row that has at least one missing value
df.dropna(axis=0, how='any')
# Drop repeated rows, keeping the first occurrence of each
df.drop_duplicates(keep='first')
#### Plotting:
# Line chart of col2 against col1
df.plot(kind='line', x='col1', y='col2')
# Histogram of a single column
df['col1'].hist()
# Box plot of col1, drawn with seaborn
sns.boxplot(data=df, x='col1')
# Scatter plot of col2 against col1
plt.scatter(x=df['col1'], y=df['col2'])
#### Statistical Analysis:
# Central tendency: mean, median, and mode
# (mode() returns a Series because several values can tie)
df['col1'].mean()
df['col1'].median()
df['col1'].mode(dropna=True)
# Sample standard deviation (ddof=1)
df['col1'].std(ddof=1)
# Pairwise Pearson correlation between columns
df.corr(method='pearson')
# One-sample t-test: is the mean of col1 different from 0?
stats.ttest_1samp(df['col1'], popmean=0)
#### Working with Strings:
# The .str accessor only works on string-valued columns; calling it on the
# integer column df['col1'] raises AttributeError. These examples therefore
# use a small text DataFrame of their own.
text_df = pd.DataFrame({'col1': ['Old Value', 'a substring here', 'old news']})
# Convert string to lowercase
text_df['col1'].str.lower()
# Check if a column contains a specific string
# (regex=False matches the text literally rather than as a regular expression)
text_df['col1'].str.contains('substring', regex=False)
# Replace a substring
# (Series.replace('old', 'new') only swaps cells that equal 'old' exactly;
# .str.replace rewrites the match inside each string)
text_df['col1'].str.replace('old', 'new', regex=False)