Mastering Data Visualization Techniques
(Part 1)
Prepared by: Syed Afroz Ali
# Bar chart of cholesterol for each chest-pain type.
plt.figure(figsize=(8, 6))
plt.ticklabel_format(style="plain")  # plain tick labels, no scientific notation
sns.barplot(x=heart["ChestPainType"], y=heart["Cholesterol"], palette="tab20")

# Bar chart of age by embarkation port, one facet per value of "Survived";
# ci=None switches the error bars off.
sns.catplot(data=titanic, x="Embarked", y="Age", col="Survived", kind="bar", ci=None)
plt.show()

# One density curve per wine column on a 4x3 grid of independent axes.
wine.plot(kind="density", subplots=True, layout=(4, 3), sharex=False, figsize=(14, 8))
plt.show()

# Density curves for every non-object titanic column, laid out in one row.
numeric_feature = titanic.dtypes != object
final_numeric_feature = titanic.columns[numeric_feature].tolist()
titanic[final_numeric_feature].plot(
    kind="density",
    subplots=True,
    # Size the row from the data rather than hard-coding seven columns.
    layout=(1, len(final_numeric_feature)),
    sharex=False,
    figsize=(20, 4),
)
plt.show()
# Pie chart of the three most frequent chest-pain types.
heart["ChestPainType"].value_counts()[:3].plot.pie(figsize=(5, 5), autopct="%1.0f%%")
plt.title("Pie Chart")
plt.xticks(rotation=90)
plt.show()

# Pie chart of all chest-pain types. The labels come from the counts' own
# index: value_counts() is ordered by frequency while unique() is ordered by
# first appearance, so mixing the two would put labels on the wrong slices.
chest_pain_counts = heart["ChestPainType"].value_counts()
plt.pie(chest_pain_counts, labels=chest_pain_counts.index, autopct="%1.2f%%")
plt.show()

# Exploded pie chart with a custom colour list and a legend.
plt.figure(figsize=(6, 4))
counts = heart["ChestPainType"].value_counts()
# One offset per slice (0, 0.1, 0.2, ...), sized to the number of categories.
explode = tuple(i / 10 for i in range(len(counts)))
colors = [
    "#A85CF9",
    "#FF4949",
    "#BDF2D5",
    "#FF06B7",
    "#4B7BE5",
    "#FF5D5D",
    "#FAC213",
    "#37E2D5",
    "#6D8B74",
    "#E9D5CA",
]
counts.plot(kind="pie", fontsize=12, colors=colors, explode=explode, autopct="%1.1f%%")
plt.axis("equal")
plt.legend(labels=counts.index, loc="best")
plt.show()
Learn Data Visualization With Python
https://t.me/AIMLDeepThaught/625
# Donut charts of the (up to ten) most frequent embarkation ports: a pie with
# a filled circle drawn over its centre. Drawn twice, once per hole colour.
embarked_counts = titanic["Embarked"].value_counts()[:10]
for hole_color in ("white", "pink"):
    my_circle = plt.Circle((0, 0), 0.9, color=hole_color)
    plt.pie(embarked_counts.values, labels=embarked_counts.index)
    p = plt.gcf()
    p.gca().add_artist(my_circle)
    plt.show()
•• Join my WhatsApp Channel for the latest updates on AI:
https://www.whatsapp.com/channel/0029VavNSDO9mrGWYirxz40G
•• Join my LinkedIn group for the latest updates on Machine
Learning: https://www.linkedin.com/groups/7436898/
# Count plot of RestingECG with the count written on top of every bar.
plt.figure(figsize=(7, 4))
ax = sns.countplot(x=heart["RestingECG"])
for bars in ax.containers:
    ax.bar_label(bars)
plt.title("Count of RestingECG", fontsize=15, color="Blue")
# Visualizing the distribution of the data for every feature, split by the
# HeartDisease flag (blue = 0, red = 1).
plt.figure(figsize=(20, 8))
n_cols = 6
n_rows = -(-len(heart.columns) // n_cols)  # ceiling division: enough rows for every column
for i, column in enumerate(heart.columns, 1):
    plt.subplot(n_rows, n_cols, i)
    heart[heart["HeartDisease"] == 0][column].hist(
        bins=35, color="blue", label="Heart Disease = NO", alpha=0.9
    )
    heart[heart["HeartDisease"] == 1][column].hist(
        bins=35, color="red", label="Heart Disease = YES", alpha=0.5
    )
    plt.legend()
    plt.xlabel(column)
# Count plots of the categorical columns in `cat`, coloured by ExerciseAngina,
# one column per axes.
cat = ["Sex", "RestingECG"]
fig, ax = plt.subplots(1, 2, figsize=(10, 4))
flat_axes = ax.flatten()
for column, axes in zip(cat, flat_axes):
    sns.countplot(
        ax=axes,
        x=heart[column],
        hue=heart["ExerciseAngina"],
        palette="magma",
        alpha=0.8,
    )
# Hide any grid cell that did not receive a plot.
for unused_axes in flat_axes[len(cat):]:
    unused_axes.set_visible(False)
plt.tight_layout()
plt.show()
# Boxen plot of cholesterol per RestingECG category with a large figure title.
plt.figure(figsize=(11, 5))
plt.gcf().text(0.55, 0.95, "Box Plot", fontsize=40, color="Red", ha="center", va="center")
sns.boxenplot(x=heart["RestingECG"], y=heart["Cholesterol"], palette="Set1")
plt.show()

# Facet along the columns to show a categorical variable using "col" parameter.
# catplot is figure-level and creates its own figure, so no plt.figure() call
# precedes it (one would only leave an empty figure behind).
sns.catplot(
    x="ChestPainType",
    y="MaxHR",
    hue="Sex",
    col="RestingECG",
    kind="boxen",
    palette="Set2",
    height=5,
    aspect=1,
    data=heart,
)
plt.show()
# Strip plot and box plot of Fare per Pclass on the same axes, split by Sex.
plt.figure(figsize=(12, 5))
params = dict(data=titanic, x=titanic.Pclass, y=titanic.Fare, hue=titanic.Sex, dodge=True)
# Two colours, matching the two-colour box palette below (one per hue level;
# assumes "Sex" has two values - confirm against the data).
sns.stripplot(
    **params,
    size=8,
    jitter=0.35,
    palette=["#33FF66", "#FF6600"],
    edgecolor="black",
    linewidth=1,
)
sns.boxplot(**params, palette=["#BDBDBD", "#E0E0E0"], linewidth=6)
plt.show()
# Plot a subset of variables: Fare and Age against each other, coloured by
# Pclass. Scatter plots go off the diagonal, histograms on it.
g = sns.PairGrid(
    titanic,
    hue="Pclass",
    x_vars=["Fare", "Age"],
    y_vars=["Fare", "Age"],
    height=6,
    aspect=1,
)
g = g.map_offdiag(plt.scatter, edgecolor="w", s=130)
g = g.map_diag(plt.hist, edgecolor="w", linewidth=2)
g = g.add_legend()
plt.show()
•• Join my WhatsApp Channel for the latest updates on AI:
https://www.whatsapp.com/channel/0029VavNSDO9mrGWYirxz40G
# Distribution of the first six wine columns on a 3x2 grid; each legend entry
# shows the column's skewness.
features_mean = list(wine.columns[:6])
num_rows, num_cols = 3, 2
fig, axes = plt.subplots(num_rows, num_cols, figsize=(20, 8))
for index, column in enumerate(features_mean):
    i, j = divmod(index, num_cols)
    # histplot with a KDE overlay stands in for the deprecated distplot.
    g = sns.histplot(
        x=wine[column],
        kde=True,
        stat="density",
        color="m",
        label="%.2f" % (wine[column].skew()),
        ax=axes[i, j],
    )
    g.legend(loc="best")
# Tighten the layout once every subplot has its content.
fig.tight_layout()
Learn Data Visualization With Python
https://t.me/AIMLDeepThaught/625
y = heart["Sex"]
# Explore Age distribution: one filled KDE curve per sex on shared axes.
# Explicit labels tie each legend entry to its own curve; fill= replaces the
# deprecated shade= keyword.
g = sns.kdeplot(
    heart["Age"][(y == "M") & (heart["Age"].notnull())],
    color="Red",
    fill=True,
    label="Male",
)
g = sns.kdeplot(
    heart["Age"][(y == "F") & (heart["Age"].notnull())],
    ax=g,
    color="Blue",
    fill=True,
    label="Female",
)
g.set_xlabel("Age")
g.set_ylabel("Frequency")
g.legend()
# Keep the twelve heart columns used by the plots that follow.
raw_df = heart[
    [
        "Age",
        "Sex",
        "ChestPainType",
        "RestingBP",
        "Cholesterol",
        "FastingBS",
        "RestingECG",
        "MaxHR",
        "ExerciseAngina",
        "Oldpeak",
        "ST_Slope",
        "HeartDisease",
    ]
]
# Function to print width of barcharts on the bars
def barw(ax):
    """Annotate every bar of a horizontal bar chart with its width.

    Parameters
    ----------
    ax : Axes-like
        Object exposing ``patches`` (the bars) and ``annotate``.

    Each annotation is the bar's width rounded to two decimals, placed at the
    end of the bar and vertically centred on it.
    """
    for p in ax.patches:
        val = p.get_width()  # length of the horizontal bar = value to show
        x = p.get_x() + p.get_width()  # x-position: end of the bar
        y = p.get_y() + p.get_height() / 2  # y-position: middle of the bar
        ax.annotate(round(val, 2), (x, y))
# Horizontal count plot of chest-pain types, most frequent first, with the
# count written at the end of every bar.
plt.figure(figsize=(10, 5))
ax0 = sns.countplot(
    data=heart,
    y="ChestPainType",
    order=heart["ChestPainType"].value_counts().index,
)
barw(ax0)
plt.show()
# Scatter plot of fixed acidity against total sulfur dioxide; marker size
# follows "alcohol" and marker colour follows "chlorides" (jet colour map).
sns.set_style("white")
wine.plot(
    kind="scatter",
    x="fixed acidity",
    y="total sulfur dioxide",
    alpha=0.5,
    s=wine["alcohol"],
    label="alcohol",
    figsize=(10, 7),
    c="chlorides",
    cmap=plt.get_cmap("jet"),
    colorbar=True,
    sharex=False,
)
plt.legend()
plt.show()
# Correlation of every feature column with the response variable "quality",
# drawn as a bar chart.
X = wine.drop(["quality"], axis=1)
y = wine["quality"]
feature_correlations = X.corrwith(y)
feature_correlations.plot.bar(figsize=(16, 4), rot=90, grid=True)
title_style = dict(fontsize=30, color="Red", font="Times New Roman")
plt.title("Correlation with quality", **title_style)
plt.show()
import matplotlib

# Exploded, shadowed pie chart of the Sex column with a legend and no axes.
matplotlib.rcParams.update({"font.size": 20})
sex_counts = heart["Sex"].value_counts()
ax = sex_counts.plot.pie(explode=[0.1, 0.1], autopct="%1.2f%%", shadow=True)
ax.set_title(label="Sex", fontsize=40, color="DarkOrange", font="Lucida Calligraphy")
# Legend labels follow the order of the slices instead of a hard-coded list.
plt.legend(labels=sex_counts.index)
plt.axis("off")
•• Join my WhatsApp Channel for the latest updates on AI:
https://www.whatsapp.com/channel/0029VavNSDO9mrGWYirxz40G
# Heat map of the wine correlation matrix; the upper triangle (diagonal
# included) is masked so each pair is shown once.
matplotlib.rcParams.update({"font.size": 10})
corr = wine.corr()
mask = np.triu(np.ones_like(corr, dtype=bool))
title_style = dict(fontsize=25, color="DarkGreen", font="Times New Roman")
plt.title("Correlation Analysis", **title_style)
heatmap_options = dict(
    mask=mask,
    annot=True,
    lw=0,
    linecolor="white",
    cmap="viridis",
    fmt="0.2f",
)
sns.heatmap(corr, **heatmap_options)
plt.xticks(rotation=90)
plt.yticks(rotation=0)
plt.show()
# Global chart configuration: default figure size, base font size, and
# "medium" sizing for legend text and figure titles.
plt.rcParams.update(
    {
        "figure.figsize": [20, 5],
        "font.size": 15,
        "legend.fontsize": "medium",
        "figure.titlesize": "medium",
    }
)
def plot_disribution(data, x, color, bins):
    """Show one column three ways: histogram, box plot and swarm plot.

    Parameters
    ----------
    data : DataFrame-like holding the column.
    x : name of the column to plot.
    color : colour passed to the box plot and the swarm plot.
    bins : bin specification passed to the histogram.

    The three panels share one row; the figure is displayed with
    ``plt.show()`` and nothing is returned.
    """
    mean = data[x].mean()
    std = data[x].std()
    info = dict(data=data, x=x, color=color)

    # Panel 1: histogram with a KDE overlay (stands in for the deprecated
    # distplot) and a red line at the mean.
    plt.subplot(1, 3, 1)
    sns.histplot(x=data[x], bins=bins, kde=True, stat="density")
    plt.xlabel(f"bins of {x}")
    plt.axvline(mean, label=f"mean = {mean:.2f}", color="red")
    plt.ylabel("frequency")
    # Raw string: "\s" is not a valid escape sequence in a normal literal.
    # The standard deviation goes in the legend title so that each legend
    # entry maps to an explicitly labelled artist.
    plt.legend(title=r"${\sigma}$ = %d" % std)
    plt.title(f"histogram of {x} column")

    # Panel 2: box plot.
    plt.subplot(1, 3, 2)
    sns.boxplot(**info)
    plt.xlabel(f"{x}")
    plt.title(f"box plot of {x} column")

    # Panel 3: swarm plot of the individual points.
    plt.subplot(1, 3, 3)
    sns.swarmplot(**info)
    plt.xlabel(f"{x}")
    plt.title(f"distribution of points in {x} column")

    plt.suptitle(f"Distribution of {x} column", fontsize=15, color="red")
    plt.show()
# Five-year-wide age bins starting at 29, then the three-panel Age summary.
age_bins = np.arange(29, 77 + 5, 5)
base_color = sns.color_palette()[4]
plot_disribution(data=heart, x="Age", color=base_color, bins=age_bins)
sns.set_style("white")
sns.set_context("poster", font_scale=0.7)
palette = [
    "#1d7874",
    "#679289",
    "#f4c095",
    "#ee2e31",
    "#ffb563",
    "#918450",
    "#f85e00",
    "#a41623",
    "#9a031e",
    "#d6d6d6",
    "#ffee32",
    "#ffd100",
    "#333533",
    "#202020",
]
# sns.palplot(sns.color_palette(palette))
# plt.show()

# Bar chart of Age per chest-pain type; x is restricted to the first 14 rows.
plt.subplots(figsize=(20, 8))
p = sns.barplot(
    x=heart["ChestPainType"][:14],
    y=heart["Age"],
    palette=palette,
    saturation=1,
    edgecolor="#1c1c1c",
    linewidth=2,
)
p.axes.set_title("\n ChestPainType \n", fontsize=25)
# Axis labels name the plotted columns (the previous "Total Member" / "Name"
# labels did not describe this data).
plt.ylabel("Age", fontsize=20)
plt.xlabel("\n ChestPainType", fontsize=20)
# plt.yscale("log")
plt.xticks(rotation=90)
# Write each bar's value inside it, rotated, on an orange rounded box.
for container in p.containers:
    p.bar_label(
        container,
        label_type="center",
        padding=6,
        size=15,
        color="black",
        rotation=90,
        bbox={
            "boxstyle": "round",
            "pad": 0.6,
            "facecolor": "orange",
            "edgecolor": "black",
            "alpha": 1,
        },
    )
sns.despine(left=True, bottom=True)
plt.show()
Learn Data Visualization With Python
https://t.me/AIMLDeepThaught/625
# One box plot per wine column on a 3x4 grid.
fig, axis = plt.subplots(ncols=4, nrows=3, figsize=(15, 10))
axis = axis.flatten()
# Columns beyond the number of axes are skipped instead of raising IndexError.
for index, col in enumerate(wine.columns[: len(axis)]):
    sns.boxplot(y=col, data=wine, color="r", ax=axis[index])
plt.tight_layout(pad=0.5, w_pad=0.7, h_pad=5.0)
# Checking the Cholesterol distribution, with a dashed line at its mean.
# histplot with a KDE overlay stands in for the deprecated distplot.
sns.histplot(x=heart["Cholesterol"], kde=True, stat="density", color="Brown")
plt.axvline(x=heart["Cholesterol"].mean(), color="Brown", linestyle="--", linewidth=2)
plt.title("Cholesterol")
# Count plot of wine quality; each bar is labelled with its share of all rows.
s = sns.countplot(x="quality", data=wine)
sizes = []
for p in s.patches:
    height = p.get_height()
    sizes.append(height)
    s.text(
        p.get_x() + p.get_width() / 2.0,  # horizontally centred on the bar
        height + 3,  # slightly above the top of the bar
        "{:1.2f}%".format(height / len(wine) * 100),
        ha="center",
        fontsize=14,
    )
# Count plot of each categorical titanic feature, split by Survived, 2x3 grid.
countfeature = ["Survived", "Pclass", "Sex", "SibSp", "Parch", "Embarked"]
countlist = list(enumerate(countfeature))
plt.figure(figsize=(20, 10))
plt.suptitle("Countplot of Categorical Features", fontsize=18)
for i in countlist:
    plt.subplot(2, 3, i[0] + 1)
    sns.countplot(data=titanic, x=i[1], hue="Survived", palette="rainbow")
    plt.ylabel("")
    plt.legend(["Not Survived", "Survived"], loc="upper center", prop={"size": 10})
plt.tight_layout()
plt.show()
# Box plots (cells 1-2) and histograms (cells 3-4) of Age and Fare in one row.
numfeature = ["Age", "Fare"]
enumfeat = list(enumerate(numfeature))
plt.figure(figsize=(20, 7))
plt.suptitle("Distribution and Outliers of Numerical Data", fontsize=20)
for i in enumfeat:
    plt.subplot(1, 4, i[0] + 1)
    sns.boxplot(data=titanic[i[1]], palette="rainbow")
    plt.xlabel(str(i[1]))
for i in enumfeat:
    plt.subplot(1, 4, i[0] + 3)
    sns.histplot(data=titanic[i[1]], palette="rainbow", bins=15)
    plt.xlabel(str(i[1]))
plt.tight_layout()
plt.show()
•• Join my WhatsApp Channel for the latest updates on AI:
https://www.whatsapp.com/channel/0029VavNSDO9mrGWYirxz40G
# Bar chart of each feature's correlation with "quality", strongest first.
plt.figure(figsize=(20, 6))
plt.title("Correlation of quality column with Independent Features", fontsize=15)
# Drop the self-correlation by name rather than by its position after sorting.
corr = wine.corr()["quality"].sort_values(ascending=False).drop("quality")
sns.barplot(x=corr.index, y=corr, color=(0.90, 0.30, 0.50))
plt.tight_layout()
plt.xticks(rotation=90)
plt.show()
# KDE of every column in enumfeat, one subplot each, coloured by Survived.
plt.figure(figsize=(15, 5))
plt.suptitle(
    "Probability Distribution of numerical columns according to number of Survived",
    fontsize=20,
)
for i in enumfeat:
    plt.subplot(1, 2, i[0] + 1)
    sns.kdeplot(data=titanic, x=i[1], hue="Survived")
plt.tight_layout()
plt.show()
import missingno as msno

# Missing-value matrix for titanic, then a per-column count of missing values.
msno.matrix(titanic, color=(0.50, 0.30, 0.80))
plt.show()
x = titanic.isnull().sum()
for a, b in x.items():
    if b > 0:
        print(f"There are {b} missing values in column: {a}")
Mastering Data Visualization Techniques
(Part 2)
Prepared by: Syed Afroz Ali
Feature Importance Visualization
import numpy as np
import matplotlib.pyplot as plt
from sklearn import tree
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
# matplotlib 3.6 renamed the seaborn style sheets to "seaborn-v0_8-*" and later
# removed the old names; use whichever name this installation provides.
style_name = (
    "seaborn-v0_8-white"
    if "seaborn-v0_8-white" in plt.style.available
    else "seaborn-white"
)
plt.style.use(style_name)
# Separate the features from the target column.
target_name = "quality"
X = wine.drop(target_name, axis=1)
label = wine[target_name]
Learn Data Visualization With Python
https://t.me/AIMLDeepThaught/625
# Stratified 80/20 train/test split.
X_train, X_test, y_train, y_test = train_test_split(
    X, label, test_size=0.2, random_state=42, stratify=label
)
# Fit a class-balanced decision tree. The model is bound to `clf` so that the
# imported `tree` module is not shadowed, and it is seeded so the importances
# are reproducible.
clf = DecisionTreeClassifier(
    class_weight="balanced",
    min_weight_fraction_leaf=0.01,
    random_state=42,
)
clf = clf.fit(X_train, y_train)
importances = clf.feature_importances_
feature_names = wine.drop("quality", axis=1).columns
indices = np.argsort(importances)[::-1]  # most important feature first
# Print the feature ranking
for f in range(X_train.shape[1]):
    print("%d. feature %d (%f)" % (f + 1, indices[f], importances[indices[f]]))
# Plot the feature importances of the fitted tree
def feature_importance_graph(indices, importances, feature_names):
    """Draw a horizontal bar chart of feature importances.

    Parameters
    ----------
    indices : index array giving the order in which features are drawn.
    importances : array of importance values, indexed by ``indices``.
    feature_names : array-like of names, indexed by ``indices``.

    The chart is drawn on a new figure; the caller is responsible for
    ``plt.show()``.
    """
    plt.figure(figsize=(12, 6))
    plt.title(
        "Determining Feature importances \n with DecisionTreeClassifier",
        fontsize=18,
    )
    plt.barh(range(len(indices)), importances[indices], color="#31B173", align="center")
    plt.yticks(
        range(len(indices)),
        feature_names[indices],
        rotation="horizontal",
        fontsize=14,
    )
    plt.ylim([-1, len(indices)])
    # NOTE(review): the dashed line and the text below use fixed coordinates
    # and name "duration" and "contacts"; they look carried over from another
    # dataset - confirm they are still wanted.
    plt.axhline(y=1.0, xmin=0.65, xmax=0.952, color="red", linewidth=3, linestyle="--")
    plt.text(
        0.19,
        2.8,
        "46% Difference between \n duration and contacts",
        color="Blue",
        fontsize=15,
    )
# Draw the importance chart for the fitted tree and display it.
feature_importance_graph(
    indices,
    importances,
    feature_names,
)
plt.show()
# Visualizing the distribution of the data for every feature, split by the
# Outcome flag (blue = 0, red = 1), on a 3x3 grid.
plt.figure(figsize=(20, 20))
for i, column in enumerate(df.columns, 1):
    plt.subplot(3, 3, i)
    df[df["Outcome"] == 0][column].hist(
        bins=35, color="blue", label="Have Diabetes = NO", alpha=0.6
    )
    df[df["Outcome"] == 1][column].hist(
        bins=35, color="red", label="Have Diabetes = YES", alpha=0.6
    )
    plt.legend()
    plt.xlabel(column)
from yellowbrick.classifier import ConfusionMatrix
from yellowbrick.classifier import ClassPredictionError
from yellowbrick.classifier import ROCAUC
from yellowbrick.style import set_palette
from statsmodels.graphics.gofplots import qqplot
# --- Variable, Color & Plot Size ---
var = titanic["Fare"]
color = color_mix[2]  # NOTE(review): color_mix is not defined in the visible code
fig = plt.figure(figsize=(14, 10))
# --- Skewness & Kurtosis ---
# The heading names the variable actually analysed (Fare).
print("\033[35m\033[1m" + ".: Fare Skewness & Kurtosis :." + "\033[0m")
print("*" * 40)
•• Join my WhatsApp Channel for the latest updates on AI:
https://www.whatsapp.com/channel/0029VavNSDO9mrGWYirxz40G
print("Skewness:" + "\033[35m\033[1m {:.3f}".format(var.skew(axis=0, skipna=True)))
print("\033[0m" + "Kurtosis:" + "\033[35m\033[1m {:.3f}".format(var.kurt(axis=0, skipna=True)))
print("\n")

# Text styles shared by the panels below.
# NOTE(review): black_grad is not defined in the visible code.
panel_title_font = dict(
    fontweight="bold", fontsize=14, fontfamily="sans-serif", color=black_grad[1]
)
axis_label_font = dict(
    fontweight="regular", fontsize=11, fontfamily="sans-serif", color=black_grad[1]
)

# --- General Title ---
fig.suptitle(
    "Fare Distribution",
    fontweight="bold",
    fontsize=16,
    fontfamily="sans-serif",
    color=black_grad[0],
)
fig.subplots_adjust(top=0.9)

# --- Histogram --- (top-right cell)
ax_1 = fig.add_subplot(2, 2, 2)
plt.title("Histogram Plot", **panel_title_font)
sns.histplot(data=titanic, x=var, kde=True, color=color)
# x carries the variable, y carries the counts.
plt.xlabel("Fare", **axis_label_font)
plt.ylabel("Total", **axis_label_font)
plt.grid(axis="x", alpha=0)
plt.grid(axis="y", alpha=0.2)

# --- Q-Q Plot --- (bottom-right cell)
ax_2 = fig.add_subplot(2, 2, 4)
plt.title("Q-Q Plot", **panel_title_font)
qqplot(
    var,
    fit=True,
    line="45",
    ax=ax_2,
    markerfacecolor=color,
    markeredgecolor=color,
    alpha=0.6,
)
plt.xlabel("Theoritical Quantiles", **axis_label_font)
plt.ylabel("Sample Quantiles", **axis_label_font)
plt.grid(axis="both", alpha=0.2)

# --- Boxen Plot --- (whole left half)
ax_3 = fig.add_subplot(1, 2, 1)
plt.title("Boxen Plot", **panel_title_font)
sns.boxenplot(y=var, data=titanic, color=color, linewidth=1.5)
plt.ylabel("Fare", **axis_label_font)
plt.grid(axis="y", alpha=0.2)
plt.show()
Learn Data Visualization With Python
https://t.me/AIMLDeepThaught/625
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, precision_recall_curve
from yellowbrick.model_selection import FeatureImportances, LearningCurve

# --- Applying Logistic Regression ---
LRclassifier = LogisticRegression(solver="liblinear")
LRclassifier.fit(X_train, y_train)
y_pred_LR = LRclassifier.predict(X_test)

# --- LR Accuracy --- (scikit-learn metrics take y_true first)
LRAcc = accuracy_score(y_test, y_pred_LR)
print(
    ".:. Logistic Regression Accuracy:"
    + "\033[35m\033[1m {:.2f}%".format(LRAcc * 100)
    + " \033[0m.:."
)

# --- LR Classification Report ---
print("\033[35m\033[1m\n.: Classification Report" + "\033[0m")
print("*" * 25)
print(classification_report(y_test, y_pred_LR))

# --- Performance Evaluation ---
print("\033[35m\n\033[1m" + ".: Performance Evaluation" + "\033[0m")
print("*" * 26)
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(14, 10))

# --- LR Confusion Matrix ---
logmatrix = ConfusionMatrix(
    LRclassifier, ax=ax1, cmap="RdPu", title="Logistic Regression Confusion Matrix"
)
logmatrix.fit(X_train, y_train)
logmatrix.score(X_test, y_test)
logmatrix.finalize()

# --- LR ROC AUC ---
logrocauc = ROCAUC(LRclassifier, ax=ax2, title="Logistic Regression ROC AUC Plot")
logrocauc.fit(X_train, y_train)
logrocauc.score(X_test, y_test)
logrocauc.finalize()

# --- LR Learning Curve ---
loglc = LearningCurve(LRclassifier, ax=ax3, title="Logistic Regression Learning Curve")
loglc.fit(X_train, y_train)
loglc.finalize()

# Nothing is drawn in the fourth cell, so hide its empty frame.
ax4.set_visible(False)
plt.tight_layout()
# Count plots of the columns in `cat`, coloured by Pclass, on a 5x2 grid;
# cells without a plot are hidden.
cat = ["Sex", "Embarked"]
sns.set_theme(
    rc={
        "figure.dpi": 100,
        "axes.labelsize": 7,
        "axes.facecolor": "#f0eee9",
        "grid.color": "#fffdfa",
        "figure.facecolor": "#e8e6e1",
    },
    font_scale=0.55,
)
fig, ax = plt.subplots(5, 2, figsize=(7, 18))
flat_axes = ax.flatten()
for column, axes in zip(cat, flat_axes):
    sns.countplot(
        ax=axes,
        x=titanic[column],
        hue=titanic["Pclass"],
        palette="magma",
        alpha=0.8,
    )
for unused_axes in flat_axes[len(cat):]:
    unused_axes.set_visible(False)
plt.tight_layout()
plt.show()
Learn Data Visualization With Python
https://t.me/AIMLDeepThaught/625
# Scatter plot of every numeric wine column against the row index, coloured
# by total sulfur dioxide; one plot per row of the grid, spare rows hidden.
num = wine.select_dtypes(include="number")
fig, ax = plt.subplots(14, 1, figsize=(7, 30))
flat_axes = ax.flatten()
for column, axes in zip(num, flat_axes):
    sns.scatterplot(
        ax=axes,
        y=wine[column].index,
        x=wine[column],
        hue=wine["total sulfur dioxide"],
        palette="magma",
        alpha=0.8,
    )
for unused_axes in flat_axes[len(num.columns):]:
    unused_axes.set_visible(False)
plt.tight_layout()
plt.show()
# Stacked histogram of every numeric wine column, coloured by quality, with
# red vertical lines at the min, quartiles and max of the column.
num = wine.select_dtypes(include="number")
fig, ax = plt.subplots(12, 1, figsize=(14, 35))
for column, axes in zip(num, ax.flatten()):
    # seaborn's own legend is kept: the previous code replaced it with the
    # entries '0' / '1' under the title 'Survived', which does not describe
    # the quality hue (and relied on Legend.legendHandles, which newer
    # matplotlib no longer provides).
    sns.histplot(
        ax=axes,
        x=wine[column],
        hue=wine["quality"],
        palette="magma",
        alpha=0.8,
        multiple="stack",
    )
    Quantiles = np.quantile(wine[column], [0, 0.25, 0.50, 0.75, 1])
    for q in Quantiles:
        axes.axvline(x=q, linewidth=0.5, color="r")
plt.tight_layout()
plt.show()
Learn Data Visualization With Python
https://t.me/AIMLDeepThaught/625
# For each column in `cat`: a faded violin plot of Fare, the individual points
# as a strip plot, and the mean with a standard-deviation bar on top.
cat = ["Sex", "Embarked"]
fig, ax = plt.subplots(5, 2, figsize=(6.5, 10))
flat_axes = ax.flatten()
for column, axes in zip(cat, flat_axes):
    sns.violinplot(
        ax=axes,
        x=titanic[column],
        y=titanic["Fare"],
        scale="width",
        linewidth=0.5,
        palette="magma",
        inner=None,
    )
    plt.setp(axes.collections, alpha=0.3)  # fade the violins drawn so far
    sns.stripplot(
        ax=axes,
        x=titanic[column],
        y=titanic["Fare"],
        palette="magma",
        alpha=0.9,
        s=1.5,
        jitter=0.07,
    )
    sns.pointplot(
        ax=axes,
        x=titanic[column],
        y=titanic["Fare"],
        color="#ff5736",
        scale=0.25,
        estimator=np.mean,
        ci="sd",
        errwidth=0.5,
        capsize=0.15,
        join=True,
    )
    plt.setp(axes.lines, zorder=100)
    plt.setp(axes.collections, zorder=100)
# Hide the grid cells that did not receive a plot.
for unused_axes in flat_axes[len(cat):]:
    unused_axes.set_visible(False)
plt.tight_layout()
plt.show()
# Boxen plot of Fare per embarkation port on a green background.
sns.set(rc={"axes.facecolor": "#a1c45a", "axes.grid": False})
plt.figure(figsize=(11, 6))
plt.gcf().text(0.51, 0.84, "Box Plot", fontsize=40, color="Black", ha="center", va="center")
sns.boxenplot(x=titanic["Embarked"], y=titanic["Fare"], palette="Set1")
plt.show()

# Facet along the columns to show a categorical variable using "col".
# catplot is figure-level and creates its own figure, so no plt.figure() call
# precedes it (one would only leave an empty figure behind).
sns.catplot(
    x="Embarked",
    y="Fare",
    hue="Pclass",
    col="Sex",
    kind="boxen",
    palette="Set2",
    height=8,
    aspect=1,
    data=titanic,
)
plt.show()
# Strip plot and box plot of Fare per Pclass on the same axes, with Pclass
# also used as the hue.
plt.figure(figsize=(16, 7))
sns.set(
    rc={
        "axes.facecolor": "#b0deff",
        "axes.grid": False,
        "xtick.labelsize": 15,
        "ytick.labelsize": 15,
        "axes.labelsize": 20,
        "figure.figsize": (20.0, 9.0),
    }
)
params = dict(data=titanic, x=titanic.Pclass, y=titanic.Fare, hue=titanic.Pclass, dodge=True)
# Three colours per palette so each hue level has its own colour instead of
# the list being cycled (assumes Pclass has three levels - confirm).
sns.stripplot(
    **params,
    size=8,
    jitter=0.35,
    palette=["#33FF66", "#FF6600", "Blue"],
    edgecolor="black",
    linewidth=1,
)
sns.boxplot(**params, palette=["#BDBDBD", "#E0E0E0", "#F5F5F5"], linewidth=6)
plt.show()
•• Join my WhatsApp Channel for the latest updates on AI:
https://www.whatsapp.com/channel/0029VavNSDO9mrGWYirxz40G
# Plot a subset of variables: Fare and Age against each other, coloured by
# Pclass. Scatter plots go off the diagonal, histograms on it.
pair_vars = ["Fare", "Age"]
g = sns.PairGrid(
    titanic,
    hue="Pclass",
    x_vars=list(pair_vars),
    y_vars=list(pair_vars),
    height=6,
    aspect=1,
)
g = g.map_offdiag(plt.scatter, edgecolor="w", s=130)
g = g.map_diag(plt.hist, edgecolor="w", linewidth=2)
g = g.add_legend()
plt.show()
Learn Data Visualization With Python
https://t.me/AIMLDeepThaught/625
# Distribution of the first ten red-wine columns on a 5x2 grid; each legend
# entry shows the column's skewness.
df = pd.read_csv("winequality-red.csv")
features_mean = list(df.columns[:10])
num_rows, num_cols = 5, 2
fig, axes = plt.subplots(num_rows, num_cols, figsize=(25, 12))
for index, column in enumerate(features_mean):
    i, j = divmod(index, num_cols)
    # histplot with a KDE overlay stands in for the deprecated distplot.
    g = sns.histplot(
        x=df[column],
        kde=True,
        stat="density",
        color="m",
        label="%.2f" % (df[column].skew()),
        ax=axes[i, j],
    )
    g.legend(loc="best")
# Tighten the layout once every subplot has its content.
fig.tight_layout()
# Explore Age distribution by sex. `df` holds the wine data at this point and
# has no 'Sex' or 'Age' column; the 'male' / 'female' values belong to the
# titanic data, so that frame is used here.
y = titanic["Sex"]
g = sns.kdeplot(
    titanic["Age"][(y == "male") & (titanic["Age"].notnull())],
    color="Red",
    fill=True,  # fill= replaces the deprecated shade= keyword
    label="Male",
)
g = sns.kdeplot(
    titanic["Age"][(y == "female") & (titanic["Age"].notnull())],
    ax=g,
    color="Blue",
    fill=True,
    label="Female",
)
# Labels name what is plotted (Age, split by sex).
g.set_xlabel("Age")
g.set_ylabel("Frequency")
g.legend()
# KDE of the first ten red-wine columns, comparing quality 5 (red) with
# quality 6 (blue), on a 5x2 grid.
df = pd.read_csv("winequality-red.csv")
features_mean = list(df.columns[:10])
df_b = df[df["quality"] == 5]
df_m = df[df["quality"] == 6]
num_rows, num_cols = 5, 2
fig, axes = plt.subplots(num_rows, num_cols, figsize=(25, 12))
for index, column in enumerate(features_mean):
    i, j = divmod(index, num_cols)
    # fill= replaces the deprecated shade= keyword; the labels name the two
    # quality groups actually plotted.
    g = sns.kdeplot(df_b[column], color="Red", fill=True, label="quality = 5", ax=axes[i, j])
    g = sns.kdeplot(df_m[column], ax=g, color="Blue", fill=True, label="quality = 6")
    g.set_xlabel(column)
    g.legend()
fig.tight_layout()
Learn Data Visualization With Python
https://t.me/AIMLDeepThaught/625
# Load the car data before selecting its columns: at this point raw_df still
# holds the heart columns, which do not include any of the names below.
raw_df = pd.read_csv("datasets_33080_1320127_CAR DETAILS FROM CAR DEKHO.csv")
raw_df = raw_df[
    [
        "name",
        "year",
        "selling_price",
        "km_driven",
        "fuel",
        "seller_type",
        "transmission",
        "owner",
    ]
]
# Function to print width of barcharts on the bars
def barw(ax):
    """Annotate every bar of a horizontal bar chart with its width.

    Parameters
    ----------
    ax : Axes-like
        Object exposing ``patches`` (the bars) and ``annotate``.

    Each annotation is the bar's width rounded to two decimals, placed at the
    end of the bar and vertically centred on it.
    """
    for p in ax.patches:
        val = p.get_width()  # length of the horizontal bar = value to show
        x = p.get_x() + p.get_width()  # x-position: end of the bar
        y = p.get_y() + p.get_height() / 2  # y-position: middle of the bar
        ax.annotate(round(val, 2), (x, y))
# Horizontal count plot of the owner column, most frequent first, with the
# count written at the end of every bar.
plt.figure(figsize=(10, 5))
owner_order = raw_df["owner"].value_counts().index
ax0 = sns.countplot(data=raw_df, y="owner", order=owner_order)
barw(ax0)
plt.show()
Learn Data Visualization With Python
https://t.me/AIMLDeepThaught/625
•• Join my WhatsApp Channel for the latest updates on AI:
https://www.whatsapp.com/channel/0029VavNSDO9mrGWYirxz40G
# Mean selling price per owner category, highest first, as horizontal bars.
raw_df = pd.read_csv("datasets_33080_1320127_CAR DETAILS FROM CAR DEKHO.csv")
raw_df = raw_df[
    [
        "name",
        "year",
        "selling_price",
        "km_driven",
        "fuel",
        "seller_type",
        "transmission",
        "owner",
    ]
]
# numeric_only=True: the frame also holds string columns (name, fuel, ...),
# which newer pandas refuses to average instead of silently dropping.
df_gc = raw_df.groupby("owner").mean(numeric_only=True)
df_gc.reset_index(inplace=True)
# Notebook-style display of the ranking; has no effect when run as a script.
df_gc[["owner", "selling_price"]].sort_values("selling_price", ascending=False)
plt.figure(figsize=(10, 5))
ax1 = sns.barplot(
    data=raw_df,
    x="selling_price",
    y="owner",
    order=df_gc.sort_values("selling_price", ascending=False)["owner"],
    ci=None,
)
barw(ax1)
plt.show()
Learn Data Visualization With Python
https://t.me/AIMLDeepThaught/625
# Scatter plot of fixed acidity against total sulfur dioxide; marker size is
# a tenth of "alcohol" and marker colour follows "chlorides" (jet colour map).
sns.set_style("white")
marker_sizes = df["alcohol"] / 10
df.plot(
    kind="scatter",
    x="fixed acidity",
    y="total sulfur dioxide",
    alpha=0.5,
    s=marker_sizes,
    label="alcohol",
    figsize=(10, 7),
    c="chlorides",
    cmap=plt.get_cmap("jet"),
    colorbar=True,
    sharex=False,
)
plt.legend()
plt.show()
# Heat map of the heart correlation matrix; the upper triangle (diagonal
# included) is masked so each pair is shown once.
matplotlib.rcParams.update({"font.size": 20})
# Correlate the numeric columns only: heart also holds string columns, which
# newer pandas refuses to correlate instead of silently dropping.
corr = heart.select_dtypes(include="number").corr()
mask = np.triu(np.ones_like(corr, dtype=bool))
plt.figure(dpi=100)
plt.title(
    "Correlation Analysis",
    fontsize=25,
    color="DarkOrange",
    font="Lucida Calligraphy",
)
sns.heatmap(
    corr,
    mask=mask,
    annot=True,
    lw=0,
    linecolor="white",
    cmap="viridis",
    fmt="0.2f",
)
plt.xticks(rotation=90)
plt.yticks(rotation=0)
plt.show()
Learn Data Visualization With Python
https://t.me/AIMLDeepThaught/625
# Exploded, shadowed pie chart of the Sex column with a legend and no axes.
matplotlib.rcParams.update({"font.size": 40})
sex_counts = heart["Sex"].value_counts()
ax = sex_counts.plot.pie(explode=[0.1, 0.1], autopct="%1.2f%%", shadow=True)
ax.set_title(label="Sex", fontsize=40, color="DarkOrange", font="Lucida Calligraphy")
# Legend labels follow the order of the slices instead of a hard-coded list.
plt.legend(labels=sex_counts.index)
plt.axis("off")
Learn Data Visualization With Python
https://t.me/AIMLDeepThaught/625
•• Join my WhatsApp Channel for the latest updates on AI:
https://www.whatsapp.com/channel/0029VavNSDO9mrGWYirxz40G
# Bucket Age into four labelled ranges. include_lowest=True closes the first
# interval on the left so that age 29 itself lands in "adult" rather than NaN.
heart["age_bins"] = pd.cut(
    heart["Age"],
    bins=[29, 40, 50, 60, 80],
    labels=["adult", "fortieth", "old", "ancient"],
    include_lowest=True,
)
def count_plot(data, x=None, y=None, figsize=None, title=None, color=None, prop=False, rotation_x=0):
    """Draw a count plot of one column and label every bar.

    Parameters
    ----------
    data : DataFrame-like holding the column.
    x, y : exactly one of them names the column; ``x`` gives vertical bars,
        ``y`` horizontal bars.
    figsize : figure size; (12, 7) when omitted.
    title : plot title.
    color : bar colour passed to seaborn.
    prop : when true the labels are percentages of the total, otherwise
        raw counts.
    rotation_x : rotation of the x tick labels (vertical bars only).

    Raises
    ------
    ValueError
        If neither or both of ``x`` and ``y`` are given.

    Bars are ordered by descending frequency. The figure is displayed with
    ``plt.show()`` and nothing is returned.
    """
    if x is None and y is None:
        raise ValueError("Expected y or x")
    if x is not None and y is not None:
        raise ValueError("Expected y or x not both")

    count_type = data[y if x is None else x].value_counts(ascending=False)
    Sum = count_type.sum()
    type_order = count_type.index
    # One label per bar, in bar order; iterating the values avoids label-vs-
    # position ambiguity when the categories are themselves integers.
    if prop:
        bar_labels = ["{:0.1f}%".format(100 * count / Sum) for count in count_type.tolist()]
    else:
        bar_labels = ["{}".format(count) for count in count_type.tolist()]

    plt.figure(figsize=figsize if figsize is not None else (12, 7))
    if x is None:
        sns.countplot(data=data, y=y, color=color, order=type_order)
        # `order` puts the bars at positions 0..n-1 along the category axis.
        for position, (count, text) in enumerate(zip(count_type.tolist(), bar_labels)):
            plt.text(count + 1, position, text, va="center")
    else:
        sns.countplot(data=data, x=x, color=color, order=type_order)
        plt.xticks(rotation=rotation_x)
        for position, (count, text) in enumerate(zip(count_type.tolist(), bar_labels)):
            plt.text(position, count + 2, text, ha="center")
    plt.title(title)
    plt.show()
# Count plot of the age buckets.
count_plot(
    data=heart,
    x="age_bins",
)
Learn Data Visualization With Python
https://t.me/AIMLDeepThaught/625
heart.rename(columns={"target": "have disease"}, inplace=True)
# Recode only the HeartDisease flag. A frame-wide replace would also rewrite
# every 0 and 1 found in the other columns.
heart["HeartDisease"] = heart["HeartDisease"].replace({1: "yes", 0: "no"})
count_plot(data=heart, x="HeartDisease")
count_plot(data=heart, x="HeartDisease", prop=True)
from sklearn.metrics import confusion_matrix

# 4697: no's, 4232: yes
# NOTE(review): y_train_pred is not defined in the visible code, and the tick
# labels below mention term deposits - confirm which model and data this
# heat map is meant for.
conf_matrix = confusion_matrix(y_train, y_train_pred)
f, ax = plt.subplots(figsize=(12, 8))
sns.heatmap(conf_matrix, annot=True, fmt="d", linewidths=0.5, ax=ax)
plt.title("Confusion Matrix", fontsize=20)
plt.subplots_adjust(left=0.15, right=0.99, bottom=0.15, top=0.99)
# Put one y tick at the centre of each heat-map row.
ax.set_yticks(np.arange(conf_matrix.shape[0]) + 0.5, minor=False)
ax.set_xticklabels("")
ax.set_yticklabels(
    ["Refused T. Deposits", "Accepted T. Deposits"],
    fontsize=16,
    rotation=360,
)
plt.show()
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix

# Fit a default logistic regression on the training split and predict the
# test split.
lr = LogisticRegression()
lr.fit(X_train, y_train)
y_pred_lr = lr.predict(X_test)
# Notebook-style display of the confusion matrix; the result is not stored.
confusion_matrix(y_test, y_pred_lr)
def make_confusion_matrix(cf,
                          group_names=None,
                          categories='auto',
                          count=True,
                          percent=True,
                          cbar=True,
                          xyticks=True,
                          xyplotlabels=True,
                          sum_stats=True,
                          figsize=None,
                          cmap='Blues',
                          title=None):
    """Plot a confusion matrix as an annotated seaborn heat map.

    Parameters
    ----------
    cf : 2-D numpy array holding the confusion matrix.
    group_names : optional list of cell names, one per cell in row-major
        order; used only when its length equals ``cf.size``.
    categories : tick labels passed to the heat map.
    count : show the raw count in every cell.
    percent : show each cell's share of the total.
    cbar : draw the colour bar.
    xyticks : when false, tick labels are suppressed.
    xyplotlabels : label the axes 'True label' / 'Predicted label'.
    sum_stats : append accuracy (and, for a 2x2 matrix, precision, recall
        and F1) below the x-axis label.
    figsize : figure size; the rcParams default when omitted.
    cmap : colour map name.
    title : optional title drawn above the heat map.

    The heat map is drawn on a new figure; the caller is responsible for
    ``plt.show()``.
    """
    # CODE TO GENERATE TEXT INSIDE EACH SQUARE
    blanks = ['' for _ in range(cf.size)]

    if group_names and len(group_names) == cf.size:
        group_labels = ["{}\n".format(value) for value in group_names]
    else:
        group_labels = blanks

    if count:
        group_counts = ["{0:0.0f}\n".format(value) for value in cf.flatten()]
    else:
        group_counts = blanks

    if percent:
        group_percentages = ["{0:.2%}".format(value) for value in cf.flatten() / np.sum(cf)]
    else:
        group_percentages = blanks

    box_labels = [
        f"{v1}{v2}{v3}".strip()
        for v1, v2, v3 in zip(group_labels, group_counts, group_percentages)
    ]
    box_labels = np.asarray(box_labels).reshape(cf.shape[0], cf.shape[1])

    # CODE TO GENERATE SUMMARY STATISTICS & TEXT FOR SUMMARY STATS
    if sum_stats:
        # Accuracy is sum of diagonal divided by total observations
        accuracy = np.trace(cf) / float(np.sum(cf))

        # If it is a binary confusion matrix, show some more stats
        if len(cf) == 2:
            # Metrics for binary confusion matrices. An empty denominator
            # (no predicted or no actual positives) yields 0.0 instead of
            # a NaN plus a runtime warning.
            predicted_positive = cf[:, 1].sum()
            actual_positive = cf[1, :].sum()
            precision = cf[1, 1] / predicted_positive if predicted_positive else 0.0
            recall = cf[1, 1] / actual_positive if actual_positive else 0.0
            if precision + recall:
                f1_score = 2 * precision * recall / (precision + recall)
            else:
                f1_score = 0.0
            stats_text = "\n\nAccuracy={:0.3f}\nPrecision={:0.3f}\nRecall={:0.3f}\nF1 Score={:0.3f}".format(
                accuracy, precision, recall, f1_score)
        else:
            stats_text = "\n\nAccuracy={:0.3f}".format(accuracy)
    else:
        stats_text = ""

    # SET FIGURE PARAMETERS ACCORDING TO OTHER ARGUMENTS
    if figsize is None:
        # Get default figure size if not set
        figsize = plt.rcParams.get('figure.figsize')

    if not xyticks:
        # Do not show categories if xyticks is False
        categories = False

    # MAKE THE HEATMAP VISUALIZATION
    fig = plt.figure(figsize=figsize)
    fig.patch.set_facecolor('#f5f6f6')
    sns.heatmap(cf, annot=box_labels, fmt="", linewidths=1, square=True, linecolor='#f5f6f6',
                cmap=cmap, cbar=cbar,
                annot_kws={'fontfamily': 'serif', 'size': 18, 'weight': 'bold'},
                xticklabels=categories,
                yticklabels=categories,)

    label_font = {'fontfamily': 'serif', 'size': 12, 'weight': 'bold'}
    if xyplotlabels:
        plt.ylabel('True label', **label_font)
        plt.xlabel('Predicted label' + stats_text, **label_font)
    else:
        plt.xlabel(stats_text, **label_font)

    # The title argument was accepted but never drawn.
    if title:
        plt.title(title)
# Confusion matrix of the logistic-regression predictions, drawn with named
# cells and no colour bar.
vani_cf_matrix = confusion_matrix(y_test, y_pred_lr)
my_cols = [colors[3], colors[2]]
labels = ["True Neg", "False Pos", "False Neg", "True Pos"]
categories = ["Cat", "Dog"]
make_confusion_matrix(
    vani_cf_matrix,
    figsize=(10, 5),
    group_names=labels,
    cbar=False,
    cmap="magma",
    categories=categories,
    title="Vanila CNN comfusion matrix",
)
plt.show()
•• Join my WhatsApp Channel for the latest updates on AI:
https://www.whatsapp.com/channel/0029VavNSDO9mrGWYirxz40G
Learn Data Visualization With Python
https://t.me/AIMLDeepThaught/625
sns.set_style("white")
sns.set_context("poster", font_scale=0.7)
palette = [
    "#1d7874",
    "#679289",
    "#f4c095",
    "#ee2e31",
    "#ffb563",
    "#918450",
    "#f85e00",
    "#a41623",
    "#9a031e",
    "#d6d6d6",
    "#ffee32",
    "#ffd100",
    "#333533",
    "#202020",
]
# sns.palplot(sns.color_palette(palette))
# plt.show()

# Bar chart of Age per Pclass; x is restricted to the first 14 rows.
# NOTE(review): `dataset` is not defined in the visible code, and the title
# and axis labels below refer to anime data rather than Pclass / Age -
# confirm the intended text.
plt.subplots(figsize=(20, 8))
p = sns.barplot(
    x=dataset["Pclass"][:14],
    y=dataset["Age"],
    palette=palette,
    saturation=1,
    edgecolor="#1c1c1c",
    linewidth=2,
)
p.axes.set_title("\nTop Anime Community\n", fontsize=25)
plt.ylabel("Total Member", fontsize=20)
plt.xlabel("\nAnime Name", fontsize=20)
# plt.yscale("log")
plt.xticks(rotation=90)
# Write each bar's value inside it, rotated, on an orange rounded box.
for container in p.containers:
    p.bar_label(
        container,
        label_type="center",
        padding=6,
        size=15,
        color="black",
        rotation=90,
        bbox={
            "boxstyle": "round",
            "pad": 0.6,
            "facecolor": "orange",
            "edgecolor": "black",
            "alpha": 1,
        },
    )
sns.despine(left=True, bottom=True)
plt.show()
# Box plots (cells 1-2) and histograms (cells 3-4) of Age and Fare in one row.
numfeature = ["Age", "Fare"]
enumfeat = list(enumerate(numfeature))
plt.figure(figsize=(20, 9))
plt.suptitle("Distribution and Outliers of Numerical Data", fontsize=20)
for i in enumfeat:
    plt.subplot(1, 4, i[0] + 1)
    sns.boxplot(data=train[i[1]], palette="rainbow")
    plt.xlabel(str(i[1]))
for i in enumfeat:
    plt.subplot(1, 4, i[0] + 3)
    sns.histplot(data=train[i[1]], palette="rainbow", bins=15)
    plt.xlabel(str(i[1]))
plt.tight_layout()
plt.show()

# Histogram with a KDE overlay for each numeric feature, stacked vertically.
plt.figure(figsize=(15, 12))
plt.suptitle(
    "Distribution & Kernel Density Estimation of Numerical Features",
    fontsize=20,
)
for i in enumfeat:
    plt.subplot(2, 1, i[0] + 1)
    sns.histplot(x=train[i[1]], kde=True, bins=30, color=(0.50, 0.20, 0.70))
plt.tight_layout()
plt.show()
plt.figure(figsize=(6,8))
plt.title("Correlation of Survival column with Independent Features",
fontsize=15)
corr = train.corr()["Survived"].sort_values(ascending=False)[1:]
sns.barplot(x=corr.index, y=corr, color=(0.90,0.30,0.50))
plt.tight_layout()
plt.show()
plt.figure(figsize=(15,5))
plt.suptitle("Probability Distribution of numerical columns according to number
of Survived", fontsize = 20)
for i in enumfeat:
plt.subplot(1,2,i[0]+1)
sns.kdeplot(data=train, x=i[1], hue="Survived")
plt.tight_layout()
plt.show()
import missingno as msno
# Missingness matrix, followed by a printed count for every column that has gaps.
msno.matrix(train, color=(0.50, 0.30, 0.80))
plt.show()

x = train.isnull().sum()
for a, b in x[x > 0].items():
    print(f"There are {b} missing values in column: {a}")
# Global seaborn theme, then a bar chart of the medal count per sex,
# saved to 'world regions.png'.
rc = {
    'figure.dpi': 150,
    'axes.labelsize': 4,
    'axes.facecolor': '#F6C90E',
    'grid.color': 'Red',
    'figure.figsize': (12, 5),
    'figure.facecolor': '#F6C90E',
}
sns.set_theme(
    context='notebook',
    style='dark',
    palette='deep',
    font='Comic Sans Ms',
    font_scale=1,
    color_codes='red',
    rc=rc,
)

color = ['Green', "Red"]
medals_by_sex = df.groupby('Sex')['Medal'].count().sort_values(ascending=True)
medals_by_sex.plot(kind="bar", color=color, alpha=.5);
plt.title("Sex Vs Medalse", fontsize=17, color='Brown',
          font='Comic Sans Ms', pad=20);
plt.xlabel("Sex ", fontsize=15, color='#1a4441', font='Comic Sans Ms')
plt.ylabel("Number of Medals", fontsize=15, color='#1a4441',
           font='Comic Sans Ms');
plt.legend(loc='best');
plt.savefig('world regions.png');
# Medal count per region, keeping the 20 largest as a two-column frame.
medals_per_region = df.groupby('region')['Medal'].count()
region_medal = medals_per_region.nlargest(20).reset_index()
region_medal.head()
Learn Data Visualization With Python
https://t.me/AIMLDeepThaught/625
•• Join my WhatsApp Channel for the latest updates on AI:
https://www.whatsapp.com/channel/0029VavNSDO9mrGWYirxz40G
# Horizontal bars: one per region, length = medal count.
sns.barplot(data=region_medal, x='Medal', y='region')
plt.title('medals by regions')
plt.xlabel('medals')
plt.ylabel('regions')
plt.xticks(rotation=45)
plt.show()
# Grouped bar chart of medal types for the 20 regions with the most gold medals.
summer_medals = df.groupby(['region', 'Medal']).size().reset_index()
summer_medals.columns = ['region', 'Medal', 'count']

# DataFrame.pivot takes keyword arguments only since pandas 2.0; the keywords
# also work on older versions. The table is built once and reused.
medal_table = summer_medals.pivot(
    index='region', columns='Medal', values='count'
).fillna(0)
summer_medals_20 = medal_table.sort_values(['Gold'], ascending=False).head(20)

summer_medals_20.plot(kind='bar')
plt.xlabel('Country')
plt.title('Medals by Country- Summer Olympics ')
fig = plt.gcf()
fig.set_size_inches(18.5, 10.5)
plt.show()
Learn Data Visualization With Python
https://t.me/AIMLDeepThaught/625
# Number of rows per Olympic year, drawn as a bar chart.
year = df['Year'].value_counts()

plt.figure(figsize=(15, 10))
sns.barplot(x=year.index, y=year.values)
plt.title("Number of participants by year")
plt.xlabel("Year")
plt.ylabel("number of athletes")
plt.xticks(rotation=90)
plt.show()
# Pie chart of the five most frequent sports.
sport = df['Sport'].value_counts()[:5]
print(sport)

labels = sport.index
sizes = sport.values
plt.pie(
    sizes,
    labels=labels,
    autopct='%1.1f%%',
    shadow=True,
    startangle=90,
)
plt.show()
# Twenty most frequent sports among the Summer rows, as horizontal bars.
summer_rows = df[df['Season'] == 'Summer']
sport_summer = (
    summer_rows['Sport']
    .value_counts()
    .sort_values(ascending=False)
    .head(20)
)
sport_summer

plt.figure(figsize=(15, 10))
sns.barplot(y=sport_summer.index, x=sport_summer.values, palette='magma')
plt.xlabel('Number of events')
plt.ylabel('Sport')
plt.xticks(rotation=90)
plt.title("Number of events in each sport in the summer Olympics")
plt.show()
Learn Data Visualization With Python
https://t.me/AIMLDeepThaught/625
# Same chart as for Summer, restricted to the Winter rows.
winter_rows = df[df['Season'] == 'Winter']
sport_winter = winter_rows['Sport'].value_counts().sort_values(ascending=False)

plt.figure(figsize=(15, 10))
top_winter = sport_winter.head(20)
sns.barplot(y=top_winter.index, x=top_winter.values, palette='magma')
plt.xlabel('Number of events')
plt.ylabel('Sport')
plt.xticks(rotation=90)
plt.title("Number of events in each sport in the winter Olympics")
plt.show()
Mastering Data Visualization Techniques
(Part 3)
Prepared by: Syed Afroz Ali
# One stacked Age histogram per passenger class, coloured by "Survived".
plot, ax = plt.subplots(1, 3, figsize=(14, 4))

panels = ((1, "1-Pclass"), (2, "2-Pclass"), (3, "3-Pclass"))
for axis, (pclass, panel_title) in zip(ax, panels):
    class_rows = train_data.loc[train_data["Pclass"] == pclass]
    sns.histplot(
        data=class_rows,
        x="Age",
        hue="Survived",
        binwidth=5,
        ax=axis,
        palette=sns.color_palette(["yellow", "green"]),
        multiple="stack",
    ).set_title(panel_title)

plt.show()
Syed Afroz Ali
Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
# Pie chart of the "sex" column counts; the first wedge is pulled out slightly.
# NOTE(review): the labels are a fixed list while the wedge order comes from
# value_counts() — confirm they line up for this dataset.
sex = ["Male", "Female"]
values = data["sex"].value_counts()
color = ["#FF0000", "#000000"]

plt.figure(figsize=(5, 7))
plt.pie(
    values,
    labels=sex,
    colors=color,
    explode=(0.1, 0),
    textprops={"color": "w"},
    autopct="%.2f%%",
    shadow=True,
    startangle=90,
)
plt.legend();
# Plotting Outliers
# One box plot per column in a 3x5 grid; at most 13 panels are drawn.
col = 1
plt.figure(figsize=(20, 15))
for i in data.columns:
    if col >= 14:
        break
    plt.subplot(3, 5, col)
    plt.boxplot(data[i])
    plt.xlabel(i)
    col += 1
Syed Afroz Ali
Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
# LDH box plot per TARGET group with three horizontal reference lines.
fig = plt.figure(figsize=(8, 6))
ax = fig.add_axes([0, 0, 1, 1])
sns.boxplot(ax=ax, data=df, x='TARGET', y='LDH')
# ,flierprops=dict(marker='o', markersize=6),fliersize=2)
for level, line_color in ((550, 'b'), (650, 'orange'), (1200, 'g')):
    ax.axhline(y=level, color=line_color)
Syed Afroz Ali
Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
# KDE of each of the first 100 columns of `train`, laid out on a 20x5 grid.
fig = plt.figure(figsize=(15, 60))
kde_columns = train.columns.tolist()[:100]  # computed once instead of per use
for i, column in enumerate(kde_columns):
    plt.subplot(20, 5, i + 1)
    sns.set_style("white")
    plt.title(column, size=12, fontname='monospace')
    # `fill=` replaces the deprecated `shade=`; `x=` keeps the call valid on
    # seaborn versions where the first positional argument is `data`.
    a = sns.kdeplot(x=train[column], fill=True, alpha=0.9, linewidth=1.5,
                    facecolor=(1, 1, 1, 0), edgecolor=".2")
    plt.ylabel('')
    plt.xlabel('')
    plt.xticks(fontname='monospace')
    plt.yticks([])
    # Keep only a slightly thicker bottom spine.
    for j in ['right', 'left', 'top']:
        a.spines[j].set_visible(False)
    a.spines['bottom'].set_linewidth(1.2)

fig.tight_layout(h_pad=3)
plt.show()
Syed Afroz Ali
Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
# Horizontal box plot of each of the first 100 columns of `train`, 20x5 grid.
fig = plt.figure(figsize=(15, 60))
box_columns = train.columns.tolist()[:100]  # computed once instead of per use
for i, column in enumerate(box_columns):
    plt.subplot(20, 5, i + 1)
    sns.set_style("white")
    plt.title(column, size=12, fontname='monospace')
    # Pass the vector as `x=`: a bare positional argument is read as `data`
    # on seaborn >= 0.12, which changes how the box is drawn.
    a = sns.boxplot(x=train[column], linewidth=2.5, color='white')
    plt.ylabel('')
    plt.xlabel('')
    plt.xticks(fontname='monospace')
    plt.yticks([])
    # Keep only a slightly thicker bottom spine.
    for j in ['right', 'left', 'top']:
        a.spines[j].set_visible(False)
    a.spines['bottom'].set_linewidth(1.2)

fig.tight_layout(h_pad=3)
plt.show()
# One count plot per entry of numerical_features, split by "HeartDisease".
fig, ax = plt.subplots(nrows=5, ncols=1, figsize=(15, 30))
colors = ['#F3ED13', '#451FA4']
for i in range(len(numerical_features)):
    plt.subplot(5, 1, i + 1)
    # The column must be named with `x=`: on seaborn >= 0.12 the first
    # positional parameter is `data`, so a positional column name clashes
    # with the `data=` keyword and raises TypeError.
    sns.countplot(x=numerical_features[i], data=data, hue="HeartDisease",
                  palette=colors)
    title = numerical_features[i] + ' vs Heart Disease'
    plt.title(title);
# Summary statistics of every column but the last, sorted by standard
# deviation and rendered with a colour gradient plus bars on max and mean.
train_summary = train.iloc[:, :-1].describe().T
train_summary = train_summary.sort_values(by='std', ascending=False)
(
    train_summary.style
    .background_gradient(cmap='GnBu')
    .bar(subset=["max"], color='#BB0000')
    .bar(subset=["mean",], color='green')
)
Syed Afroz Ali
Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
# Count plot of "Embarked" on a black background; each bar is annotated with
# its count and its share of all rows.
plt.figure(figsize=(15, 12))
ax = plt.axes()
ax.set_facecolor('black')
ax = sns.countplot(
    x='Embarked',
    data=titanic,
    palette=[custom_colors[2], custom_colors[1]],
    edgecolor='white',
    linewidth=1.2,
)
plt.title('Disaster Count', fontsize=25)
plt.xlabel('Disaster', fontsize=20)
plt.ylabel('Count', fontsize=20)
ax.xaxis.set_tick_params(labelsize=15)
ax.yaxis.set_tick_params(labelsize=15)

bbox_args = dict(boxstyle='round', fc='0.9')
for p in ax.patches:
    bar_count = p.get_height()
    bar_share = (p.get_height() / len(titanic['Embarked'])) * 100
    ax.annotate(
        '{:.0f} = {:.2f}%'.format(bar_count, bar_share),
        (p.get_x() + 0.25, p.get_height() + 10),
        color='black',
        bbox=bbox_args,
        fontsize=18,
    )
plt.show()
Syed Afroz Ali
Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
#Plotting the distributions of the numerical variables
# Seven KDE panels on a 4x2 grid; each curve gets a random palette colour and
# the unused eighth panel is removed.
color_plot = ['#de972c', '#74c91e', '#1681de', '#e069f5', '#f54545',
              '#f0ea46', '#7950cc']
fig, ax = plt.subplots(4, 2, figsize=(20, 20))

kde_features = ['HeartDisease', 'Oldpeak', 'Age', 'FastingBS',
                'RestingBP', 'Cholesterol', 'MaxHR']
# ax.flat walks the grid row by row, the same panel order as seven
# hand-written calls; zip stops after the seventh feature.
for feature_name, axis in zip(kde_features, ax.flat):
    # `fill=` replaces the deprecated `shade=` keyword.
    sns.kdeplot(x=df[feature_name], color=np.random.choice(color_plot),
                ax=axis, fill=True)

fig.delaxes(ax[3][1])
# Count plot of "Survived"; each bar is labelled with its percentage of rows
# and its height is collected in `sizes`.
s = sns.countplot(x='Survived', data=train)
sizes = []
for p in s.patches:
    height = p.get_height()
    sizes.append(height)
    label_x = p.get_x() + p.get_width() / 2.
    s.text(
        label_x,
        height + 3,
        '{:1.2f}%'.format(height / len(train) * 100),
        ha="center",
        fontsize=14,
    )
# Interactive bar chart of the ten most frequent job titles.
z = df['job_title'].value_counts().head(10)
fig = px.bar(
    z,
    x=z.index,
    y=z.values,
    color=z.index,
    text=z.values,
    labels={'index': 'job title', 'y': 'count', 'text': 'count'},
    template='seaborn',
    title='<b> Top 10 Popular Roles in Data Sceince',
)
fig.show()
Syed Afroz Ali
Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
# Lower-triangle correlation heatmap with two arrow annotations.
hm = df.drop('id', axis=1)
corr_matrix = hm.corr()  # computed once, reused for the mask and the plot

# Built-in `bool` instead of `np.bool`: the alias was removed in NumPy 1.24.
mask = np.zeros_like(corr_matrix, dtype=bool)
mask[np.triu_indices_from(mask)] = True  # hide the upper triangle + diagonal

plt.suptitle('Correlation', size=20, weight='bold')
ax = sns.heatmap(corr_matrix, linewidths=0.9, linecolor='white', cbar=True,
                 mask=mask, cmap=heatmap)
ax.annotate('Low Correlation',
            fontsize=10, fontweight='bold',
            xy=(1.3, 3.5), xycoords='data',
            xytext=(0.6, 0.95), textcoords='axes fraction',
            arrowprops=dict(
                facecolor=heatmap[0], shrink=0.025,
                connectionstyle='arc3, rad=0.50'),
            horizontalalignment='left', verticalalignment='top'
            )
ax.annotate('High Correlation',
            fontsize=10, fontweight='bold',
            xy=(3.3, 7.5), xycoords='data',
            xytext=(0.8, 0.4), textcoords='axes fraction',
            arrowprops=dict(
                facecolor=heatmap[0], shrink=0.025,
                connectionstyle='arc3, rad=-0.6'),
            horizontalalignment='left', verticalalignment='top'
            )
plt.show()
Syed Afroz Ali
Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
# Count plot of the target, mapped to readable labels; each bar is annotated
# with its (unrounded) percentage of all rows.
plt.suptitle('Target Variable', size=20, weight='bold')
song_popularity = df['song_popularity'].map({0: 'UnPopular', 1: 'Popular'})
a = sns.countplot(data=df, x=song_popularity, palette=theme)
plt.tick_params(axis="x", colors=theme[0], labelsize=15)

for p in a.patches:
    width = p.get_width()
    height = p.get_height()
    x, y = p.get_xy()
    anchor = (x + width / 2, y + height * 1.02)
    a.annotate(f'{height/df.shape[0]*100} %', anchor, ha='center')

plt.show()
Learn Data Visualization With Python
https://t.me/AIMLDeepThaught/625
# KDE of every continuous feature, split by song popularity, on a 4x3 grid.
cont = ['song_duration_ms', 'acousticness', 'danceability', 'energy',
        'instrumentalness', 'liveness', 'loudness',
        'speechiness', 'tempo', 'audio_valence']
cat = ['key', 'audio_mode', 'time_signature']

a = 4  # number of rows
b = 3  # number of columns
c = 1  # initialize plot counter

plt.figure(figsize=(18, 18))
# The figure title does not depend on the feature, so it is set once.
plt.suptitle('Distribution of Features', size=20, weight='bold')
for i in cont:
    plt.subplot(a, b, c)
    # `fill=` replaces the deprecated `shade=` keyword.
    A = sns.kdeplot(data=df, x=i, hue=song_popularity, palette=theme[:-2],
                    linewidth=1.3, fill=True, alpha=0.35)
    plt.title(i)
    plt.xlabel(" ")
    c = c + 1
#plotting
# Two bivariate KDEs of energy: against acousticness and against loudness.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(18, 9))
fig.suptitle(' Highest and Lowest Correlation ', size=20, weight='bold')
axs = [ax1, ax2]

#kdeplot
sns.kdeplot(data=df, x='acousticness', y='energy', ax=ax1, color=heatmap[0])
ax1.set_title('Energy vs Acousticness', size=14, weight='bold', pad=20)

#kdeplot
sns.kdeplot(data=df, x='loudness', y='energy', ax=ax2, color=heatmap[4])
ax2.set_title('Energy vs Loudness', size=14, weight='bold', pad=20);
Syed Afroz Ali
Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
# Count plot of case numbers; the tallest bar is recoloured and bars are labelled.
colors = ["#e9d9c8", "#cca383", "#070c23", "#f82d06",
          "#e8c195", "#cd7551", "#a49995", "#a3a49c", "#6c7470"]
sns.palplot(sns.color_palette(colors))

#plot
# Pass the vector as `x=`: a bare positional argument is read as `data` on
# seaborn >= 0.12 and no longer counts the categories.
A = sns.countplot(x=train_df['case_num'],
                  color=colors[1],
                  edgecolor='white',
                  linewidth=1.5,
                  saturation=1.5)

#Patch
patch_h = [patch.get_height() for patch in A.patches]
idx_tallest = np.argmax(patch_h)
A.patches[idx_tallest].set_facecolor(colors[3])

#Lables
plt.ylabel('Count', weight='semibold', fontname='Georgia')
plt.xlabel('Cases', weight='semibold', fontname='Georgia')
plt.suptitle('Number of Cases', fontname='Georgia', weight='bold', size=18,
             color=colors[2])
A.bar_label(A.containers[0], label_type='edge')
plt.show()
Syed Afroz Ali
Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
Learn Data Visualization With Python
https://t.me/AIMLDeepThaught/625
# Horizontal plotly bar chart: how often each bird name occurs in india_df.
# The frame is built explicitly so its column names ("index", "common_name")
# do not depend on the pandas version: since pandas 2.0
# value_counts().reset_index() names them "common_name"/"count" instead, and
# the lookups below raise KeyError.
name_counts = india_df["common_name"].value_counts()
Datas = pd.DataFrame(
    {"index": name_counts.index, "common_name": name_counts.values}
).sort_values(by='common_name')

# Creating the bar chart
trace = go.Bar(
    y=Datas["index"],
    x=Datas["common_name"],
    orientation="h",
    marker_color="#4F7177",
    text=Datas["common_name"],
)
layout = dict(
    width=600,
    height=1000,
    plot_bgcolor="#FFFFFF",
    font=dict(family='Arial',
              size=12,
              color='black'),
    margin=dict(
        l=0,
        r=0,
        b=100,
        t=100,
        pad=0
    ),
    xaxis=dict(showline=True, linewidth=1.45, linecolor="#4F7177",
               gridcolor='#D5D7D8',
               #griddash='dot',
               title_text='Counts'),
    yaxis=dict(showline=True, linewidth=1.45, linecolor="#4F7177",
               ticksuffix=" ", title_text='Name'),
    bargap=0.15, hoverlabel_bgcolor="#4F7177", hovermode="x"
)
fig = go.Figure(data=trace, layout=layout)
# Disable zooming/panning on both axes.
fig.layout.xaxis.fixedrange = True
fig.layout.yaxis.fixedrange = True

#text
# `texter` is a helper defined outside this snippet.
texter("Indian Birds Species", 0.000, 1.10, 28, "Work Sans")
texter("Birds found in the dataset", 0.000, 1.06, 18, "Source Sans Pro")
texter("heyRobin!", 1.00, -0.06, 16, "Playfair Display")
fig.show()
Syed Afroz Ali
Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
Missing Values:
# Left: heatmap of missing cells (columns as rows). Right: percentage of
# missing values for every column that has any.
fig, axes = plt.subplots(1, 2, figsize=(20, 5))
fig.suptitle('Missing Data', size=15, weight='bold')

#first plot
sns.heatmap(train.isna().transpose(), cmap="crest", ax=axes[0])

#missing data
missing = round(train.isna().sum() / train.shape[0] * 100, 2)
missing = missing[missing > 0].sort_values().to_frame()
missing.columns = ['Percentage']
missing.index.names = ['Name']
missing = missing.reset_index()
sns.barplot(data=missing, x='Percentage', y='Name', ax=axes[1], color=pal[0])
plt.show()
from plotly.subplots import make_subplots
import plotly.graph_objects as go
#Data
# 2x2 plotly figure: bubble charts of HomePlanet and Destination counts on the
# top row, histograms of VIP and CryoSleep on the bottom row.
cnt_srshp = train['HomePlanet'].value_counts()
cnt_srsdes = train['Destination'].value_counts()

fig = make_subplots(
    rows=2, cols=2, shared_yaxes=True,
    subplot_titles=("Home Planets", "Destination Planets", "VIP", "CryoSleep"))

#figure1
fig.add_trace(go.Scatter(
    x=cnt_srshp.index,
    y=cnt_srshp.values,
    mode='markers',
    marker=dict(
        sizemode='diameter',
        sizeref=20,
        size=cnt_srshp.values,
        color=['#1D7595', '#B9B596', '#864D29'])), 1, 1)

#figure2
fig.add_trace(go.Scatter(
    x=cnt_srsdes.index,
    y=cnt_srsdes.values,
    mode='markers',
    marker=dict(
        sizemode='diameter',
        sizeref=20,
        size=cnt_srsdes.values,
        color=['#048B95', '#A1231F', '#602F58'])),
    1, 2)

#figure3
fig.add_trace(go.Histogram(x=train["VIP"],
                           marker=dict(color=pal)),
              row=2, col=1)

#figure4
fig.add_trace(go.Histogram(x=train["CryoSleep"],
                           marker=dict(color=pal)),
              row=2, col=2)

fig.update_layout(
    height=1000, width=1000,
    coloraxis=dict(colorscale='Bluered_r'),
    showlegend=False,
    title_x=0.9,
    # `title_font` (layout.title.font) replaces the deprecated `titlefont`
    # layout attribute, which newer plotly releases no longer accept.
    title_font=dict(size=2, color='black', family='Space Mono'),
    plot_bgcolor='rgba(0,0,0,0)'
)
fig.show()
# Three panels: VIP counts, KDE of Num, Side counts.
fig, axes = plt.subplots(1, 3, figsize=(20, 6))
# Vectors are passed as `x=`: a bare positional argument is read as `data`
# on seaborn >= 0.12.
sns.countplot(x=train["VIP"], ax=axes[0], palette=pal)
# `fill=` replaces the deprecated `shade=` keyword.
sns.kdeplot(x=train["Num"], linewidth=1.3, fill=True, alpha=0.35,
            ax=axes[1], color=pal[0])
sns.countplot(x=train["Side"], ax=axes[2], palette=pal)
plt.show()
import matplotlib as mlb
import matplotlib.image as mpimg
from matplotlib.offsetbox import AnnotationBbox, OffsetImage
#plotting
# Left: count plot of Potability. Right: doughnut chart of the same counts
# with an image placed over it.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 11))
fig.suptitle(' Potablity of Water Quality ', size=26, color=theme[3],
             weight='bold')
axs = [ax1, ax2]

#Count-Plot
# Pass the vector as `x=`: a bare positional argument is read as `data` on
# seaborn >= 0.12.
sns.countplot(x=water_df['Potability'], ax=ax1, palette='husl')
ax1.set_title('Count Plot', size=14, color=theme[3], weight='bold', pad=20)

#Data-2
names = ["Not Potable", "Potable"]
values = water_df['Potability'].value_counts()
colors = ["#E68193", "#459E97"]
explode = (0.01, 0.01)

#Doughnut-chart
ax2.pie(x=values, labels=names, colors=colors, autopct='%1.0f%%',
        pctdistance=0.8, explode=explode)

#draw-circle
# A white disc over the pie centre turns it into a doughnut.
centre_circle = plt.Circle((0, 0), 0.62, fc='white')
ax2.add_artist(centre_circle)
ax2.axis('equal')
ax2.set_title('Pie Chart', size=14, color=theme[3], weight='bold', pad=20)

#Image
path = mpimg.imread('../input/water/water bottle.png')
imagebox = OffsetImage(path, zoom=0.3)
xy = (0.5, 0.7)
ab = AnnotationBbox(imagebox, xy, frameon=False, pad=1, xybox=(0.02, 0.05))
ax2.add_artist(ab)

plt.subplots_adjust(left=None, bottom=None, right=None, top=0.8, wspace=0.4,
                    hspace=None);
#Figure with Image
# Horizontal bar chart of the first 30 rows of `medals`, with a boxed value
# next to each bar, an image and two text callouts.
import matplotlib as mlb
import matplotlib.image as mpimg
from matplotlib.offsetbox import AnnotationBbox, OffsetImage

plt.figure(figsize=(27, 15));
ax = sns.barplot(
    y='Country',
    x='Total',
    data=medals[:30],
    color=olympics_col[3],
    zorder=2,
    linewidth=0,
    orient='h',
    saturation=1,
    alpha=1,
)

#Lables
ax.set_xlabel("Total", fontsize=20, weight='bold')
ax.set_ylabel("Country", fontsize=20, weight='bold')
ax.tick_params(labelsize=10, width=0.5, length=1.5)
plt.title("Top 30 Countries with Medals", size=20, weight='bold')

#Patches
value_box = dict(facecolor='none', edgecolor='black', boxstyle='round',
                 linewidth=0.2)
for a in ax.patches:
    value = f'{a.get_width():.0f}'
    x = a.get_x() + a.get_width() + 0.60
    y = a.get_y() + a.get_height() / 1.8
    ax.text(x, y, value, ha='left', va='center', fontsize=12, bbox=value_box)

#image
path = mpimg.imread('../input/font-worksans/medal-crop.png')
imagebox = OffsetImage(path, zoom=1.6)
xy = (0.5, 0.7)
ab = AnnotationBbox(imagebox, xy, frameon=False, pad=1, xybox=(100.5, 16))
ax.add_artist(ab)

ax.text(x=92.5, y=22.5, s='Best Performance', fontsize=22, weight='bold',
        color=olympics_col[1])
ax.text(x=95.5, y=23.5, s='From U.S.A', fontsize=22, weight='bold',
        color=olympics_col[1]);
# One histogram per numeric feature (skipping the first), with dashed
# vertical lines at the mean (magenta) and the median (cyan).
for col in numeric_features[1:]:
    fig = plt.figure(figsize=(9, 6))
    ax = fig.gca()
    feature = data[col]
    feature.hist(bins=50, ax=ax)
    markers = ((feature.mean(), 'magenta'), (feature.median(), 'cyan'))
    for position, line_color in markers:
        ax.axvline(position, color=line_color, linestyle='dashed', linewidth=2)
    ax.set_title(col)
plt.show()
# One box plot per column of data1, each in its own figure.
data1 = mydata[['Parameter 1']]
for i in data1.columns:
    plt.figure(figsize=(15, 6))
    # Pass the vector as `x=`: a bare positional argument is read as `data`
    # on seaborn >= 0.12.
    sns.boxplot(x=data1[i])
    plt.xticks(rotation=90)
    plt.show()
Syed Afroz Ali
Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
# Creating a figure
plt.figure(figsize=(10, 6))

# Two scatter layers: rows with target == 1 ("tomato") first, then rows with
# target == 0 ("lightgreen"); the legend entries follow that order.
has_disease = df.target == 1
no_disease = df.target == 0
plt.scatter(df.age[has_disease], df.thalach[has_disease], c="tomato")
plt.scatter(df.age[no_disease], df.thalach[no_disease], c="lightgreen")

# Adding info
plt.title("Heart Disease w.r.t Age and Max Heart Rate")
plt.xlabel("Age")
plt.legend(["Disease", "No Disease"])
plt.ylabel("Max Heart Rate");
Syed Afroz Ali
Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
# Distribution of thalach for rows with target == 1; skewness and kurtosis
# are shown as the legend entries.
thalach_with_disease = df.thalach[df.target == 1]
sns.displot(x=thalach_with_disease, data=df, kde=True, color='olive')
skewness = str(thalach_with_disease.skew())
kurtosis = str(thalach_with_disease.kurt())
plt.legend([skewness, kurtosis], title=("skewness and kurtosis"))
plt.title("Maximum heart achieved of peple with heart disease")
plt.xlabel("Maximum heart rate achieved")
plt.ylabel("Number of people with heart disease");
# Grouped bar chart of the sex x fbs contingency table.
sex_by_fbs = pd.crosstab(df.sex, df.fbs)
sex_by_fbs
fig = sex_by_fbs.plot(kind='bar', color=['lightblue', 'salmon'])
plt.title("Fasting blood sugar w.r.t sex")
fig.set_xticklabels(labels=['fbs>120 mg/dl', 'fbs<120 mg/dl'], rotation=0)
plt.legend(['Female', 'Male']);
Syed Afroz Ali
Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
# Overlay the model's predictions and the actual test values as two lines.
pred = rf_model.predict(x_test)
plt.rcParams['figure.figsize'] = (12, 8)
for series, series_label in ((pred, 'Predicted value'), (y_test, 'actual value')):
    plt.plot(series, label=series_label)
plt.legend(loc="upper left")
plt.title('Random Forest Model')
plt.xlabel('Test data')
plt.ylabel('mm / Day')
plt.show()
# One pie per column: wedges are labelled with the raw counts and the legend
# lists the category values.
fig, ax = plt.subplots(ncols=3, figsize=(18, 6))
colors = [['#ADEFD1FF', '#00203FFF'], ['#97BC62FF', '#2C5F2D'],
          ['#F5C7B8FF', '#FFA177FF']]
explode = [0, 0.2]
columns = ['Parking', 'Warehouse', 'Elevator']

for i, column in enumerate(columns):
    data = df[column].value_counts()
    axis = ax[i]
    axis.pie(data, labels=data.values, explode=explode, colors=colors[i],
             shadow=True)
    axis.legend(labels=data.index, fontsize='large')
    axis.set_title('{} distribution'.format(column))
def plot_hist(feature):
    """Plot a histogram with KDE and a box plot for one `titanic` column.

    The upper panel marks the column's mean, median and mode with vertical
    lines and lists their values in the legend; the lower panel shows the
    box plot. The figure is displayed with ``plt.show()``.

    Args:
        feature: name of the column of the global ``titanic`` frame to plot.
    """
    series = titanic[feature]
    fig, ax = plt.subplots(2, 1, figsize=(17, 12))
    upper, lower = ax[0], ax[1]

    sns.histplot(data=series, kde=True, ax=upper, color="Brown")

    mean_label = 'Mean: {}'.format(round(series.mean(), 3))
    upper.axvline(x=series.mean(), color='r', linestyle='--', linewidth=2,
                  label=mean_label)

    median_label = 'Median: {}'.format(round(series.median(), 3))
    upper.axvline(x=series.median(), color='orange', linewidth=2,
                  label=median_label)

    most_common = statistics.mode(series)
    upper.axvline(x=most_common, color='yellow', linewidth=2,
                  label='Mode: {}'.format(most_common))
    upper.legend()

    sns.boxplot(x=series, ax=lower, color="Brown")
    plt.show()
plot_hist('Age')
# Mean fare per embarkation port, sorted descending, drawn as a red line
# on top of coloured bars.
plt.figure(figsize=(12, 5))
plt.title('top categories')
plt.ylabel('item_price')

mean_fare = titanic.groupby('Embarked')['Fare'].mean()
mean_fare.sort_values(ascending=False)[0:15].plot(
    kind='line', marker='*', color='red', ms=10)
mean_fare.sort_values(ascending=False)[0:15].plot(
    kind='bar', color=sns.color_palette("inferno_r", 7))
plt.show()
Syed Afroz Ali
Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
# Density plot for every column whose dtype is not object.
numeric_feature = titanic.dtypes != object
final_numeric_feature = titanic.columns[numeric_feature].tolist()
numeric_part = titanic[final_numeric_feature]
numeric_part.plot(
    kind='density',
    subplots=True,
    layout=(2, 4),
    sharex=False,
    figsize=(20, 12),
)
plt.show()
# Rounded summary statistics sorted by standard deviation, rendered with a
# gradient and one coloured bar per statistic.
df_summary = df.describe().round(2).T.sort_values(by='std', ascending=False)
(
    df_summary.style
    .background_gradient(cmap='GnBu')
    .bar(subset=["max"], color='#BB0000')
    .bar(subset=["min",], color='green')
    .bar(subset=["mean",], color='Orange')
    .bar(subset=['std'], color='pink')
    .bar(subset=['50%'], color='magenta')
)
Syed Afroz Ali
Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
def highlight_min(s, props=''):
    """Return `props` for each entry of `s` equal to its NaN-ignoring minimum.

    Args:
        s: pandas Series (its ``.values`` are passed to ``np.nanmin``).
        props: string emitted at the positions of the minimum.

    Returns:
        Array the length of `s`: `props` where the value equals the minimum,
        the empty string elsewhere.
    """
    smallest = np.nanmin(s.values)
    at_minimum = s == smallest
    return np.where(at_minimum, props, '')
# Highlight the per-column minimum of the summary table.
titanic_summary = titanic.describe()
titanic_summary.style.apply(
    highlight_min, props='color:yellow;background-color:Grey', axis=0)

# Summary of the rows with Age >= 50, shaded with a colour gradient.
older_passengers = titanic[titanic["Age"] >= 50]
older_passengers.describe().style.background_gradient(cmap='RdPu')
Syed Afroz Ali
Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
# Box plot (top, 20% of the height) over a histogram (bottom) of heart['age'],
# with vertical lines at the mean, median and mode.
fig, ax = plt.subplots(2, 1, sharex=True, figsize=(17, 10),
                       gridspec_kw={"height_ratios": (.2, .8)})
ax[0].set_title('Age distribution', fontsize=18, pad=20)
sns.boxplot(x='age', data=heart, ax=ax[0])
ax[0].set(yticks=[])
sns.histplot(x='age', data=heart, ax=ax[1])
# Label the axis with the plotted column; the previous `col` was not defined
# in this snippet and would pick up a stale value from unrelated code.
ax[1].set_xlabel('age', fontsize=16)

age = heart['age']
plt.axvline(age.mean(), color='darkgreen', linewidth=2.2,
            label='mean=' + str(np.round(age.mean(), 1)))
plt.axvline(age.median(), color='red', linewidth=2.2,
            label='median=' + str(np.round(age.median(), 1)))
plt.axvline(age.mode()[0], color='purple', linewidth=2.2,
            label='mode=' + str(age.mode()[0]))
plt.legend(bbox_to_anchor=(1, 1.03), ncol=1, fontsize=17, fancybox=True,
           shadow=True, frameon=True)
plt.tight_layout()
plt.show()
plt.rcParams['font.family'] = 'Lucida Calligraphy'
Syed Afroz Ali
Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
plt.rcParams['font.size'] = 30

# Bucket ages into four labelled intervals: (29, 40], (40, 50], (50, 60], (60, 80].
age_edges = [29, 40, 50, 60, 80]
age_labels = ["adult", "fortieth", "old", "ancient"]
heart["age_bins"] = pd.cut(heart["age"], bins=age_edges, labels=age_labels)
def count_plot(data, x=None, y=None, figsize=None, title=None, color=None,
               prop=False, rotation_x=0):
    """Draw a frequency-ordered count plot with a value label on every bar.

    Exactly one of `x` / `y` names the column to count: `x` gives vertical
    bars, `y` horizontal bars. Bars are ordered from most to least frequent.

    Args:
        data: frame holding the column.
        x: column name for vertical bars.
        y: column name for horizontal bars.
        figsize: figure size; defaults to (12, 7) when omitted.
        title: plot title.
        color: bar colour passed to ``sns.countplot``.
        prop: if true, label bars with their percentage of the total,
            otherwise with the raw count.
        rotation_x: rotation of the x tick labels (vertical bars only).

    Raises:
        ValueError: if neither or both of `x` and `y` are given.
    """
    # Raise real exceptions: `raise "text"` is itself a TypeError in Python 3.
    if x is None and y is None:
        raise ValueError("Expected y or x")
    if x is not None and y is not None:
        raise ValueError("Expected y or x not both")

    horizontal = x is None
    count_type = data[y if horizontal else x].value_counts(ascending=False)
    total = count_type.sum()
    type_order = count_type.index
    # Bars are drawn in `type_order`, so the i-th bar matches the i-th count.
    # Reading the counts by position works for any category dtype; looking
    # them up by integer label or by tick text does not.
    counts = count_type.to_numpy()

    def _bar_label(count):
        """Format one bar's label as a percentage or as the raw count."""
        if prop:
            return "{:0.1f}%".format(100 * count / total)
        return "{}".format(count)

    # The default applies when figsize is omitted; a caller-supplied size is
    # honoured (the condition used to be inverted).
    plt.figure(figsize=(12, 7) if figsize is None else figsize)

    if horizontal:
        sns.countplot(data=data, y=y, color=color, order=type_order)
        for position, count in enumerate(counts):
            plt.text(count + 1, position, _bar_label(count), va="center")
    else:
        sns.countplot(data=data, x=x, color=color, order=type_order)
        locs, _ = plt.xticks(rotation=rotation_x)
        for loc, count in zip(locs, counts):
            plt.text(loc, count + 2, _bar_label(count), ha="center")

    plt.title(title)
    plt.show()
Syed Afroz Ali
Learn Data Visualization With Python
Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
https://t.me/AIMLDeepThaught/625
# Barchart sorted by frequency
base_color = sns.color_palette()[0]
cat_order = train_eda[col_name].value_counts().index
plt.figure(figsize=(15, 10))
plt.xticks(rotation=90)
sns.countplot(data=train_eda, x=col_name, order=cat_order, color=base_color);

# add annotations
n_points = train_eda.shape[0]
cat_counts = train_eda[col_name].value_counts()
# current tick locations and their label objects
locs, labels = plt.xticks()
for loc, label in zip(locs, labels):
    # the tick text is the category, which indexes the count table
    category = label.get_text()
    count = cat_counts[category]
    pct_string = '{:0.1f}%'.format(100 * count / n_points)
    # place the percentage 4 units above the top of the bar
    plt.text(loc, count + 4, pct_string, ha='center', color='black')
Syed Afroz Ali
Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
train = pd.read_csv('train_housing.csv')

#Visualising numerical predictor variables with Target Variables
train_num = train.select_dtypes(include=['int64', 'float64'])
fig, axs = plt.subplots(12, 3, figsize=(20, 80))
#adjust horizontal space between plots
fig.subplots_adjust(hspace=0.6)
# zip stops at the shorter sequence, so at most 36 columns are drawn.
for i, ax in zip(train_num.columns, axs.flatten()):
    sns.scatterplot(x=i, y='SalePrice', hue='SalePrice', data=train_num,
                    ax=ax, palette='viridis_r')
    # Label the panel being drawn: plt.xlabel/plt.ylabel act on the *current*
    # axes (the last one created by plt.subplots), not on `ax`.
    ax.set_xlabel(i, fontsize=12)
    ax.set_ylabel('SalePrice', fontsize=12)
    #ax.set_yticks(np.arange(0,900001,100000))
    ax.set_title('SalePrice' + ' - ' + str(i), fontweight='bold', size=20)
Syed Afroz Ali
Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
train = pd.read_csv('train_housing.csv')
# Columns of object dtype are treated as the categorical predictors.
categorical = train.select_dtypes(include=['object'])

##Visualising Categorical predictor variables with Target Variables
def facetgrid_boxplot(x, y, **kwargs):
    """Draw a box plot of `y` per `x` and rotate the x tick labels by 90°.

    Extra keyword arguments are accepted and ignored.
    """
    sns.boxplot(x=x, y=y)
    plt.xticks(rotation=90)
# Long-format frame: one row per (SalePrice, categorical variable, value).
f = pd.melt(train, id_vars=['SalePrice'],
            value_vars=sorted(train[categorical.columns]))
# `height=` is the current name of the facet-size parameter; `size=` was
# renamed in seaborn 0.9 and is no longer accepted.
g = sns.FacetGrid(f, col="variable", col_wrap=3, sharex=False, sharey=False,
                  height=5)
g = g.map(facetgrid_boxplot, "value", "SalePrice")
import matplotlib.pyplot as plt
import seaborn as sns
# Scatter of the first two columns of df, with arrows pointing at four rows.
sns.scatterplot(x=df.iloc[:, 0], y=df.iloc[:, 1], hue=y)

# (label, row position in df, text position)
callouts = (
    ("KD65", 64, (8*1e6, 1)),
    ("KD99", 98, (8*1e6, 2*1e6)),
    ("control3", 107, (8*1e6, 3*1e6)),
    ("control13", 117, (8*1e6, 4*1e6)),
)
for callout_text, row, text_position in callouts:
    plt.annotate(
        callout_text,
        (df.iloc[row, 0], df.iloc[row, 1]),
        text_position,
        arrowprops=dict(arrowstyle="->"),
        fontsize="xx-large",
        c='red',
    )
# Mean fare per embarkation port, sorted descending, drawn as a red line
# on top of coloured bars.
plt.figure(figsize=(12, 5))
plt.title('top categories')
plt.ylabel('item_price')

mean_fare = titanic.groupby('Embarked')['Fare'].mean()
mean_fare.sort_values(ascending=False)[0:15].plot(
    kind='line', marker='*', color='red', ms=10)
mean_fare.sort_values(ascending=False)[0:15].plot(
    kind='bar', color=sns.color_palette("inferno_r", 7))
plt.show()
Syed Afroz Ali
Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
# We already examined SalePrice correlations
corr = wine.corr()

plt.figure(figsize=(12, 10))
# Only cells with correlation >= 0.5 or <= -0.4 are kept; the rest become NaN.
strong = corr[(corr >= 0.5) | (corr <= -0.4)]
sns.heatmap(
    strong,
    cmap='viridis',
    vmax=1.0,
    vmin=-1.0,
    linewidths=0.1,
    annot=True,
    annot_kws={"size": 8},
    square=True,
);
Syed Afroz Ali
Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
import dabl
dabl.plot(titanic, 'Fare');
Learn Data Visualization With Python
https://t.me/AIMLDeepThaught/625
Syed Afroz Ali
Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
Mastering Data Visualization Techniques
(Part 4)
Prepared by: Syed Afroz Ali
# Reduce a copy of the dataset to three principal components and scatter them.
X = dataset.copy()
from sklearn.decomposition import PCA
pca = PCA(n_components=3)
pca.fit(X)
PCA_ds = pd.DataFrame(pca.transform(X), columns=(["col1", "col2", "col3"]))

# A 3D Projection Of Data In The Reduced Dimension
x, y, z = PCA_ds["col1"], PCA_ds["col2"], PCA_ds["col3"]

#To plot
fig = plt.figure(figsize=(10, 8))
ax = fig.add_subplot(111, projection="3d")
ax.scatter(x, y, z, c="maroon", marker="o")
ax.set_title("A 3D Projection Of Data In The Reduced Dimension")
plt.show()
Syed Afroz Ali
Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
Learn Data Visualization With Python
https://t.me/AIMLDeepThaught/625
# A 3D Projection Of Data In The Reduced Dimension
# Same scatter as before, with points coloured by `labels`.
x, y, z = PCA_ds["col1"], PCA_ds["col2"], PCA_ds["col3"]

#To plot
fig = plt.figure(figsize=(10, 8))
ax = fig.add_subplot(111, projection="3d")
ax.scatter(x, y, z, c=labels, marker="o", cmap="BuGn")
ax.set_title("A 3D Projection Of Data In The Reduced Dimension")
plt.show()
# For each of the first ten parameter triples, draw the points together with
# the plane Z = theta_0[i]*X + theta_1[i]*Y + theta_2[i], one figure per plane.
# The grid does not depend on i, so it is built once outside the loop.
X, Y = np.meshgrid(x, y)
for i in range(0, 10):
    fig = plt.figure(figsize=(8, 6))
    ax = plt.axes(projection="3d")
    ax.scatter(x, y, z, marker='*', color='red')
    # Syed Afroz Ali
    # Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
    Z = theta_0[i] * X + theta_1[i] * Y + theta_2[i]
    ax.plot_surface(X, Y, Z, cmap='plasma')
    ax.set_xlabel("x")
    ax.set_ylabel("y")
    ax.set_zlabel("z")
    ax.set_title("Thetas: {},{},{}".format(theta_0[i], theta_1[i], theta_2[i]))
    plt.show()
    print(40 * "=")  # separator between consecutive figures
Syed Afroz Ali
Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
# Count plot of the target, with every bar labelled by its share of all rows.
plt.suptitle('Target Variable', size=20, weight='bold')
# Map the 0/1 target to readable names; this series is used as the x variable.
song_popularity = df['song_popularity'].map({0: 'UnPopular', 1: 'Popular'})
a = sns.countplot(data=df, x=song_popularity, palette=theme)
plt.tick_params(axis="x", colors=theme[0], labelsize=15)
for p in a.patches:
    width = p.get_width()
    height = p.get_height()
    # Local names chosen so the module-level x / y series are not overwritten.
    left, bottom = p.get_xy()
    # Two decimals instead of the raw float repr; label sits 2% above the bar.
    a.annotate(f'{height / df.shape[0] * 100:.2f} %',
               (left + width / 2, bottom + height * 1.02), ha='center')
plt.show()
# Column groups used by the plotting loops: `cont` columns are drawn as KDE
# curves, `cat` columns as count plots.
cont = [
    'song_duration_ms',
    'acousticness',
    'danceability',
    'energy',
    'instrumentalness',
    'liveness',
    'loudness',
    'speechiness',
    'tempo',
    'audio_valence',
]
cat = [
    'key',
    'audio_mode',
    'time_signature',
]
Syed Afroz Ali
Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
# KDE of every column in `cont`, split by song popularity, on a 4 x 3 grid.
a = 4  # number of rows
b = 3  # number of columns
plt.figure(figsize=(18, 18))
# The figure-level title only needs to be set once, not on every iteration.
plt.suptitle('Distribution of Features', size=20, weight='bold')
for c, i in enumerate(cont, start=1):  # c is the 1-based subplot position
    plt.subplot(a, b, c)
    # `fill=True` replaces the deprecated `shade=True`.
    sns.kdeplot(data=df, x=i, hue=song_popularity, palette=theme[:-2],
                linewidth=1.3, fill=True, alpha=0.35)
    plt.title(i)
    plt.xlabel(" ")
# Count plot of every column in `cat`, laid out on a 4 x 3 grid.
a = 4  # number of rows
b = 3  # number of columns
plt.figure(figsize=(18, 18))
# Syed Afroz Ali
# Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
# Learn Data Visualization With Python
# https://t.me/AIMLDeepThaught/625
# The figure-level title only needs to be set once, not on every iteration.
plt.suptitle('Count of Features', size=20, weight='bold')
for c, i in enumerate(cat, start=1):  # c is the 1-based subplot position
    plt.subplot(a, b, c)
    # Pass the column as `x=`: recent seaborn treats the first positional
    # argument as `data`, not as the variable to count.
    sns.countplot(x=df[i], color=theme[3], alpha=0.5)
    plt.title(i)
    plt.xlabel(" ")
    plt.tick_params(axis="x", colors='black', labelsize=10)
# Pie chart of the first ten rows of `medals` (totals labelled by country).
# NOTE(review): "Top 10" holds only if `medals` is already sorted by total.
figure = plt.figure(figsize=(30, 10))
A = plt.pie(
    medals['Total'][:10],
    labels=medals['Country'][:10],
    startangle=90,
    labeldistance=1.15,
    pctdistance=0.8,
    autopct='%1.1f%%',
)
# The weight literal was split across lines by text wrapping; restored here.
plt.title("Pie Chart of Top 10 Countries with Medals", size=20, weight='bold')
plt.show()
Syed Afroz Ali
Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
# Checking the target variable's distribution; the dashed line marks the mean.
sale_price = house['SalePrice']
# histplot(kde=True, stat="density") is the replacement for the deprecated
# distplot; cut=3 extends the KDE past the data range as distplot did.
sns.histplot(sale_price, kde=True, stat="density", kde_kws=dict(cut=3),
             color=colors[7])
plt.axvline(x=sale_price.mean(), color=colors[7], linestyle='--', linewidth=2)
plt.title('Sales');
# Percentage-point difference between the second most frequent answer of the
# current survey (Q3) and of the previous one (Q2).
# NOTE(review): index [1] is positional in the frequency-sorted counts; the
# message assumes that entry is "Woman" in both years — confirm.
l = (
    df_current['Q3'].value_counts(normalize=True).mul(100).tolist()[1]
    - df_old['Q2'].value_counts(normalize=True).mul(100).values.tolist()[1]
)
# ANSI escape codes color the message; five blank lines pad it on each side.
print(5 * '\n', "\033[1;32m Increase in Woman is only\033[1;32m", round(l, 2),
      '%\033[1;32m Over Last Year\033[1;32m', 5 * '\n')
Syed Afroz Ali
Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
Learn Data Visualization With Python
https://t.me/AIMLDeepThaught/625
# Side-by-side count plots of the gender question for the current (Q3) and
# previous (Q2) survey, with each bar labelled by its percentage.
fig, ax = plt.subplots(1, 2, figsize=(20, 8))
fig.text(0.1, 0.95, "Visualisation of Gender Distribution for 2022 and 2021",
         fontsize=15, fontweight='bold')
sns.countplot(x='Q3', data=df_current, palette="Dark2", ax=ax[0]);  # Current Year
sns.countplot(x='Q2', data=df_old, palette="Dark2", ax=ax[1]);  # Last Year
# A separate loop name keeps `ax` bound to the array of axes.
for i, axis in enumerate(ax.flatten()):
    axis.grid(axis='y', linestyle='-', alpha=0.4)
    # Denominator and year for this panel (`shape` / `shape_21` come from
    # outside this snippet — presumably the two surveys' row counts).
    if i == 0:
        t = shape
        year = 2022
    else:
        t = shape_21
        year = 2021
    for p in axis.patches:
        percentage = f'{100 * p.get_height() / t:.2f}%\n'
        axis.annotate(percentage,
                      (p.get_x() + p.get_width() / 2, p.get_height()),
                      ha='center', va='center')
        # Only the bar whose left edge lies in [0.5, 1.5) keeps its color.
        if not (0.5 <= p.get_x() < 1.5):
            p.set_facecolor('lightgrey')
    axis.set_xlabel('Gender')
    axis.set_title("Gender Wise Distribution in " + str(year))
plt.show()
Syed Afroz Ali
Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
# Age-group distribution of the current survey: bar chart with percentage
# labels on the left, pie chart of the same counts on the right.
fig, ax = plt.subplots(1, 2, figsize=(20, 8))
fig.text(0.1, 0.95, "Age Distribution of Kaggle Users - 2022", fontsize=15,
         fontweight='bold')
# Count once and reuse for the bars, the labels and the pie.
age_counts = df_current['Q2'].value_counts()
# The original divided by `t`, a variable left over from the previous
# snippet's loop (the other year's total). Using the sum of the plotted
# counts makes the bar labels agree with the pie percentages.
total = age_counts.sum()
sns.barplot(x=age_counts.index, y=age_counts.values, ax=ax[0],
            edgecolor='black', linewidth=1.5, saturation=1.5)
ax[0].yaxis.set_major_locator(MaxNLocator(nbins=20))
ax[0].grid(axis='y', linestyle='-', alpha=0.4)
ax[0].set_ylabel('Count', weight='semibold')
ax[0].set_xlabel('Age Group 2022', weight='semibold')
ax[1].set_xlabel('Pie Chart for Age Group 2022', weight='semibold')
for p in ax[0].patches:
    percentage = f'{100 * p.get_height() / total:.1f}%\n'
    ax[0].annotate(percentage,
                   (p.get_x() + p.get_width() / 2, p.get_height()),
                   ha='center', va='center')
ax[1].pie(age_counts, labels=age_counts.index, autopct='%1.1f%%',
          explode=[0.03] * len(age_counts))
plt.show()
Syed Afroz Ali
Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
# Mean age per passenger class, highest first; bars are blue when the mean is
# below 30 and red otherwise.
# (`ascending=False` was split across lines by text wrapping; restored here.)
df2 = titanic.groupby('Pclass')['Age'].mean().sort_values(ascending=False)
plt.figure(figsize=(15, 8))
color = ['b' if mean_age < 30 else 'r' for mean_age in df2]
df2.plot.bar(color=color);
# Distribution (histogram + KDE) of each listed column on a 3 x 4 grid.
col = ['fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar',
       'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density',
       'pH', 'sulphates', 'alcohol', 'quality']
fig = plt.figure(figsize=(15, 10))
for i, name in enumerate(col):
    plt.subplot(3, 4, i + 1)
    plt.title(name)
    # The original passed the whole frame positionally to the deprecated
    # distplot and relied on `x=` overriding it; plot the column directly.
    sns.histplot(df[name], kde=True, stat="density", kde_kws=dict(cut=3))
plt.tight_layout()
plt.show()
Syed Afroz Ali
Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
# Density of the distribution object `f` (presumably scipy.stats.f) with
# parameters dfn / dfd, plus a dashed line at its 95th percentile.
fig, ax = plt.subplots(1, 1)
plt.xlim(-1, 26)
plt.ylim(0, 1)
# 100 evenly spaced points covering practically the whole support.
x = np.linspace(f.ppf(0.0000000001, dfn, dfd),
                f.ppf(0.9999999999, dfn, dfd), 100)
ax.plot(x, f.pdf(x, dfn, dfd), 'r-')
# Compute the cut-off once and reuse it for the line and the printout.
critical_value = f.ppf(0.95, dfn, dfd)
ax.axvline(critical_value, ls="--", color="navy")
print('upper 5%:', critical_value)
# Free or Paid Courses - Countplot, with each bar labelled by its share.
fig, ax = plt.subplots(figsize=(7, 5), dpi=100)
sns.countplot(data=courses, x='is_paid', palette='magma_r', ax=ax)
# NOTE(review): assumes the first bar is the unpaid category — confirm.
ax.set_xticklabels(labels=['Free', 'Paid'])
ax.set_xlabel("Free/Paid courses")
ax.set_ylabel("Number of courses")
ax.set_title("Share of Free and Paid Courses on Udemy")
total = len(courses)
for patch in ax.patches:
    label_x = patch.get_x() + patch.get_width() / 2
    label_y = patch.get_height() + .05
    # Derive the percentage from the bar itself. The original indexed
    # value_counts() by position; that series is ordered by frequency, not by
    # bar order, so a label could end up on the wrong bar.
    share = patch.get_height() * 100 / total
    ax.annotate('{:.2f}%'.format(share), (label_x, label_y), ha='center')
Syed Afroz Ali
Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
# Creating a stripplot to visualize differences in data distribution between hotels
# (column names that were split by text wrapping have been rejoined).
features = ['lead_time', 'stays_in_weekend_nights', 'stays_in_week_nights',
            'adults', 'children', 'babies', 'previous_cancellations',
            'previous_bookings_not_canceled', 'booking_changes', 'adr',
            'days_in_waiting_list']
sns.set_style('darkgrid')
sns.set(font_scale=1.2)
plt.figure(figsize=(14, 18))
# One panel per feature on a 4 x 3 grid; n is the 1-based subplot position.
for n, feature in enumerate(features, start=1):
    plt.subplot(4, 3, n)
    # Axis labels are cleared because the panel title already names the feature.
    sns.stripplot(x=df['hotel'], y=df[feature], palette='summer').set(
        xlabel=None, ylabel=None)
    plt.title(f'{feature} strip plot')
plt.tight_layout()
Syed Afroz Ali
Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
import plotly.graph_objects as go
# Donut chart of meal plans among confirmed bookings.
# Labels and values come from the same value_counts() result. The original
# took labels from unique() (order of first appearance) and values from
# value_counts() (order of frequency), which can pair a label with another
# category's count.
meal_counts = confirmed_bookings['meal'].value_counts()
labels = meal_counts.index
values = meal_counts
palette = ["#f6bd60", "#f5cac3", "#84a59d", "#f28482"]
fig = go.Figure(
    data=[
        go.Pie(
            labels=labels,
            values=values,
            hole=.5,
            title='Meal plans',
            legendgroup=True,
            pull=[0.1, 0.1, 0.1, 0.1],
        )
    ]
)
fig.update_traces(marker=dict(colors=palette));
Syed Afroz Ali
Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
# Rent against size; the Size column also drives marker color and marker area.
x = rent_df["Rent"]
y = colors = sizes = rent_df["Size"]

plt.figure(figsize=(25, 8))
plt.ticklabel_format(style='plain')
plt.scatter(x, y, s=sizes, c=colors, cmap='viridis', alpha=0.3)
plt.colorbar();
Learn Data Visualization With Python
https://t.me/AIMLDeepThaught/625
Syed Afroz Ali
Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
Mastering Data Visualization Techniques
(Part 5)
Prepared by: Syed Afroz Ali
import plotly.graph_objs as go
# Donut chart of the 20 most frequent entries of the 'cuisines' column.
values = data['cuisines'].value_counts()[:20]
labels = values.index
# (`fig =` was separated from its value by text wrapping; rejoined here.)
fig = go.Figure(data=[go.Pie(values=values, labels=labels, hole=.3)])
fig.update_traces(
    hoverinfo='label+percent',
    textinfo='value',
    textfont_size=20,
    marker=dict(line=dict(color='#000000', width=3)),
)
# `title_font` replaces the deprecated `titlefont` layout attribute.
fig.update_layout(
    title_text="Most popular cuisines of Bangalore",
    title_font=dict(size=30),
)
fig.show()
Syed Afroz Ali (Kaggle Grandmaster)
Follow me on LinkedIn for more: https://lnkd.in/g3W7w_Xu
# Rows whose 'cuisines' text contains "Mughlai" in any letter case; rows with
# a missing value are treated as non-matching.
is_mughlai = data['cuisines'].str.contains('Mughlai', na=False, regex=True,
                                           case=False)
MughlaiFoodcafe = data[is_mughlai]
MughlaiFoodcafe.head()
# Pie chart showing the share of cafes per food type (row count of each
# per-cuisine frame).
slices = [
    MughlaiFoodcafe.shape[0],
    ChineseFoodcafe.shape[0],
    MexicanFoodcafe.shape[0],
    NorthIndianFoodcafe.shape[0],
    SouthIndianFoodcafe.shape[0],
    ItalianFoodcafe.shape[0],
    AmericanFoodcafe.shape[0],
]
# 'South Indian' was split across two lines by text wrapping; restored here.
labels = ['Mughlai', 'Chinese', 'Mexican', 'North Indian', 'South Indian',
          'Italian', 'American']
colors = ['#3333cc', '#ffff1a', '#ff3333', '#c2c2d6', '#6699ff', '#c4ff4d',
          '#339933']
plt.pie(slices, colors=colors, labels=labels, autopct='%1.0f%%',
        pctdistance=.5, labeldistance=1.2, shadow=True)
fig = plt.gcf()
plt.title("Percentage of cafe according to their Food Type",
          bbox={'facecolor': '1', 'pad': 5})
fig.set_size_inches(12, 12)
plt.show()
Syed Afroz Ali (Kaggle Grandmaster)
Follow me on LinkedIn for more: https://lnkd.in/g3W7w_Xu
# Most Liked Dishes in Bangalore
import re

# Keep only rows that list liked dishes and renumber them 0..n-1.
data = data[data['dish_liked'].notnull()]
data.index = range(data.shape[0])
likes = []
for liked in data['dish_liked']:
    # Strip each name: without it, "Pasta" and " Pasta" (after a comma) are
    # counted as two different dishes.
    likes.extend(dish.strip() for dish in re.split(',', liked))
print("Count of Most liked dishes of Bangalore")
favourite_food = pd.Series(likes).value_counts()
# Print the table announced by the heading above; a bare .head() in the
# middle of a cell displays nothing.
print(favourite_food.head(20))
ax = favourite_food.nlargest(n=20, keep='first').plot(
    kind='bar', figsize=(15, 15), title='Top 20 Favourite Food counts ')
# Write each bar's count just above its top-left corner.
for p in ax.patches:
    ax.annotate(str(p.get_height()),
                (p.get_x() * 1.005, p.get_height() * 1.005))
Syed Afroz Ali (Kaggle Grandmaster)
Follow me on LinkedIn for more: https://lnkd.in/g3W7w_Xu
Learn Data Visualization With Python
https://t.me/AIMLDeepThaught/625
# Analysis of biggest food chains: number of rows per restaurant name,
# largest first; the twelve largest are plotted.
# (`branches =` was separated from its value by text wrapping; rejoined here.)
branches = (
    data.groupby(['name'])
    .size()
    .to_frame('count')
    .reset_index()
    .sort_values(['count'], ascending=False)
)
ax = sns.barplot(x='name', y='count', data=branches[:12])
plt.xlabel('')
plt.ylabel('Branches')
plt.title('Food chains and their counts')
# Write each bar's count just above its top-left corner.
for p in ax.patches:
    ax.annotate(str(p.get_height()),
                (p.get_x() * 1.005, p.get_height() * 1.005))
fig = plt.gcf()
fig.set_size_inches(25, 15)
Syed Afroz Ali (Kaggle Grandmaster)
Follow me on LinkedIn for more: https://lnkd.in/g3W7w_Xu
# Branch counts of four named chains, largest first, as an annotated bar chart.
# Stored under its own name: the original assigned this Series to `data`,
# overwriting the restaurant frame used by the neighbouring snippets.
chain_counts = df.groupby('name').size()[
    ["Domino's Pizza", "KFC", "McDonald's", 'Subway']
].sort_values(ascending=False)
x = chain_counts.index
y = chain_counts.values
plt.figure(figsize=(15, 10))
color = ['red', 'yellow', 'green', 'blue']
bars = plt.bar(x, y, width=0.4, color=color)
for bar in bars:
    x_ = bar.xy[0] + bar.get_width() / 2  # horizontal centre of the bar
    y_ = bar.get_height()
    # Offset (in points) shifts the text left and slightly above the bar top.
    plt.annotate(
        text=str(y_),
        xy=(x_, y_),
        xytext=(-17, 2.9),
        textcoords='offset points',
    )
Syed Afroz Ali (Kaggle Grandmaster)
Follow me on LinkedIn for more: https://lnkd.in/g3W7w_Xu
# Axis labels and title for the branch-count bar chart, then display it.
plt.xlabel('Restaurant')  # spelling fixed (was 'Restraurant')
plt.ylabel('Number of Branches')
# The title literal was split across lines by text wrapping; restored here.
plt.title('Number of branches on KFC, MacD, Dominos and Subway')
plt.show()
# KDE of Na_to_K for every drug type, overlaid on one set of axes.
# Matplotlib 3.6 renamed the seaborn styles to 'seaborn-v0_8-*' and 3.8
# removed the old names; use whichever this installation provides.
style_name = 'seaborn-v0_8-notebook'
if style_name not in plt.style.available:
    style_name = 'seaborn-notebook'
plt.style.use(style_name)
# NOTE(review): assumes df2 encodes Drug_Type as 1..k in the same order as
# df.Drug_Type.unique() — confirm against the encoding step.
for i, label in enumerate(df.Drug_Type.unique().tolist()):
    # `fill=True` replaces the deprecated `shade=True`.
    sns.kdeplot(df2.loc[df2['Drug_Type'] == i + 1, 'Na_to_K'],
                label=label, fill=True)
plt.title('1. KDE of Na_to_k (based on Drug_Type)', fontdict=font, pad=15)
plt.xticks(np.arange(0, 46, 2), rotation=90)
plt.xlim([0, 46])
plt.legend()
plt.show()
Syed Afroz Ali (Kaggle Grandmaster)
Follow me on LinkedIn for more: https://lnkd.in/g3W7w_Xu
# draw countplot and pie plot of categorical data (one figure per column)
for col in categorical:
    fig, axes = plt.subplots(1, 2, figsize=(10, 4))
    # count of col (countplot)
    sns.countplot(data=df2, x=col, ax=axes[0])
    for container in axes[0].containers:
        axes[0].bar_label(container)
    # count of col (pie chart)
    encoded_counts = df2[col].value_counts()
    slices = encoded_counts.values
    # Each slice label pairs the df2 value with the df value at the same
    # rank. NOTE(review): assumes both frames rank the categories
    # identically — confirm.
    activities = [f"{i} ({var})" for i, var in
                  zip(encoded_counts.index, df[col].value_counts().index)]
    axes[1].pie(slices, labels=activities, shadow=True, autopct='%1.1f%%')
    plt.suptitle(f'Count of Unique Value in {col}', y=1.09, **font)
    plt.show()
Syed Afroz Ali (Kaggle Grandmaster)
Follow me on LinkedIn for more: https://lnkd.in/g3W7w_Xu
# Count of each drug type, split by Sex, BP and Cholesterol in turn
# (one figure per feature, bars labelled with their counts).
for col in ['Sex', 'BP', 'Cholesterol']:
    ax = sns.countplot(data=df, x='Drug_Type', hue=col)
    for container in ax.containers:
        ax.bar_label(container)
    plt.title(f'Count of Drug (based on {col})', fontdict=font, pad=15)
    plt.show()
Syed Afroz Ali (Kaggle Grandmaster)
Follow me on LinkedIn for more: https://lnkd.in/g3W7w_Xu
# For Sex, BP and Cholesterol: mean Na_to_K per category (left panel) and the
# Na_to_K box plot per category (right panel).
for col in ['Sex', 'BP', 'Cholesterol']:
    fig, ax = plt.subplots(1, 2, figsize=(10, 4))
    # (`gp =` was separated from its value by text wrapping; rejoined here.)
    gp = df.groupby([col])['Na_to_K'].mean().to_frame().reset_index()
    sns.barplot(data=gp, x=col, y='Na_to_K', ax=ax[0])
    for container in ax[0].containers:
        ax[0].bar_label(container)
    ax[0].set_title(f'Mean of Na_to_K (based on {col})', y=1.09, **font)
    sns.boxplot(data=df, x=col, y='Na_to_K', ax=ax[1])
    # Stray closing parenthesis removed from the title text.
    ax[1].set_title(f'Boxplot of {col}', y=1.09, **font)
    # Learn Data Visualization With Python
    # https://t.me/AIMLDeepThaught/625
    # Syed Afroz Ali (Kaggle Grandmaster)
    # Follow me on LinkedIn for more: https://lnkd.in/g3W7w_Xu
    plt.show()
# Na_to_K against Age on a 2 x 2 grid, colored by a different categorical
# column in each panel.
fig, ax = plt.subplots(2, 2, figsize=(14, 8))
for i, col in enumerate(['Sex', 'BP', 'Cholesterol', 'Drug_Type']):
    axis = ax[i // 2, i % 2]  # row-major position of panel i
    sns.scatterplot(data=df, x='Age', y='Na_to_K', hue=col, ax=axis,
                    palette='turbo')
    # Missing closing parenthesis added to the title text.
    axis.set_title(f'Na_to_K vs Age (based on {col})', y=1.09, **font)
    # Anchor the legend outside the right edge of the panel.
    axis.legend(loc='upper center', bbox_to_anchor=(1.2, 0.6),
                fancybox=True, shadow=True)
fig.tight_layout()
plt.show()
Syed Afroz Ali (Kaggle Grandmaster)
Follow me on LinkedIn for more: https://lnkd.in/g3W7w_Xu
# Swarm plots on a 3 x 2 grid: one row per categorical feature, one column per
# numeric variable, always colored by drug type. Only the top row gets titles.
fig, ax = plt.subplots(3, 2, figsize=(14, 12))
for row, feature in enumerate(['Cholesterol', 'BP', 'Sex']):
    for column, target in enumerate(['Na_to_K', 'Age']):
        sns.swarmplot(data=df, x=feature, y=target, hue='Drug_Type',
                      ax=ax[row, column])
top_row_titles = ['Swarmplot of Drug Type vs Na_to_K',
                  'Swarmplot of Drug Type vs Age']
for column, title in enumerate(top_row_titles):
    ax[0, column].set_title(title, y=1.05, **font)
plt.tight_layout()
Syed Afroz Ali (Kaggle Grandmaster)
Follow me on LinkedIn for more: https://lnkd.in/g3W7w_Xu
Learn Data Visualization With Python
https://t.me/AIMLDeepThaught/625
import itertools
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
from plotly.subplots import make_subplots
# Work around Plotly figures not being displayed in the notebook
from plotly.offline import init_notebook_mode
# Enable Plotly's notebook rendering and pick the Matplotlib style.
init_notebook_mode(connected=True)
plt.style.use('_mpl-gallery')
# Text properties for plot titles. ('Times New Roman' and the trailing
# comment were split across lines by text wrapping; restored here.)
FONT = {'fontsize': 20, 'fontstyle': 'normal', 'fontfamily': 'Times New Roman',
        'backgroundcolor': '#145A32', 'color': 'orange'}
# One horizontal box trace per column of df.
fig = go.Figure()
for col in df:
    fig.add_trace(go.Box(x=df[col], name=col))
Syed Afroz Ali (Kaggle Grandmaster)
Follow me on LinkedIn for more: https://lnkd.in/g3W7w_Xu
# Title, background and font styling for the box-plot figure, then display it.
title_style = dict(color='orange', family='newtimeroman', size=25)
body_style = dict(color='#DAF7A6', family='newtimeroman', size=16)
fig.update_layout(
    title_text="Box Plot Styling Outliers",
    title_font=title_style,
    title_x=0.45,
    paper_bgcolor='#145A32',
    # plot_bgcolor='#DAF7A6',
    font=body_style,
)
fig.show()
# univariate analysis of categorical data: a count plot and a pie chart of
# the same column side by side, one figure per column.
sns.set_palette("summer_r")
for i, col in enumerate(discrete_cols1):
    fig, axes = plt.subplots(1, 2, figsize=(10, 4))
    # Syed Afroz Ali (Kaggle Grandmaster)
    # Follow me on LinkedIn for more: https://lnkd.in/g3W7w_Xu
    # count of col (countplot)
    sns.countplot(data=df, x=col, ax=axes[0])
    for container in axes[0].containers:
        axes[0].bar_label(container)
    # count of col (pie chart); counts are ordered by category value
    sorted_counts = df[col].value_counts().sort_index()
    slices = sorted_counts.values
    activities = list(sorted_counts.index)
    axes[1].pie(slices, labels=activities, shadow=True, autopct='%1.1f%%')
    plt.suptitle(f'Count of Unique Value in {col} (Fig {i+1})', y=1.09, **FONT)
    plt.show()
Syed Afroz Ali (Kaggle Grandmaster)
Follow me on LinkedIn for more: https://lnkd.in/g3W7w_Xu
•• Join my WhatsApp Channel for the latest updates on AI:
https://www.whatsapp.com/channel/0029VavNSDO9mrGWYirxz40G
# Count of Personal Loan values, split by each discrete feature in turn
# (one figure per feature; figure numbers in the titles start at 5).
sns.set_palette(['#1f4a1b', 'orange', '#bbff33', 'yellow'])
discrete_cols2 = ['Family', 'Education', 'Securities Account',
                  'CD Account', 'Online', 'CreditCard']
for i, col in enumerate(discrete_cols2):
    ax = sns.countplot(data=df, x='Personal Loan', hue=col)
    for container in ax.containers:
        ax.bar_label(container)
    plt.title(f'Count of Personal Loan based on {col} (Fig {i+5})',
              fontdict=FONT, pad=15)
    plt.show()
# For each numeric column and each discrete feature: mean of the numeric
# column per category (left panel) and its box plot per category (right panel).
for i, col in enumerate(['Income', 'CCAvg', 'Mortgage']):
    print('=' * 30, f"Mean of {col} in each categorical feature", '=' * 30)
    # `category` rather than `cat`, so the module-level `cat` list of column
    # names is not overwritten by the loop variable.
    for j, category in enumerate(discrete_cols2):
        fig, ax = plt.subplots(1, 2, figsize=(10, 4))
        # (`gp =` was separated from its value by text wrapping; rejoined.)
        gp = df.groupby([category])[col].mean().to_frame().reset_index()
        # Syed Afroz Ali (Kaggle Grandmaster)
        # Follow me on LinkedIn for more: https://lnkd.in/g3W7w_Xu
        sns.barplot(data=gp, x=category, y=col, ax=ax[0])
        for container in ax[0].containers:
            ax[0].bar_label(container)
        ax[0].set_title(f'Mean of {col} (based on {category})', y=1.09, **FONT)
        sns.boxplot(data=df, x=category, y=col, ax=ax[1])
        ax[1].set_title(f'Boxplot of {category} (Fig {i+11}-{j+1})',
                        y=1.09, **FONT)
        plt.show()
# 3D scatter of Income against each continuous column, with Personal Loan on
# the z axis and as the color; one figure per column.
continuous_cols = ['Age', 'Experience', 'CCAvg', 'Mortgage']
for i, col in enumerate(continuous_cols):
    fig = px.scatter_3d(
        data_frame=df,
        x=df.Income,
        y=df[col],
        z=df['Personal Loan'],
        # Cast to str so the two loan values get discrete colors.
        color=df['Personal Loan'].astype(str),
        color_discrete_map={'1': 'orange', '0': 'red'},
        template='ggplot2',
        # Syed Afroz Ali (Kaggle Grandmaster)
        # Follow me on LinkedIn for more: https://lnkd.in/g3W7w_Xu
        # Learn Data Visualization With Python
        # https://t.me/AIMLDeepThaught/625
        hover_name='Age',
        # hover_data=
        opacity=0.6,
        # symbol='Transmission',
        # symbol_map=
        # log_x=True,
        # log_z=True,
        height=700,
        title=f'3D scatter of features based on Personal Loan (Fig {i+1})',
    )
    # Styling only. The original also passed
    # title_text="Box Plot Styling Outliers" here (copied from the box-plot
    # snippet), which replaced the per-figure title set above.
    fig.update_layout(
        title_font=dict(color='orange', family='newtimeroman', size=25),
        title_x=0.45,
        paper_bgcolor='#145A32',
        # plot_bgcolor='#DAF7A6',
        font=dict(color='#DAF7A6', family='newtimeroman', size=16),
    )
    pio.show(fig)
Syed Afroz Ali (Kaggle Grandmaster)
Follow me on LinkedIn for more: https://lnkd.in/g3W7w_Xu