Matplotlib in Python
What is Matplotlib in Python?
Matplotlib is a cross-platform data visualization and graphical plotting library
(histograms, scatter plots, bar charts, etc.) for Python and its numerical extension
NumPy. As such, it offers a viable open-source alternative to MATLAB. Developers can
also use Matplotlib’s APIs (Application Programming Interfaces) to embed plots in GUI
applications.
Matplotlib styles list
Types of Data
Numerical Data
Categorical Data
In [ ]: import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
plt.style.use('fivethirtyeight')
In [ ]: df = sns.load_dataset('tips')
In [ ]: df.head()
Out[ ]: total_bill tip sex smoker day time size
0 16.99 1.01 Female No Sun Dinner 2
1 10.34 1.66 Male No Sun Dinner 3
2 21.01 3.50 Male No Sun Dinner 3
3 23.68 3.31 Male No Sun Dinner 2
4 24.59 3.61 Female No Sun Dinner 4
2D Line
Bivariate Analysis
Categorical -> Numerical and Numerical -> Numerical
Use case - Time series data
In [ ]: df1 = df.head(5)
In [ ]: plt.plot(df1['total_bill'], df1['tip'])
plt.show()
In [ ]: # title and label
plt.plot(df1['total_bill'], df1['tip'])
plt.title('Graph')
plt.xlabel('total bill')
plt.ylabel('tip')
plt.show()
In [ ]: # multi plot in same graph
plt.plot(df1['total_bill'], df1['tip'], color='green')
plt.plot(df1['total_bill'], df1['size'])
plt.title('Graph')
plt.xlabel('total bill')
plt.ylabel('tip')
plt.show()
In [ ]: # line style
plt.plot(df1['total_bill'], df1['tip'], color='green', linestyle='dotted')
plt.plot(df1['total_bill'], df1['size'], linestyle='dashed')
plt.title('Graph')
plt.xlabel('total bill')
plt.ylabel('tip')
plt.show()
In [ ]: df = sns.load_dataset('iris')
df.head()
df1 = df.head(5)
In [ ]: plt.plot(df1['sepal_length'], df1['sepal_width'],
color='green', linestyle='dashdot')
plt.title('Graph')
plt.xlabel('sepal length')
plt.ylabel('sepal width')
plt.show()
In [ ]: plt.plot(df1['sepal_length'], df1['sepal_width'],
color='green', linestyle='dashdot', linewidth=3, marker='o')
plt.title('Graph')
plt.xlabel('sepal length')
plt.ylabel('sepal width')
plt.show()
In [ ]: # label
df = sns.load_dataset('tips')
df1 = df.head(5)
plt.plot(df1['total_bill'], df1['tip'], color='green',
linestyle='dotted', label='tip')
plt.plot(df1['total_bill'], df1['size'], linestyle='dashed', label='size')
plt.title('Graph')
plt.xlabel('total bill')
plt.ylabel('tip')
plt.legend(loc='upper right')
plt.show()
In [ ]: # limiting axes
price = [48000, 54000, 57000, 48000, 47000, 45000, 450000]
year = [2015, 2016, 2017, 2018, 2019, 2020, 2021]
plt.plot(year, price)
plt.ylim(0, 100000)
plt.show()
Scatter
Bivariate Analysis
numerical vs numerical
Use case - finding correlation
In [ ]: df = sns.load_dataset('iris')
df.head()
df1 = df.head(5)
df1
Out[ ]: sepal_length sepal_width petal_length petal_width species
0 5.1 3.5 1.4 0.2 setosa
1 4.9 3.0 1.4 0.2 setosa
2 4.7 3.2 1.3 0.2 setosa
3 4.6 3.1 1.5 0.2 setosa
4 5.0 3.6 1.4 0.2 setosa
In [ ]: plt.scatter(df['sepal_length'], df['sepal_width'])
plt.show()
In [ ]: # color change and marker
plt.scatter(df['sepal_length'], df['sepal_width'], color='orange',
marker='o')  # marker value was cut off in the export; 'o' matches the next cell
plt.show()
In [ ]: # size
plt.scatter(df['sepal_length'], df['sepal_width'],
color='orange', marker='o', s=df['petal_width']*40)
plt.show()
In [ ]: plt.plot(df['sepal_length'], df['sepal_width'], 'o')
plt.show()
**Bar Chart**
Bivariate Analysis
Numerical vs Categorical
Use case - Aggregate analysis of groups
In [ ]: plt.bar(df['species'], df['sepal_length'],
width=0.5, color=['orange'])
plt.show()
In [ ]: plt.barh(df['species'], df['sepal_length'])
plt.show()
In [ ]: # stacked bar chart
plt.bar(df['species'], df['sepal_length'])
plt.bar(df['species'], df['sepal_width'], bottom=df['sepal_length'])
plt.bar(df['species'], df['petal_length'],
bottom=df['sepal_length'] + df['sepal_width'])
plt.show()
**Histogram**
Univariate Analysis
Numerical col
Use case - Frequency Count
In [ ]: df.describe()
Out[ ]: sepal_length sepal_width petal_length petal_width
count 150.000000 150.000000 150.000000 150.000000
mean 5.843333 3.057333 3.758000 1.199333
std 0.828066 0.435866 1.765298 0.762238
min 4.300000 2.000000 1.000000 0.100000
25% 5.100000 2.800000 1.600000 0.300000
50% 5.800000 3.000000 4.350000 1.300000
75% 6.400000 3.300000 5.100000 1.800000
max 7.900000 4.400000 6.900000 2.500000
In [ ]: plt.hist(df['sepal_length'])
plt.show()
In [ ]: # using bin
plt.hist(df['sepal_length'], bins=[1, 4.5,
7, 8], edgecolor='r', log=True)
plt.show()
**Pie chart**
Univariate/ Bivariate Analysis
Categorical vs Numerical
Use case - To find contribution on a standard scale
In [ ]: data = [23, 45, 100, 20, 49]
subjects = ['eng', 'science', 'maths', 'sst', 'hindi']
plt.pie(data, labels=subjects, autopct="%0.1f%%")
plt.show()
In [ ]: plt.pie(data, labels=subjects, autopct="%0.1f%%",
explode=[0.2, 0, 0.1, 0, 0], labeldistance=1.1)
plt.show()
In [ ]: plt.pie(data, labels=subjects, autopct="%0.1f%%",
textprops={'fontsize': 15})  # font size was cut off in the export; 15 is a placeholder
plt.show()
In [ ]: plt.pie(data, labels=subjects, autopct="%0.1f%%", radius=1.3)
plt.show()
In [ ]: plt.pie(data, labels=subjects, autopct="%0.1f%%", counterclock=False)
plt.show()
In [ ]: plt.style.use('ggplot')
In [ ]: plt.pie(data, labels=subjects, autopct="%0.1f%%")
plt.show()
In [ ]: plt.plot(df['sepal_length'], df['sepal_width'], 'o')
plt.show()
In [ ]: price = [48000, 54000, 57000, 48000, 47000, 45000, 450000]
year = [2015, 2016, 2017, 2018, 2019, 2020, 2021]
plt.plot(year, price)
plt.ylim(0, 100000)
plt.show()
In [ ]: plt.scatter(df['sepal_length'], df['sepal_width'])
plt.show()
In [ ]: df['species'].unique()
Out[ ]: array(['setosa', 'versicolor', 'virginica'], dtype=object)
In [ ]: df['species'] = df['species'].replace(
{'setosa': 0, 'versicolor': 1, 'virginica': 2})
In [ ]: df.tail()
Out[ ]: sepal_length sepal_width petal_length petal_width species
145 6.7 3.0 5.2 2.3 2
146 6.3 2.5 5.0 1.9 2
147 6.5 3.0 5.2 2.0 2
148 6.2 3.4 5.4 2.3 2
149 5.9 3.0 5.1 1.8 2
In [ ]: plt.scatter(df['sepal_length'], df['petal_length'], c=df['species'])
plt.show()
In [ ]: # color
plt.scatter(df['sepal_length'], df['petal_length'],
c=df['species'], cmap='winter')
plt.colorbar()
plt.show()
C:\Users\dhanr\AppData\Local\Temp\ipykernel_23404\1043740856.py:4:
MatplotlibDeprecationWarning: Auto-removal of grids by pcolor() and
pcolormesh() is deprecated since 3.5 and will be removed two minor
releases later; please call grid(False) first.
plt.colorbar()
In [ ]: # size
plt.figure(figsize=(10, 6))
plt.scatter(df['sepal_length'], df['petal_length'],
c=df['species'], cmap='winter')
plt.xlabel('Sepal Length')
plt.ylabel('petal length')
plt.colorbar()
plt.show()
C:\Users\dhanr\AppData\Local\Temp\ipykernel_23404\2569873812.py:7:
MatplotlibDeprecationWarning: Auto-removal of grids by pcolor() and
pcolormesh() is deprecated since 3.5 and will be removed two minor
releases later; please call grid(False) first.
plt.colorbar()
**Annotations**
In [ ]: x = [1, 2, 3, 4]
y = [5, 6, 7, 8]
plt.scatter(x, y)
plt.text(1, 5, 'Point 1')
plt.text(2, 6, 'Point 2')
plt.text(3, 7, 'Point 3')
plt.text(4, 8, 'Point 4')
Out[ ]: Text(4, 8, 'Point 4')
In [ ]: plt.figure(figsize=(10, 6))
plt.scatter(df['sepal_length'], df['petal_length'],
c=df['species'], cmap='winter')
plt.xlabel('Sepal Length')
plt.ylabel('petal length')
plt.axvline(6.2, color='r')
plt.axhline(3.5, color='blue')
plt.colorbar()
plt.show()
C:\Users\dhanr\AppData\Local\Temp\ipykernel_23404\2047405343.py:8:
MatplotlibDeprecationWarning: Auto-removal of grids by pcolor() and
pcolormesh() is deprecated since 3.5 and will be removed two minor
releases later; please call grid(False) first.
plt.colorbar()
**Subplot**
In [ ]: fig, ax = plt.subplots(ncols=1, nrows=2, sharex=True, figsize=(10, 6))
ax[0].scatter(df['sepal_length'], df['petal_length'])
ax[0].set_xlabel('Sepal length')
ax[1].scatter(df['sepal_width'], df['petal_width'], color='blue')
ax[1].set_xlabel('Sepal width')
plt.show()
In [ ]: plt.style.use('fivethirtyeight')
In [ ]: fig, ax = plt.subplots(ncols=2, nrows=2, sharex=False, figsize=(15, 10))
ax[0, 0].scatter(df['sepal_length'], df['petal_length'])
ax[0, 0].set_xlabel('Sepal length')
ax[0, 1].scatter(df['sepal_length'], df['sepal_width'], color='green')
ax[0, 1].set_xlabel('sepal len')
ax[1, 0].bar(df['species'], df['petal_width'])
ax[1, 0].set_xlabel('species')
ax[1, 1].hist(df['petal_length'], color='orange',
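edgecolor='white',
bins=[1, 1.5, 2, 2.5, 3, 3.5, 4, 4.5, 5, 5.5, 6, 6.5, 7])  # list tail was cut off in the export; extended in 0.5 steps to cover the max of 6.9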
ax[1, 1].set_xlabel('petal len')
plt.show()
In [ ]: fig = plt.figure(figsize=(15, 10))
ax1 = fig.add_subplot(2, 2, 1)
ax1.scatter(df['sepal_length'], df['petal_length'])
ax1.set_xlabel('sepal length')
ax2 = fig.add_subplot(2, 2, 2)
ax2.scatter(df['sepal_width'], df['petal_width'])
ax2.set_xlabel('sepal width')
ax3 = fig.add_subplot(2, 2, 3)
ax3.scatter(df['sepal_length'], df['sepal_width'], color='green')
ax3.set_xlabel('sepal len')
ax4 = fig.add_subplot(2, 2, 4)
ax4.hist(df['petal_length'], color='orange',
edgecolor='white',
bins=[1, 1.5, 2, 2.5, 3, 3.5, 4, 4.5, 5, 5.5, 6, 6.5, 7])  # list tail was cut off in the export; extended in 0.5 steps to cover the max of 6.9
ax4.set_xlabel('petal len')
plt.show()
In [ ]: plt.style.use('ggplot')
**3D Scatter Plot**
In [ ]: fig = plt.figure(figsize=(10, 6))
ax = plt.subplot(projection='3d')
ax.scatter3D(df['sepal_length'], df['sepal_width'],
df['petal_length'], marker='>', s=50)
ax.set_xlabel('sepal len')
ax.set_ylabel('sepal width')
ax.set_zlabel('petal len')
plt.show()
In [ ]: x = [0, 1, 5]
y = [0, 10, 13]
z = [0, 13, 20]
plt.figure(figsize=(10, 6))
ax = plt.subplot(projection='3d')
ax.scatter(x, y, z, s=70)
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.set_zlabel('z')
plt.show()
3D Line Plot
In [ ]: x = [0, 1, 5, 25]
y = [0, 10, 13, 0]
z = [0, 13, 20, 9]
plt.figure(figsize=(10, 6))
ax = plt.subplot(projection='3d')
ax.scatter(x, y, z, s=100, color='red')
ax.plot(x, y, z)
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.set_zlabel('z')
plt.show()
**3D Surface Plots**
In [ ]: x = np.linspace(-10, 10, 100)
y = np.linspace(-10, 10, 100)
xx, yy = np.meshgrid(x, y)
yy.shape
Out[ ]: (100, 100)
In [ ]: z = xx**2 + yy**2
In [ ]: fig = plt.figure(figsize=(10, 6))
ax = plt.subplot(projection='3d')
p = ax.plot_surface(xx, yy, z, cmap='viridis')
plt.colorbar(p)
plt.show()
C:\Users\dhanr\AppData\Local\Temp\ipykernel_23404\1838783719.py:7:
MatplotlibDeprecationWarning: Auto-removal of grids by pcolor() and
pcolormesh() is deprecated since 3.5 and will be removed two minor
releases later; please call grid(False) first.
plt.colorbar(p)
**Contour Plots**
In [ ]: fig = plt.figure(figsize=(10, 6))
ax = plt.subplot()
p = ax.contourf(xx, yy, z, cmap='viridis')
fig.colorbar(p)
plt.show()
C:\Users\dhanr\AppData\Local\Temp\ipykernel_23404\3272041914.py:6:
MatplotlibDeprecationWarning: Auto-removal of grids by pcolor() and
pcolormesh() is deprecated since 3.5 and will be removed two minor
releases later; please call grid(False) first.
fig.colorbar(p)