In [21]: import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
In [22]: DATA_PATH = "IRIS.csv"  # dataset file, resolved relative to the notebook's working directory
# Read the CSV into the DataFrame that every later cell works on.
df = pd.read_csv(DATA_PATH)
In [23]: df.head()
Out[23]:
sepal_length sepal_width petal_length petal_width species
0 5.1 3.5 1.4 0.2 Iris-setosa
1 4.9 3.0 1.4 0.2 Iris-setosa
2 4.7 3.2 1.3 0.2 Iris-setosa
3 4.6 3.1 1.5 0.2 Iris-setosa
4 5.0 3.6 1.4 0.2 Iris-setosa
In [24]: df.mean()
Out[24]: sepal_length 5.843333
sepal_width 3.054000
petal_length 3.758667
petal_width 1.198667
dtype: float64
In [25]: df.median()
Out[25]: sepal_length 5.80
sepal_width 3.00
petal_length 4.35
petal_width 1.30
dtype: float64
In [26]: df.mode()
Out[26]:
sepal_length sepal_width petal_length petal_width species
0 5.0 3.0 1.5 0.2 Iris-setosa
1 NaN NaN NaN NaN Iris-versicolor
2 NaN NaN NaN NaN Iris-virginica
In [27]: df.std()
Out[27]: sepal_length 0.828066
sepal_width 0.433594
petal_length 1.764420
petal_width 0.763161
dtype: float64
1/5
In [28]: df.min()
Out[28]: sepal_length 4.3
sepal_width 2
petal_length 1
petal_width 0.1
species Iris-setosa
dtype: object
In [29]: df.max()
Out[29]: sepal_length 7.9
sepal_width 4.4
petal_length 6.9
petal_width 2.5
species Iris-virginica
dtype: object
In [30]: df.var()
Out[30]: sepal_length 0.685694
sepal_width 0.188004
petal_length 3.113179
petal_width 0.582414
dtype: float64
In [35]: sepal_lengths = df["sepal_length"]
# Histogram of sepal length with matplotlib's default binning.
plt.hist(sepal_lengths)
plt.show()
2/5
In [43]: plt.figure(figsize = (10,7))
df.boxplot()
plt.show()
<matplotlib.figure.Figure at 0x7f68b933f750>
3/5
In [48]: fig, axes = plt.subplots(2, 2, figsize=(16,8))
axes[0,0].set_title("Distribution of Sepal Length")
axes[0,0].hist(df["sepal_length"]);
axes[0,1].set_title("Distribution of Sepal Width")
axes[0,1].hist(df["sepal_width"]);
axes[1,0].set_title("Distribution of Petal Length")
axes[1,0].hist(df["petal_length"]);
axes[1,1].set_title("Distribution of Petal Width")
axes[1,1].hist(df["petal_width"]);
plt.show()
4/5
In [50]: fig, axes = plt.subplots(2, 2, figsize=(16,9))
axes[0,0].set_title("Distribution of Sepal Length")
sns.boxplot(y="sepal_length", x="species", data=df, orient='v', ax=axes[0,
0])
axes[0,1].set_title("Distribution of Sepal Length")
sns.boxplot(y="sepal_width", x="species", data=df, orient='v', ax=axes[0,
1])
axes[1,0].set_title("Distribution of Sepal Length")
sns.boxplot(y="petal_length", x="species", data=df, orient='v', ax=axes[1,
0])
axes[1,1].set_title("Distribution of Sepal Length")
sns.boxplot(y="petal_length", x="species", data=df, orient='v', ax=axes[1,
1])
plt.show()
5/5