03/11/2023, 09:27 decision tree on classification lab ML - Jupyter Notebook
In [1]: import pandas as pd
In [2]: from matplotlib import pyplot as plt
In [3]: import seaborn as sns
In [4]: poke =pd.read_csv(r"D:\smriti iitr and tarot material\data science and
(https://getlin
In [5]: poke
Out[5]:
Type Sp. Sp.
# Name Type 1 Total HP Attack Defense Speed
2 Atk Def
0 1 Bulbasaur Grass Poison 318 45 49 49 65 65 45
1 2 Ivysaur Grass Poison 405 60 62 63 80 80 60
2 3 Venusaur Grass Poison 525 80 82 83 100 100 80
VenusaurMega
3 3 Grass Poison 625 80 100 123 122 120 80
Venusaur
4 4 Charmander Fire NaN 309 39 52 43 60 50 65
... ... ... ... ... ... ... ... ... ... ... ...
795 719 Diancie Rock Fairy 600 50 100 150 100 150 50
DiancieMega
796 719 Rock Fairy 700 50 160 110 160 110 110
Diancie
HoopaHoopa
797 720 Psychic Ghost 600 80 110 60 150 130 70
Confined
HoopaHoopa
798 720 Psychic Dark 680 80 160 60 170 130 80
Unbound
799 721 Volcanion Fire Water 600 80 110 120 130 90 70
800 rows × 13 columns
localhost:8888/notebooks/decision tree on classification lab ML.ipynb 1/13
03/11/2023, 09:27 decision tree on classification lab ML - Jupyter Notebook
In [6]: poke.head()
Out[6]:
Type Type Sp. Sp.
# Name Total HP Attack Defense Speed Gene
1 2 Atk Def
0 1 Bulbasaur Grass Poison 318 45 49 49 65 65 45
1 2 Ivysaur Grass Poison 405 60 62 63 80 80 60
2 3 Venusaur Grass Poison 525 80 82 83 100 100 80
VenusaurMega
3 3 Grass Poison 625 80 100 123 122 120 80
Venusaur
(https://getlin
4 4 Charmander Fire NaN 309 39 52 43 60 50 65
In [7]: poke.rename(columns= {'Type 1':'Primary_type', 'Type 2': 'Secondary_typ
In [8]: poke.head()
Out[8]:
Sp. S
# Name Primary_type Secondary_type Total HP Attack Defense
Atk D
0 1 Bulbasaur Grass Poison 318 45 49 49 65 6
1 2 Ivysaur Grass Poison 405 60 62 63 80 8
2 3 Venusaur Grass Poison 525 80 82 83 100 10
VenusaurMega
3 3 Grass Poison 625 80 100 123 122 12
Venusaur
4 4 Charmander Fire NaN 309 39 52 43 60 5
In [9]: poke['Primary_type']== 'Grass'
Out[9]: 0 True
1 True
2 True
3 True
4 False
...
795 False
796 False
797 False
798 False
799 False
Name: Primary_type, Length: 800, dtype: bool
localhost:8888/notebooks/decision tree on classification lab ML.ipynb 2/13
03/11/2023, 09:27 decision tree on classification lab ML - Jupyter Notebook
In [10]: #to extract grass type pokemon, true values
poke[ poke['Primary_type']== 'Grass']
Out[10]:
S
# Name Primary_type Secondary_type Total HP Attack Defense
At
0 1 Bulbasaur Grass Poison 318 45 49 49 6
1 2 Ivysaur Grass Poison 405 60 62 63 8
2 3 Venusaur Grass Poison 525 80 82 83 10
(https://getlin
VenusaurMega
3 3 Grass Poison 625 80 100 123 12
Venusaur
48 43 Oddish Grass Poison 320 45 50 55 7
... ... ... ... ... ... ... ... ...
718 650 Chespin Grass NaN 313 56 61 65 4
719 651 Quilladin Grass NaN 405 61 78 95 5
720 652 Chesnaught Grass Fighting 530 88 107 122 7
740 672 Skiddo Grass NaN 350 66 65 48 6
741 673 Gogoat Grass NaN 531 123 100 62 9
70 rows × 13 columns
In [11]: #store in new object called grass_pokemon
grass_pokemon = poke[ poke['Primary_type']== 'Grass']
In [12]: grass_pokemon.head()
Out[12]:
Sp.
# Name Primary_type Secondary_type Total HP Attack Defense
Atk
0 1 Bulbasaur Grass Poison 318 45 49 49 65
1 2 Ivysaur Grass Poison 405 60 62 63 80
2 3 Venusaur Grass Poison 525 80 82 83 100
VenusaurMega
3 3 Grass Poison 625 80 100 123 122
Venusaur
48 43 Oddish Grass Poison 320 45 50 55 75
In [13]: water_pokemon = poke[ poke['Primary_type']== 'Water']
localhost:8888/notebooks/decision tree on classification lab ML.ipynb 3/13
03/11/2023, 09:27 decision tree on classification lab ML - Jupyter Notebook
In [14]: water_pokemon.head()
Out[14]:
Sp.
# Name Primary_type Secondary_type Total HP Attack Defense
Atk
9 7 Squirtle Water NaN 314 44 48 65 50
10 8 Wartortle Water NaN 405 59 63 80 65
11 9 Blastoise Water NaN 530 79 83 100 85
BlastoiseMega
12 9 Water NaN 630 79 103 120 135
Blastoise
(https://getlin
59 54 Psyduck Water NaN 320 50 52 48 65
In [15]: fire_pokemon = poke[ poke['Primary_type']== 'Fire']
In [16]: fire_pokemon.head()
Out[16]:
Sp. S
# Name Primary_type Secondary_type Total HP Attack Defense
Atk D
4 4 Charmander Fire NaN 309 39 52 43 60
5 5 Charmeleon Fire NaN 405 58 64 58 80
6 6 Charizard Fire Flying 534 78 84 78 109
CharizardMega
7 6 Fire Dragon 634 78 130 111 130
Charizard X
CharizardMega
8 6 Fire Flying 634 78 104 78 159 1
Charizard Y
In [17]: poke.shape
Out[17]: (800, 13)
In [18]: grass_pokemon.shape
Out[18]: (70, 13)
In [19]: fire_pokemon.shape
Out[19]: (52, 13)
In [20]: water_pokemon.shape
Out[20]: (112, 13)
localhost:8888/notebooks/decision tree on classification lab ML.ipynb 4/13
03/11/2023, 09:27 decision tree on classification lab ML - Jupyter Notebook
In [21]: # % of all 3 types
(112+70+52)/800
Out[21]: 0.2925
In [22]: grass_pokemon.head()
Out[22]:
Sp.
# Name Primary_type Secondary_type Total HP Attack Defense
Atk
0 1 Bulbasaur Grass Poison 318 45 49 49 65
(https://getlin
1 2 Ivysaur Grass Poison 405 60 62 63 80
2 3 Venusaur Grass Poison 525 80 82 83 100
VenusaurMega
3 3 Grass Poison 625 80 100 123 122
Venusaur
48 43 Oddish Grass Poison 320 45 50 55 75
In [36]: #to understand speed distribution
sns.histplot(grass_pokemon['Speed'])
plt.show()
localhost:8888/notebooks/decision tree on classification lab ML.ipynb 5/13
03/11/2023, 09:27 decision tree on classification lab ML - Jupyter Notebook
In [37]: sns.histplot(grass_pokemon['Sp. Atk'])
plt.show()
(https://getlin
In [38]: sns.histplot(grass_pokemon['Sp. Def'])
plt.show()
localhost:8888/notebooks/decision tree on classification lab ML.ipynb 6/13
03/11/2023, 09:27 decision tree on classification lab ML - Jupyter Notebook
In [30]: grass_pokemon.describe()
Out[30]:
# Total HP Attack Defense Sp. Atk Sp.
count 70.000000 70.000000 70.000000 70.000000 70.000000 70.000000 70.000
mean 344.871429 421.142857 67.271429 73.214286 70.800000 77.500000 70.428
std 200.264385 106.650626 19.516564 25.380520 24.485192 27.244864 21.446
min 1.000000 180.000000 30.000000 27.000000 30.000000 24.000000 30.000
25% 187.250000 318.500000 51.250000 55.000000 50.000000 57.000000 55.000
(https://getlin
50% 372.000000 430.000000 65.500000 70.000000 66.000000 75.000000 66.000
75% 496.750000 497.000000 75.000000 93.500000 84.500000 99.500000 85.000
max 673.000000 630.000000 123.000000 132.000000 131.000000 145.000000 129.000
In [31]: water_pokemon.head()
Out[31]:
Sp.
# Name Primary_type Secondary_type Total HP Attack Defense
Atk
9 7 Squirtle Water NaN 314 44 48 65 50
10 8 Wartortle Water NaN 405 59 63 80 65
11 9 Blastoise Water NaN 530 79 83 100 85
BlastoiseMega
12 9 Water NaN 630 79 103 120 135
Blastoise
59 54 Psyduck Water NaN 320 50 52 48 65
In [35]: sns.histplot(water_pokemon['Speed'])
plt.show()
localhost:8888/notebooks/decision tree on classification lab ML.ipynb 7/13
03/11/2023, 09:27 decision tree on classification lab ML - Jupyter Notebook
In [39]: sns.histplot(water_pokemon['Sp. Atk'])
plt.show()
(https://getlin
In [40]: sns.histplot(water_pokemon['Sp. Def'])
plt.show()
In [41]: water_pokemon.describe()
Out[41]:
# Total HP Attack Defense Sp. Atk Sp.
count 112.000000 112.000000 112.000000 112.000000 112.000000 112.000000 112.000
mean 303.089286 430.455357 72.062500 74.151786 72.946429 74.812500 70.517
std 188.440807 113.188266 27.487026 28.377192 27.773809 29.030128 28.460
min 7.000000 200.000000 20.000000 10.000000 20.000000 10.000000 20.000
25% 130.000000 328.750000 52.250000 53.000000 54.500000 55.000000 50.000
50% 275.000000 455.000000 70.000000 72.000000 70.000000 70.000000 65.000
75% 456.250000 502.250000 90.250000 92.000000 88.500000 90.500000 89.250
max 693.000000 770.000000 170.000000 155.000000 180.000000 180.000000 160.000
localhost:8888/notebooks/decision tree on classification lab ML.ipynb 8/13
03/11/2023, 09:27 decision tree on classification lab ML - Jupyter Notebook
In [42]: grass_pokemon.describe()
Out[42]:
# Total HP Attack Defense Sp. Atk Sp.
count 70.000000 70.000000 70.000000 70.000000 70.000000 70.000000 70.000
mean 344.871429 421.142857 67.271429 73.214286 70.800000 77.500000 70.428
std 200.264385 106.650626 19.516564 25.380520 24.485192 27.244864 21.446
min 1.000000 180.000000 30.000000 27.000000 30.000000 24.000000 30.000
25% 187.250000 318.500000 51.250000 55.000000 50.000000 57.000000 55.000
(https://getlin
50% 372.000000 430.000000 65.500000 70.000000 66.000000 75.000000 66.000
75% 496.750000 497.000000 75.000000 93.500000 84.500000 99.500000 85.000
max 673.000000 630.000000 123.000000 132.000000 131.000000 145.000000 129.000
In [43]: sns.histplot(fire_pokemon['Speed'], color='Orange')
plt.show()
In [44]: sns.histplot(fire_pokemon['Sp. Atk'], color='Orange')
plt.show()
localhost:8888/notebooks/decision tree on classification lab ML.ipynb 9/13
03/11/2023, 09:27 decision tree on classification lab ML - Jupyter Notebook
In [45]: sns.histplot(fire_pokemon['Sp. Def'], color='Orange')
plt.show()
(https://getlin
In [46]: fire_pokemon.describe()
Out[46]:
# Total HP Attack Defense Sp. Atk Sp.
count 52.000000 52.000000 52.000000 52.000000 52.000000 52.000000 52.000
mean 327.403846 458.076923 69.903846 84.769231 67.769231 88.980769 72.211
std 226.262840 109.760496 19.404123 28.769275 23.658200 30.042121 22.619
min 4.000000 250.000000 38.000000 30.000000 37.000000 15.000000 40.000
25% 143.500000 368.000000 58.000000 62.250000 51.000000 70.000000 54.750
50% 289.500000 482.000000 70.000000 84.500000 64.000000 85.000000 67.500
75% 513.250000 534.000000 80.000000 101.000000 78.000000 109.000000 85.000
max 721.000000 680.000000 115.000000 160.000000 140.000000 159.000000 154.000
In [47]: water_pokemon.describe()
Out[47]:
# Total HP Attack Defense Sp. Atk Sp.
count 112.000000 112.000000 112.000000 112.000000 112.000000 112.000000 112.000
mean 303.089286 430.455357 72.062500 74.151786 72.946429 74.812500 70.517
std 188.440807 113.188266 27.487026 28.377192 27.773809 29.030128 28.460
min 7.000000 200.000000 20.000000 10.000000 20.000000 10.000000 20.000
25% 130.000000 328.750000 52.250000 53.000000 54.500000 55.000000 50.000
50% 275.000000 455.000000 70.000000 72.000000 70.000000 70.000000 65.000
75% 456.250000 502.250000 90.250000 92.000000 88.500000 90.500000 89.250
max 693.000000 770.000000 170.000000 155.000000 180.000000 180.000000 160.000
localhost:8888/notebooks/decision tree on classification lab ML.ipynb 10/13
03/11/2023, 09:27 decision tree on classification lab ML - Jupyter Notebook
In [48]: #sp def, sp attack and speed both higher in fire pokemon. mean also hig
#water pokemon loses from fire pokemon
#till now data analysis was done
In [49]: #now apply decision tree algorithm
(https://getlin
poke.head()
Out[49]:
Sp. S
# Name Primary_type Secondary_type Total HP Attack Defense
Atk D
0 1 Bulbasaur Grass Poison 318 45 49 49 65 6
1 2 Ivysaur Grass Poison 405 60 62 63 80 8
2 3 Venusaur Grass Poison 525 80 82 83 100 10
VenusaurMega
3 3 Grass Poison 625 80 100 123 122 12
Venusaur
4 4 Charmander Fire NaN 309 39 52 43 60 5
In [50]: poke['Legendary'].value_counts()
Out[50]: False 735
True 65
Name: Legendary, dtype: int64
In [51]: #if we are comparing 2 features , presense of one fetaure is more than
#non legandary pokemons presence is much more than legandary
65/800
Out[51]: 0.08125
In [52]: # 8 % presence of legandary pokemons
#to know relationship between speed and legandary column
#means classifying this pokemon is legandary or not on the basis of the
#so here we are using decision tree classifier
#so first divide dependent(legandary) variable and independent variable
localhost:8888/notebooks/decision tree on classification lab ML.ipynb 11/13
03/11/2023, 09:27 decision tree on classification lab ML - Jupyter Notebook
In [54]: x = poke[['Speed']]
y = poke[['Legendary']]
In [55]: #divide data in traning and testing set and predict on test set
#for train and test set use sklearn
from sklearn.model_selection import train_test_split
x_train, x_test,y_train, y_test = train_test_split(x,y,test_size=0.3)
#means 30 % observations in test set rest 70 % training set observat
(https://getlin
In [56]: #to make decison tree classifier
from sklearn.tree import DecisionTreeClassifier
#make instance
dtc = DecisionTreeClassifier()
In [57]: #fit on train set
dtc.fit(x_train, y_train)
Out[57]: DecisionTreeClassifier()
In [58]: #to predict value
y_pred = dtc.predict(x_test)
In [61]: # to see difference in actual and predicted results
#to find accuracy import confusion matrix
from sklearn.metrics import confusion_matrix
confusion_matrix(y_test, y_pred)
Out[61]: array([[224, 1],
[ 15, 0]], dtype=int64)
In [62]: #finding accuracy
(217+0)/(217+0+22+1)
Out[62]: 0.9041666666666667
In [63]: #left diagonal shows correctly classified value and right diagonal show
#so i divide correctly classified values with all classified value so 9
#so implementation of decision tree done
localhost:8888/notebooks/decision tree on classification lab ML.ipynb 12/13
03/11/2023, 09:27 decision tree on classification lab ML - Jupyter Notebook
In [ ]:
(https://getlin
localhost:8888/notebooks/decision tree on classification lab ML.ipynb 13/13