Scalers

UNIVERSIDAD DE GUADALAJARA
CENTRO UNIVERSITARIO DE CIENCIAS EXACTAS E

INGENIERÍAS
Seminario de Inteligencia Artificial II.
Reporte de práctica
Nombre del alumno: Izmael Guzman Murguia

Profesor: Erasmo Gabriel Martínez Soltero
Título de la práctica: Scalers.
Fecha: 28 marzo 2023
1
Código para Scalers con dataset de heart.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn import metrics
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
# Load the heart dataset; 'heart.csv' is read relative to the working directory.
df = pd.read_csv('heart.csv')

# Clean data: in each listed column, replace zeros with half of that column's
# smallest non-zero value.
#
# The original listing computed the minimum only once (from 'sex'), stored it
# under the name `min` (shadowing the builtin) and reused it for every column,
# although it built a per-column non-zero selection each time without using it.
# NOTE(review): the fill value now differs from the old one for any column
# whose smallest non-zero value is not the same as that of 'sex' -- re-check
# the figures that depend on it.
ZERO_FILLED_COLUMNS = [
    'sex', 'cp', 'trestbps', 'fbs', 'restecg',
    'exang', 'oldpeak', 'slope', 'ca', 'thal',
]

for column in ZERO_FILLED_COLUMNS:
    nonzero_values = df.loc[df[column] != 0, column]
    if nonzero_values.empty:
        # Nothing to derive a fill value from; leave the column as it is.
        continue
    fill_value = nonzero_values.min() / 2
    # Assign the result back instead of calling
    # `df[column].replace(..., inplace=True)`: the in-place call on a column
    # selection is chained assignment, which warns in recent pandas and does
    # not update `df` when Copy-on-Write is enabled.
    df[column] = df[column].replace(to_replace=0, value=fill_value)
# Plot how many people have heart problems (1) and how many do not (0).
ax = df['target'].value_counts().plot(kind='bar')

# Relabel class 0 as 2, so the two classes are 1 and 2 from here on.
# Assigned back rather than replaced in place on the column selection, which
# is chained assignment and is not reliable in recent pandas.
df['target'] = df['target'].replace(to_replace=0, value=2)

# Target: the column at position 13. Features: the columns at positions 0-11.
# NOTE(review): the 0:12 slice stops before position 12, so that column is not
# used as a model input -- presumably 'thal', which was cleaned earlier;
# confirm this is intended.
y = df.iloc[:, 13]
x = df.iloc[:, 0:12]

# Histograms of the unscaled input features.
x.hist(column=['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs',
               'restecg', 'thalach', 'exang', 'oldpeak', 'slope',
               'ca'])
# *** NEVER (no scaler) ***
# Baseline: train on the unscaled features.
# NOTE(review): no random_state is passed, so every run (and every section of
# this script) gets a different random split; scores are only roughly
# comparable between sections.
xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.3)

# Multilayer perceptron with the hyperparameters shared by all experiments.
model = MLPClassifier(alpha=0.01, max_iter=1200)
model.fit(xtrain, ytrain)

print('\n\n\n\n\n\n\n\n* NEVER *')
print('Train:', model.score(xtrain, ytrain))
print('Test:', model.score(xtest, ytest))

# Predict on the test part and print the classification report.
ytestpred = model.predict(xtest)
print('Classification report:\n', classification_report(ytest, ytestpred))

class_names = [1, 2]
# Show the figures created so far before building the confusion matrix.
plt.show()

# Pass the labels explicitly so the matrix rows/columns always line up with
# `display_labels`, even if a class is absent from the test split.
cm = metrics.confusion_matrix(ytest, ytestpred, labels=class_names)
disp = metrics.ConfusionMatrixDisplay(confusion_matrix=cm,
                                      display_labels=class_names)
disp.plot()
plt.show()
# *** MinMax Scaler ***
# Create the scaler.
scalerMM = MinMaxScaler()  # Min-Max scaler

# Work on a copy so the unscaled features in `x` stay untouched.
scaledXDFMM = x.copy()

# Scale the input values. The scaler is fitted once and the result is reused
# for both names (the original fitted it twice on the same data).
# NOTE(review): the scaler is fitted on all rows before the train/test split,
# so test rows influence the scaling; consider fitting on the training split
# only.
scaledXMM = scalerMM.fit_transform(x)
scaledXDFMM[x.columns] = scaledXMM

# Histograms of the scaled features.
scaledXDFMM.hist(column=['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs',
                         'restecg', 'thalach', 'exang', 'oldpeak', 'slope',
                         'ca'])

xtrain2, xtest2, ytrain2, ytest2 = train_test_split(scaledXDFMM, y,
                                                    test_size=0.3)

# Create the multilayer perceptron model.
model2 = MLPClassifier(alpha=0.01, max_iter=1200)

# Train it.
model2.fit(xtrain2, ytrain2)

# Score the model.
print('* MinMax Scaler *')
print('Train2:', model2.score(xtrain2, ytrain2))
print('Test2:', model2.score(xtest2, ytest2))

# Predict on the test part.
ytestpred2 = model2.predict(xtest2)

# Print the classification report.
print('Classification report2:\n', classification_report(ytest2, ytestpred2))

class_names2 = [1, 2]
plt.show()

# Explicit labels keep the matrix aligned with `display_labels`.
cm2 = metrics.confusion_matrix(ytest2, ytestpred2, labels=class_names2)
disp2 = metrics.ConfusionMatrixDisplay(confusion_matrix=cm2,
                                       display_labels=class_names2)
disp2.plot()
plt.show()
# *** Standar scaler ***
scalerS = StandardScaler()  # Standard scaler

# Work on a copy so the unscaled features in `x` stay untouched.
scaledXDFS = x.copy()

# Scale the input values (fitted once, result reused for both names).
# NOTE(review): fitted on all rows before the train/test split, so test rows
# influence the scaling; consider fitting on the training split only.
scaledXS = scalerS.fit_transform(x)
scaledXDFS[x.columns] = scaledXS

# Histograms of the standardized features.
scaledXDFS.hist(column=['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs',
                        'restecg', 'thalach', 'exang', 'oldpeak', 'slope',
                        'ca'])

# Splitting into train and test.
xtrain3, xtest3, ytrain3, ytest3 = train_test_split(scaledXDFS, y,
                                                    test_size=0.3)

# NOTE(review): the statements from here to the final plot were partly missing
# from this listing; they are restored to mirror the MinMax section
# (the Train3/Test3 prints in particular are reconstructed -- confirm).
model3 = MLPClassifier(alpha=0.01, max_iter=1200)

# Train it.
model3.fit(xtrain3, ytrain3)

# Score the model.
print('* Standar scaler *')
print('Train3:', model3.score(xtrain3, ytrain3))
print('Test3:', model3.score(xtest3, ytest3))

# Predict on the test part and print the classification report.
ytestpred3 = model3.predict(xtest3)
print('Classification report3:\n', classification_report(ytest3, ytestpred3))

class_names3 = [1, 2]
plt.show()

# Explicit labels keep the matrix aligned with `display_labels`.
cm3 = metrics.confusion_matrix(ytest3, ytestpred3, labels=class_names3)
disp3 = metrics.ConfusionMatrixDisplay(confusion_matrix=cm3,
                                       display_labels=class_names3)
disp3.plot()
plt.show()
# *** Minmax - Standar ***
# Step 1: Min-Max scaling of the raw features.
scalerMM2 = MinMaxScaler()  # Min-Max scaler
scaledXDFMM2 = x.copy()  # copy for the Min-Max scaler

# Fitted once; the result is reused for both names.
scaledXMM2 = scalerMM2.fit_transform(x)
scaledXDFMM2[x.columns] = scaledXMM2
scaledXDFMM2.hist(column=['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs',
                          'restecg', 'thalach', 'exang', 'oldpeak', 'slope',
                          'ca'])

# Step 2: standardize the Min-Max-scaled features.
# NOTE(review): standardizing a per-column linear rescaling gives the same
# values as standardizing the raw column, so this input should match the
# Standard-scaler-only experiment up to floating-point error.
scalerS2 = StandardScaler()  # Standard scaler
scaledXDFS2 = scaledXDFMM2.copy()  # copy for the Standard scaler

scaledXS2 = scalerS2.fit_transform(scaledXDFS2)
scaledXDFS2[scaledXDFS2.columns] = scaledXS2
# The column list had lost its middle line in this listing, which left
# 'restecg' and 'ca' adjacent (they would concatenate into one bogus name).
scaledXDFS2.hist(column=['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs',
                         'restecg', 'thalach', 'exang', 'oldpeak', 'slope',
                         'ca'])

# NOTE(review): both scalers are fitted on all rows before the split, so test
# rows influence the scaling; consider fitting on the training split only.
xtrain4, xtest4, ytrain4, ytest4 = train_test_split(scaledXDFS2, y,
                                                    test_size=0.3)

# NOTE(review): the statements from here to the final plot were partly missing
# from this listing; they are restored to mirror the MinMax section
# (the Train4/Test4 prints in particular are reconstructed -- confirm).
model4 = MLPClassifier(alpha=0.01, max_iter=1200)

# Train it.
model4.fit(xtrain4, ytrain4)

# Score the model.
print('* Minmax - Standar *')
print('Train4:', model4.score(xtrain4, ytrain4))
print('Test4:', model4.score(xtest4, ytest4))

# Predict on the test part and print the classification report.
ytestpred4 = model4.predict(xtest4)
print('Classification report4:\n', classification_report(ytest4, ytestpred4))

class_names4 = [1, 2]
plt.show()

# Explicit labels keep the matrix aligned with `display_labels`.
cm4 = metrics.confusion_matrix(ytest4, ytestpred4, labels=class_names4)
disp4 = metrics.ConfusionMatrixDisplay(confusion_matrix=cm4,
                                       display_labels=class_names4)
disp4.plot()
plt.show()
# *** Standar - MinMax ***
# Step 1: standardize the raw features.
scalerS5 = StandardScaler()
scaledXDFS5 = x.copy()

# Fitted once; the result is reused for both names.
scaledXS5 = scalerS5.fit_transform(scaledXDFS5)
scaledXDFS5[scaledXDFS5.columns] = scaledXS5
# The column list had lost its middle line in this listing, which left
# 'restecg' and 'ca' adjacent (they would concatenate into one bogus name).
scaledXDFS5.hist(column=['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs',
                         'restecg', 'thalach', 'exang', 'oldpeak', 'slope',
                         'ca'])

# Step 2: Min-Max scale the standardized features.
# NOTE(review): Min-Max scaling a per-column linear rescaling gives the same
# values as Min-Max scaling the raw column, so this input should match the
# MinMax-only experiment up to floating-point error.
scalerMM5 = MinMaxScaler()
scaledXDFMM5 = scaledXDFS5.copy()

scaledXMM5 = scalerMM5.fit_transform(scaledXDFMM5)
scaledXDFMM5[scaledXDFMM5.columns] = scaledXMM5
scaledXDFMM5.hist(column=['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs',
                          'restecg', 'thalach', 'exang', 'oldpeak', 'slope',
                          'ca'])

# NOTE(review): both scalers are fitted on all rows before the split, so test
# rows influence the scaling; consider fitting on the training split only.
xtrain5, xtest5, ytrain5, ytest5 = train_test_split(scaledXDFMM5, y,
                                                    test_size=0.3)

# NOTE(review): the statements from here to the final plot were partly missing
# from this listing; they are restored to mirror the MinMax section
# (the Train5/Test5 prints in particular are reconstructed -- confirm).
model5 = MLPClassifier(alpha=0.01, max_iter=1200)

# Train it.
model5.fit(xtrain5, ytrain5)

# Score the model.
print('* Standar - MinMax *')
print('Train5:', model5.score(xtrain5, ytrain5))
print('Test5:', model5.score(xtest5, ytest5))

# Predict on the test part and print the classification report.
# The label reads "report5": a surviving fragment of this statement said
# "report4", a copy-paste slip from the previous experiment.
ytestpred5 = model5.predict(xtest5)
print('Classification report5:\n', classification_report(ytest5, ytestpred5))

class_names5 = [1, 2]
plt.show()

# Explicit labels keep the matrix aligned with `display_labels`.
cm5 = metrics.confusion_matrix(ytest5, ytestpred5, labels=class_names5)
disp5 = metrics.ConfusionMatrixDisplay(confusion_matrix=cm5,
                                       display_labels=class_names5)
disp5.plot()
plt.show()

# Assignment: run heart.csv through several models (5 in total):
#   1. No scalers
#   2. MinMax scaler
#   3. Standard scaler
#   4. MinMax -> Standard
#   5. Standard -> MinMax
6
Data
160
140
120
100
80
60
40
20
0
1
0
Figure 1: Dataset heart.csv, cantidad de casos positivos y negativos representados por 1 y 0.
7
Report sin nada
Figure 2: Report para heart.csv sin nada.
8
Data sin nada
age sex cp
150
60 200
150 100
40
100
20 50
50
0 0 0
30 40 50 60 70 0.5 0.6 0.7 0.8 0.9 1.0 0.5 1.0 1.5 2.0 2.5 3.0
trestbps chol fbs
100
60 200
75
40
50
100
20 25
0 0 0
100 120 140 160 180 200 200 300 400 500 0.5 0.6 0.7 0.8 0.9 1.0
restecg thalach exang
150 80
200
60 150
100
40 100
50
20 50
0 0 0
0.6 0.8 1.0 1.2 1.4 1.6 1.8 2.0 80 100 120 140 160 180 200 0.5 0.6 0.7 0.8 0.9 1.0
oldpeak slope ca
150
150
100 100
100
50 50
50
0 0 0
0 1 2 3 4 5 6 0.6 0.8 1.0 1.2 1.4 1.6 1.8 2.0 0.5 1.0 1.5 2.0 2.5 3.0 3.5 4.0
Figure 3: Data para heart.csv sin nada.
9
Matriz de confusión sin nada
                  Predicted label
True label           1      2
         1          50      6
         2           4     31
Figure 4: Matriz de confusión para heart.csv sin nada.
10
Report con MinMax Scaler.
Figure 5: Report para heart.csv con MinMax Scaler.
11
Data con MinMax Scaler
age sex cp
150
60 200
150 100
40
100
20 50
50
0 0 0
0.0 0.2 0.4 0.6 0.8 1.0 0.0 0.2 0.4 0.6 0.8 1.0 0.0 0.2 0.4 0.6 0.8 1.0
trestbps chol fbs
100
60 200
75
40
50
100
20 25
0 0 0
0.0 0.2 0.4 0.6 0.8 1.0 0.0 0.2 0.4 0.6 0.8 1.0 0.0 0.2 0.4 0.6 0.8 1.0
150 80
200
60 150
100
40 100
50
20 50
0 0 0
0.0 0.2 0.4 0.6 0.8 1.0 0.0 0.2 0.4 0.6 0.8 1.0 0.0 0.2 0.4 0.6 0.8 1.0
oldpeak slope ca
150
150
100 100
100
50 50
50
0 0 0
0.0 0.2 0.4 0.6 0.8 1.0 0.0 0.2 0.4 0.6 0.8 1.0 0.0 0.2 0.4 0.6 0.8 1.0
Figure 6: Data para heart.csv con MinMax Scaler.
12
Matriz de confusión con MinMax Scaler.
                  Predicted label
True label           1      2
         1          41     20
         2           3     27
Figure 7: Matriz de confusión para heart.csv con MinMax Scaler.
13
Report con Standar scaler.
Figure 8: Report para heart.csv con Standar scaler.
14
Data con Standar scaler
age sex cp
150
60 200
150 100
40
100
20 50
50
0 0 0
−3 −2 −1 0 1 2 −1.5 −1.0 −0.5 0.0 0.5 −1.0 −0.5 0.0 0.5 1.0 1.5 2.0
trestbps chol fbs
100
60 200
75
40
50
100
20 25
0 0 0
−2 −1 0 1 2 3 4 −2 0 2 4 6 −0.5 0.0 0.5 1.0 1.5 2.0 2.5
150 80
200
60 150
100
40 100
50
20 50
0 0 0
−1 0 1 2 3 4 −3 −2 −1 0 1 2 −0.5 0.0 0.5 1.0 1.5
oldpeak slope ca
150
150
100 100
100
50 50
50
0 0 0
−1 0 1 2 3 4 −1.5 −1.0 −0.5 0.0 0.5 1.0 0 1 2 3
Figure 9: Data para heart.csv con Standar scaler.
15
Matriz de confusión con Standard Scaler.
                  Predicted label
True label           1      2
         1          35      8
         2          14     34
Figure 10: Matriz de confusión para heart.csv con Standard Scaler.
16
Report con Minmax - Standar.
Figure 11: Report para heart.csv con Minmax - Standar.
17
Data con Minmax - Standar
age sex cp
150
60 200
150 100
40
100
20 50
50
0 0 0
−3 −2 −1 0 1 2 −1.5 −1.0 −0.5 0.0 0.5 −1.0 −0.5 0.0 0.5 1.0 1.5 2.0
trestbps chol fbs
100
60 200
75
40
50
100
20 25
0 0 0
−2 −1 0 1 2 3 4 −2 0 2 4 6 −0.5 0.0 0.5 1.0 1.5 2.0 2.5
150 80
200
60 150
100
40 100
50
20 50
0 0 0
−1 0 1 2 3 4 −3 −2 −1 0 1 2 −0.5 0.0 0.5 1.0 1.5
oldpeak slope ca
150
150
100 100
100
50 50
50
0 0 0
−1 0 1 2 3 4 −1.5 −1.0 −0.5 0.0 0.5 1.0 0 1 2 3
Figure 12: Data para heart.csv con Minmax - Standar.
18
Matriz de confusión con MinMax - Standard.
                  Predicted label
True label           1      2
         1          39     10
         2          15     27
Figure 13: Matriz de confusión para heart.csv con MinMax - Standard.
19
Report con Standar - MinMax.
Figure 14: Report para heart.csv con Standar - MinMax.
20
Data con Standar - MinMax
age sex cp
150
60 200
150 100
40
100
20 50
50
0 0 0
0.0 0.2 0.4 0.6 0.8 1.0 0.0 0.2 0.4 0.6 0.8 1.0 0.0 0.2 0.4 0.6 0.8 1.0
trestbps chol fbs
100
60 200
75
40
50
100
20 25
0 0 0
0.0 0.2 0.4 0.6 0.8 1.0 0.0 0.2 0.4 0.6 0.8 1.0 0.0 0.2 0.4 0.6 0.8 1.0
150 80
200
60 150
100
40 100
50
20 50
0 0 0
0.0 0.2 0.4 0.6 0.8 1.0 0.0 0.2 0.4 0.6 0.8 1.0 0.0 0.2 0.4 0.6 0.8 1.0
oldpeak slope ca
150
150
100 100
100
50 50
50
0 0 0
0.0 0.2 0.4 0.6 0.8 1.0 0.0 0.2 0.4 0.6 0.8 1.0 0.0 0.2 0.4 0.6 0.8 1.0
Figure 15: Data para heart.csv con Standar - MinMax.
21
Matriz de confusión con Standard - MinMax.
                  Predicted label
True label           1      2
         1          38      5
         2          16     32
Figure 16: Matriz de confusión para heart.csv con Standard - MinMax.
22
Conclusión
En general, utilizar estas técnicas puede llevar a mejores resultados; sin embargo, es
importante considerar que, cuando existen valores atípicos y estos son de suma importancia,
el aprendizaje puede verse afectado. No obstante, esto no es lo común: generalmente los
problemas se presentan cuando existe una gran variedad de datos. Para el caso del dataset
heart.csv, el mejor train se obtiene cuando se aplican Min Max y Standard, en ese orden,
llegando a una precisión de 0.99, mientras que en el test puede notarse una disminución
significativa comparada con el caso normal, es decir, sin aplicar Scalers. De igual manera,
Standard Scaler da un valor bastante bueno para train, 0.97, pero en el test tan solo se
tiene 0.75; quizá en este sentido se presente un sobreentrenamiento, o es posible que el
Standard Scaler haya afectado los datos. Min Max Scaler presenta un train de 0.83, mientras
que en test tan solo 0.74. Finalmente, tenemos el mejor caso para el test pero el peor para
el train: no aplicar ningún Scaler da como resultado un train de 0.76, pero el test es el
mejor de todos los casos, con 0.89. Es importante mencionar que todos los modelos son
entrenados con los mismos hiperparámetros, es decir, con la misma cantidad de datos de train
y test, el mismo alpha, así como la misma cantidad de iteraciones.
23

Scalers

Cargado por

Copyright:

Formatos disponibles

Scalers

Cargado por

Información del documento

Derechos de autor

Formatos disponibles

Compartir este documento

Compartir o incrustar documentos

Opciones para compartir

¿Le pareció útil este documento?

¿Este contenido es inapropiado?

Copyright:

Formatos disponibles

Scalers

Cargado por

Copyright:

Formatos disponibles

UNIVERSIDAD DE GUADALAJARA

CENTRO UNIVERSITARIO DE CIENCIAS EXACTAS E

Nombre del alumno: Izmael Guzman Murguia

from sklearn . mo de l _s el ec t io n import t r ai n _ t e s t _ s p l i t

df = pd . read_csv ( ’ heart . csv ’)

# P l o t e a m o s la c a n t i d a d de p e r s o n a s con p r o b l e m a s del corazon ( 1 ) y los que no

ax = df [ ’ target ’] . value_counts () . plot ( kind = ’ bar ’)

df [ ’ target ’] . replace ( to_replace = 0 , value = 2 , inplace = True )

# escalar los valores de e n t r a d a s

xtrain2 , xtest2 , ytrain2 , ytest2 = t r a i n _ t e s t _ s p l i t ( scaledXDFMM , y , test_size = 0 .

model2 = MLPClassifier ( alpha = 0 . 01 , max_iter = 1200 )

model2 . fit ( xtrain2 , ytrain2 )

# Aplicar metrica al modelo

print ( ’* MinMax Scaler * ’)

ytestpred2 = model2 . predict ( xtest2 )

print ( ’ Cla ssificat ion report2 : \ n ’ , c l a s s i f i c a t i o n _ r e p o r t ( ytest2 , ytestpred2 ) )

# escalar los valores de e n t r a d a s

scaledXS = scalerS . fit_transform ( x )

# S P l i t t i n g into train and test

xtrain3 , xtest3 , ytrain3 , ytest3 = t r a i n _ t e s t _ s p l i t ( scaledXDFS , y , test_size = 0 . 3

model3 = MLPClassifier ( alpha = 0 . 01 , max_iter = 1200 )

model3 . fit ( xtrain3 , ytrain3 )

# Aplicar metrica al modelo

print ( ’* Standar scaler * ’)

ytestpred3 = model3 . predict ( xtest3 )

print ( ’ Cla ssificat ion report3 : \ n ’ , c l a s s i f i c a t i o n _ r e p o r t ( ytest3 , ytestpred3 ) )

# escalar los valores de e n t r a d a s

# S P l i t t i n g into train and test

scalerS2 = Standar dScaler () # Standar Scaler

scaledXS2 = scalerS2 . fit_transform ( scaledXDFS2 )

xtrain4 , xtest4 , ytrain4 , ytest4 = t r a i n _ t e s t _ s p l i t ( scaledXDFS2 , y , test_size = 0 .

model4 = MLPClassifier ( alpha = 0 . 01 , max_iter = 1200 )

model4 . fit ( xtrain4 , ytrain4 )

# Aplicar metrica al modelo

print ( ’* Minmax - Standar * ’)

ytestpred4 = model4 . predict ( xtest4 )

print ( ’ Cla ssificat ion report4 : \ n ’ , c l a s s i f i c a t i o n _ r e p o r t ( ytest4 , ytestpred4 ) )

# escalar los valores de e n t r a d a s

# S P l i t t i n g into train and test

scaledXMM5 = scalerMM5 . fit_transform ( scaledXDFMM5 )

xtrain5 , xtest5 , ytrain5 , ytest5 = t r a i n _ t e s t _ s p l i t ( scaledXDFMM5 , y , test_size = 0

model5 = MLPClassifier ( alpha = 0 . 01 , max_iter = 1200 )

model5 . fit ( xtrain5 , ytrain5 )

# Aplicar metrica al modelo

print ( ’* Standar - MinMax * ’)

ytestpred5 = model5 . predict ( xtest5 )

print ( ’ Cla ssificat ion report4 : \ n ’ , c l a s s i f i c a t i o n _ r e p o r t ( ytest5 , ytestpred5 ) )

Figure 2: Report para heart.csv sin nada.

Figure 3: Data para heart.csv sin nada.

Figure 4: Matriz de confusión para heart.csv sin nada.

Figure 5: Report para heart.csv con MinMax Scaler.

Figure 6: Data para heart.csv con MinMax Scaler.

Figure 7: Matriz de confusión para heart.csv con MinMax Scaler.

Figure 8: Report para heart.csv con Standar scaler.

Figure 9: Data para heart.csv con Standar scaler.

Figure 10: Matriz de confusión para heart.csv con Standard Scaler.

Figure 11: Report para heart.csv con Minmax - Standar.

Figure 12: Data para heart.csv con Minmax - Standar.

Figure 13: Matriz de confusión para heart.csv con MinMax - Standard.

Figure 14: Report para heart.csv con Standar - MinMax.