Regresión Logística

UNIVERSIDAD DE GUADALAJARA
CENTRO UNIVERSITARIO DE CIENCIAS EXACTAS E

INGENIERÍAS
Seminario de Inteligencia Artificial II.
Reporte de práctica
Nombre del alumno: Izmael Guzman Murguia

Profesor: Erasmo Gabriel Martínez Soltero
Título de la práctica: Regresión logística
Fecha: 24 febrero 2023
1
Código: entrenar un modelo con regresión polinómica
sobre el dataset car_data.csv para estimar el precio de venta de carros
# -*- coding: utf-8 -*-
"""
Polynomial regression on the car-price dataset.

Loads car_data.csv, encodes the categorical columns as integers, expands the
inputs into degree-2 polynomial features, fits an ordinary least-squares model
and prints its R^2 score on the training and held-out data.

@author: Izmael Guzman Murguia
"""
import pandas as pd
import numpy as np
from sklearn import metrics
import matplotlib.pyplot as plt
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

BANNER = "*" * 49

# Integer code assigned to every category of each text column.
CATEGORY_CODES = {
    'Fuel_Type': {'Petrol': 1, 'Diesel': 2, 'CNG': 3},
    'Seller_Type': {'Dealer': 1, 'Individual': 2},
    'Transmission': {'Manual': 1, 'Automatic': 2},
}

df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/IA/car_data.csv')

# Column names, non-null counts and dtypes.
df.info()
# Summary statistics.
print(df.describe())
# First rows.
print(df.head())

#  #   Column         Non-Null Count  Dtype
# ---  ------         --------------  -----
#  0   Car_Name       301 non-null    object
#  1   Year           301 non-null    int64
#  2   Selling_Price  301 non-null    float64
#  3   Present_Price  301 non-null    float64
#  4   Kms_Driven     301 non-null    int64
#  5   Fuel_Type      301 non-null    object
#  6   Seller_Type    301 non-null    object
#  7   Transmission   301 non-null    object
#  8   Owner          301 non-null    int64

# Convert categorical values to numbers: the regressor cannot take strings.
# Assign the encoded column back to the frame instead of calling
# `df[col].replace(..., inplace=True)`: that chained in-place call acts on a
# temporary Series and does not update `df` under pandas Copy-on-Write.
for column, codes in CATEGORY_CODES.items():
    encoded = df[column].map(codes)
    if encoded.isna().any():
        # Fail loudly instead of silently feeding NaN/strings to the model.
        unknown = sorted(set(df[column].dropna().unique()) - set(codes))
        raise ValueError(
            f"Column {column!r} has missing or unmapped values: {unknown}")
    df[column] = encoded.astype(int)

# 'Owner' is already int64 (see the table above), so it needs no encoding.
# The original replace('0' -> 1) / replace('1' -> 2) calls compared against
# strings and therefore never matched anything; they have been removed.

df.info()

y = df['Selling_Price']
# Car_Name is free text and Selling_Price is the target: neither is an input.
X = df.drop(columns=['Car_Name', 'Selling_Price'])
X.info()

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42)

deg = 2  # Degree of the polynomial expansion
poly_features = PolynomialFeatures(degree=deg, include_bias=False)
X_train_poly = poly_features.fit_transform(X_train)
# Only transform the test set: the expansion must be fitted on training data.
X_test_poly = poly_features.transform(X_test)

# Fitting
lin_reg = LinearRegression()
lin_reg.fit(X_train_poly, y_train)
prediction = lin_reg.predict(X_test_poly)

print("\n\n\n" + BANNER)
print("*\tPrecicion model: ", lin_reg.score(X_train_poly, y_train), "\t*")
# The line above is the training R^2; also report the score on unseen data.
print("*\tTest R2: ", metrics.r2_score(y_test, prediction), "\t*")
print("*\tDegree: ", deg, "\t\t\t\t*")
print("*\tCreated by: Izmael Guzman Murguia\t*")
print(BANNER)
3
Resultados
Figure 1: Precisión con regresión polinómica para el dataset car_data.csv.
4
Código: aplicar regresión logística para estimar la probabilidad
de que alguien tenga cáncer de mama o no (aquí la salida y es
"diagnosis", y hay que tener cuidado con las columnas que no
aportan información, como por ejemplo id)
# Logistic regression on the breast-cancer dataset: predicts the 'diagnosis'
# column from the measurement columns and prints the resulting accuracy.

# Import the libraries
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
import numpy as np
from sklearn.pipeline import Pipeline
# Both names below are used by this script but were never imported by it.
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

BANNER = "*" * 41

# Load the dataset
df = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/IA/breastCancer.csv")
# Show the first five records (a bare `df.head(5)` displays nothing in a script)
print(df.head(5))
# Show general information about the columns
df.info()

# Inspect the non-numeric column (the original referenced `.unique` without
# calling it, which is a no-op).
print(df['diagnosis'].unique())

# Replace the non-numeric labels with numbers: M -> 1, B -> 2.
# Assign the result back instead of a chained `replace(..., inplace=True)`,
# which does not update `df` under pandas Copy-on-Write.
diagnosis_codes = {'M': 1, 'B': 2}
encoded = df['diagnosis'].map(diagnosis_codes)
if encoded.isna().any():
    raise ValueError("Column 'diagnosis' has values other than 'M' and 'B'")
df['diagnosis'] = encoded.astype(int)

df.info()

# Select the output (y) value
y = df['diagnosis']
# Select the input values: everything except the record id, the target and
# the empty trailing column pandas names 'Unnamed: 32'. The original slice
# `iloc[:, 2:31]` stopped one column early and dropped the last real feature.
# NOTE(review): assumes the usual id / diagnosis / 30 features layout -- confirm.
X = df.drop(columns=['id', 'diagnosis', 'Unnamed: 32'], errors='ignore')

# Split into train (80%) and test (20%); the seed makes the run reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)
print("Size data of input on train: ", X_train.shape)
print("Size data of input test: ", X_test.shape)
print("Size data of output train", y_train.shape)
print("Size data of result of test", y_test.shape)

# Logistic regression model. The inputs are standardised first: the default
# lbfgs solver is sensitive to feature scale and may stop at its iteration
# limit on raw measurements.
model = Pipeline([
    ('scaler', StandardScaler()),
    ('logit', LogisticRegression()),
])
model.fit(X_train, y_train)

# Prediction with the test data
y_pred = model.predict(X_test)

# Score the model with accuracy
accuracy = accuracy_score(y_test, y_pred)

print("\n\n\n\n" + BANNER)
print("*", f'Model scuere: {accuracy:0.1%}', "\t\t\t*")
print('* Train: ', model.score(X_train, y_train), "\t\t*")
print('* Test: ', model.score(X_test, y_test), "\t\t*")
print(BANNER + "\n\n\n\n\n")
5
# draw the image
# plt . figure ( ’ p r o b a b i l i d a d ’)
# g = X_test
# p r e d i c t i o n = model . p r e d i c t _ p r o b a ( g )
# p r e d i c t i o n 1 = model . predict ( g )
# plt . plot ( X_train , y_train , ’. b ’)
# plt . plot ( X_test , y_test , ’. r ’)
# plt . xlabel ( ’ Glucose ’)
# plt . ylabel ( ’ Outcome ’)
# plt . plot (g , prediction1 ," - -")
# # Crear modelo y e n t r e n a r
# models = P i p e l i n e ([
# ( ’ scaler ’, S t a n d a r d S c a l e r () ) ,
# ( ’ logit ’, L o g i s t i c R e g r e s s i o n ( solver = ’ lbfgs ’) ) ])
# models . fit ( X_train , y_train )
# # C a l c u l a r Score
# print ( ’ Train : ’, models . score ( X_train , y_train ) )
# print ( ’ Test : ’, models . score ( X_test , y_test ) )
6
Resultados
Figure 2: Precisión para la regresión logística con el dataset breastCancer.csv
7
Código: lo mismo para saber si alguien padece del corazón (para
este caso la salida es la columna "target")
# Logistic regression on the heart-disease dataset: predicts the 'target'
# column from the remaining columns and prints the resulting accuracy.

# Import the libraries
import pandas as pd
from sklearn.linear_model import LogisticRegression
import numpy as np
from sklearn.pipeline import Pipeline
# The three names below are used by this script but were never imported by it.
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

SEPARATOR = "*" * 37
BANNER = "*" * 65

# Columns whose zeros are replaced by half of their smallest non-zero value.
ZERO_IMPUTED_COLUMNS = [
    'sex', 'cp', 'trestbps', 'fbs', 'restecg',
    'exang', 'oldpeak', 'slope', 'ca', 'thal',
]

# Load the dataset
df = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/IA/heart.csv")
# Show general information about the columns
# df.info()

# Replace 0 with min / 2 of each column: the new value is still the smallest
# one but is different from 0.
# The original computed the minimum once, from 'sex', and reused that stale
# value for every other column (and stored it in a variable named `min`,
# shadowing the builtin). Here it is recomputed per column.
print(SEPARATOR)
for column in ZERO_IMPUTED_COLUMNS:
    nonzero_values = df.loc[df[column] != 0, column]
    print(SEPARATOR)
    print(nonzero_values)
    if nonzero_values.empty:
        # Every value is 0: there is no non-zero minimum to derive from.
        print(SEPARATOR)
        continue
    half_min = nonzero_values.min() / 2
    print("Min: ", half_min)
    # Assign back instead of a chained `replace(..., inplace=True)`, which
    # does not update `df` under pandas Copy-on-Write. Cast to float first so
    # integer columns can hold the fractional replacement.
    df[column] = df[column].astype(float).replace(0.0, half_min)
    print(df)
    print(SEPARATOR)

# Recode the target labels: 0 -> 2 (1 stays as it is).
print(SEPARATOR)
print(df.loc[df['target'] != 0, 'target'])
df['target'] = df['target'].replace(0, 2)
print(df)

df.info()

# Select the y value
y = df['target']
# Select the X values: every column except the target. The original slice
# `iloc[:, 0:12]` stopped one column early and dropped 'thal', which had just
# been preprocessed above.
# NOTE(review): assumes 'target' is the last of 14 columns -- confirm.
X = df.drop(columns=['target'])

# Split into train (80%) and test (20%); the seed makes the run reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)
print("Size data of input: ", X_train.shape)
print("Size data of input: ", X_test.shape)
print("Size data of output: ", y_train.shape)
print("Size data of output: ", y_test.shape)
# df.head()

# Logistic regression model. The inputs are standardised first: the default
# lbfgs solver is sensitive to feature scale and may stop at its iteration
# limit on raw measurements.
model = Pipeline([
    ('scaler', StandardScaler()),
    ('logit', LogisticRegression()),
])
model.fit(X_train, y_train)

# Prediction with the test data
y_pred = model.predict(X_test)

# Score the model with accuracy
accuracy = accuracy_score(y_test, y_pred)

# The original opened with "\nt\t\t", printing a stray 't' and misaligning the
# top border; the intended escape sequence is "\n\t\t\t".
print("\n\t\t\t" + BANNER)
print("\t\t\t*\t\t", f'Model scuere: {accuracy:0.1%}', "\t\t\t\t*")
print('\t\t\t*\t\t Train: ', model.score(X_train, y_train), "\t\t\t*")
print('\t\t\t*\t\t Test: ', model.score(X_test, y_test), "\t\t\t*")
print("\t\t\t" + BANNER)
# draw the image

# plt . figure ( ’ p r o b a b i l i d a d ’)
# g = X_test
# p r e d i c t i o n = model . p r e d i c t _ p r o b a ( g )
# p r e d i c t i o n 1 = model . predict ( g )
# plt . plot ( X_train , y_train , ’. b ’)
# plt . plot ( X_test , y_test , ’. r ’)
# plt . xlabel ( ’ Glucose ’)
# plt . ylabel ( ’ Outcome ’)
# plt . plot (g , prediction1 ," - -")
# # Crear modelo y e n t r e n a r
# models = P i p e l i n e ([
# ( ’ scaler ’, S t a n d a r d S c a l e r () ) ,
# ( ’ logit ’, L o g i s t i c R e g r e s s i o n ( solver = ’ lbfgs ’) ) ])
# models . fit ( X_train , y_train )
# # C a l c u l a r Score
# print ( ’ Train : ’, models . score ( X_train , y_train ) )
# print ( ’ Test : ’, models . score ( X_test , y_test ) )
10
Resultados
Figure 3: Precisión para la regresión logística con el dataset heart.csv
11
Conclusión
Cuando aplicamos algoritmos de inteligencia artificial hay que cuidar varios aspectos. El primero
es qué información es realmente útil: un claro ejemplo de información que no lo es son los ids, que
en la mayoría de los casos son números consecutivos que solo identifican el registro. También debemos
asegurarnos de que los datos sean de tipo numérico, por lo que resulta útil emplear librerías
que nos permitan reemplazar valores categóricos por valores numéricos. Asimismo, es necesario
verificar que no existan valores nulos en ningún campo, ya que podrían ocasionar un error. Por
último, graficar un problema con varias variables de entrada (un problema multidimensional) es un
poco complicado: en los casos unidimensionales o bidimensionales sí es sencillo graficar, ya que
contamos con gráficas 2D y 3D que podemos observar y analizar sin ningún problema; sin embargo,
cuando las variables son demasiadas las cosas se complican.
12

Regresión Logistica

Cargado por

Copyright:

Formatos disponibles

Regresión Logistica

Cargado por

Información del documento

Título original

Derechos de autor

Formatos disponibles

Compartir este documento

Compartir o incrustar documentos

Opciones para compartir

¿Le pareció útil este documento?

¿Este contenido es inapropiado?

Copyright:

Formatos disponibles

Regresión Logistica

Cargado por

Copyright:

Formatos disponibles

UNIVERSIDAD DE GUADALAJARA

CENTRO UNIVERSITARIO DE CIENCIAS EXACTAS E

Nombre del alumno: Izmael Guzman Murguia

# Column Non - Null Count Dtype

# c o n v e r t i r c a t e g o r i c o s a numeros porque pues no le puedo meter ’ male ’ o ’ female ’

df [ ’ Seller_Type ’] . replace ( to_replace = ’ Dealer ’ , value =1 , inplace = True )

df [ ’ Transmission ’] . replace ( to_replace = ’ Manual ’ , value =1 , inplace = True )

df [ ’ Owner ’] . replace ( to_replace = ’0 ’ , value =1 , inplace = True )

df = df . drop ( columns = [ ’ Car_Name ’ , ’ Selling_Price ’] )

X_train , X_test , y_train , y_test = t r a i n _ t e s t _ s p l i t (X , y , test_size = 0 . 33 ,

prediction = lin_reg . predict ( X_test_poly )

Figure 1: Precisión con regresión polinómica para el dataset card ata.csv.

# Execute load dataset

# Test the no numeric column

# Renace no numeric to numeric Number

# Test the no numeric column

# Select de output ( Y ) value

# S e p a r a t e the value train and value test ( 80 ) ( 20 )

print ( " Size data of input on train : " , X_train . shape )

# P r e d i c t i o n whit probes dates

# Scuere model whit accuary

# Scuere whit model

Figure 2: Precisión para la regresión logı́stica con el dataset breastCancer.csv

# Execute load dataset

# Replace 0 whit min / 2 is equial ya que sigue ciemdo el menor d i f e r e n t of 0

print ( " * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * " )

print ( " * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * " )

print ( " * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * " )

print ( " * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * " )

print ( " * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * " )

print ( " * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * " )

print ( " * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * " )

print ( " * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * " )

print ( " * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * " )

print ( " * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * " )

# R e s e r v a t i o n the value tran and test

# The p r e d i c t i o n whit probes dates

# Scuere model whit accuary

# Scuere whit model

print ( " \ nt \ t \ t * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * " )

# draw the image

Figure 3: Precisión para la regresión logı́stica con el dataset heart.csv

También podría gustarte