Trabajo

Download as pdf or txt
Download as pdf or txt
You are on page 1 of 5

Untitled10.ipynb - Colab https://colab.research.google.com/drive/15LGs5x72u4a8FcApdQ5Vs...

import pandas as pd
import numpy as np
from sklearn import linear_model
from sklearn import model_selection
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
import seaborn as sb
%matplotlib inline

from google.colab import drive

# Mount Google Drive at /content/drive so files stored there can be opened
# by path from this notebook.
drive.mount('/content/drive')
Mounted at /content/drive

import pandas as pd

# Read the CSV file from the root of Google Drive.
file_path = '/content/drive/My Drive/usuarios_win_mac_lin.csv'
dataframe = pd.read_csv(file_path)

# Show the first 5 records of the dataframe.
print(dataframe.head())

duracion paginas acciones valor clase


0 7.0 2 4 8 2
1 21.0 2 6 6 2
2 57.0 2 4 4 2
3 101.0 3 6 12 2
4 109.0 2 6 12 2

# Summary statistics (count, mean, std, min, quartiles, max) for each column.
dataframe.describe()

duracion paginas acciones valor clase

count 170.000000 170.000000 170.000000 170.000000 170.000000

mean 111.075729 2.041176 8.723529 32.676471 0.752941

std 202.453200 1.500911 9.136054 44.751993 0.841327

min 1.000000 1.000000 1.000000 1.000000 0.000000

25% 11.000000 1.000000 3.000000 8.000000 0.000000

50% 13.000000 2.000000 6.000000 20.000000 0.000000

75% 108.000000 2.000000 10.000000 36.000000 2.000000

max 898.000000 9.000000 63.000000 378.000000 2.000000

1 de 5 28/05/2024, 21:20
Untitled10.ipynb - Colab https://colab.research.google.com/drive/15LGs5x72u4a8FcApdQ5Vs...

# Count how many rows belong to each value of the 'clase' column.
print(dataframe.groupby('clase').size())
clase
0 86
1 40
2 44
dtype: int64

import matplotlib.pyplot as plt

# Draw one histogram per feature column; the 'clase' label column is dropped
# first so only the input features are plotted.
dataframe.drop(['clase'], axis=1).hist()
plt.show()

# Pairwise plots of the feature columns, colored by the 'clase' label.
# `height` is used instead of `size`: seaborn renamed the keyword and the old
# name only triggers a UserWarning.
# NOTE(review): the original line was cut off after "acciones"; "valor" is
# assumed to be the remaining entry of `vars` -- confirm against the notebook.
sb.pairplot(
    dataframe.dropna(),
    hue='clase',
    height=4,
    vars=["duracion", "paginas", "acciones", "valor"],
)

/usr/local/lib/python3.10/dist-packages/seaborn/axisgrid.py:2100: UserWarning: The `si


warnings.warn(msg, UserWarning)
<seaborn.axisgrid.PairGrid at 0x782cf9a395a0>

2 de 5 28/05/2024, 21:20
Untitled10.ipynb - Colab https://colab.research.google.com/drive/15LGs5x72u4a8FcApdQ5Vs...

3 de 5 28/05/2024, 21:20
Untitled10.ipynb - Colab https://colab.research.google.com/drive/15LGs5x72u4a8FcApdQ5Vs...

import numpy as np

# `dataframe` is expected to have been loaded by an earlier cell.
# Separate the table into the feature matrix X (every column except 'clase')
# and the target vector y (the 'clase' column), both as NumPy arrays.
features = dataframe.drop(columns=['clase'])
X = np.array(features)
y = np.array(dataframe['clase'])
print(X.shape)
(170, 4)

# Create and fit the classifier on the full dataset before using it; without
# this step `model` is undefined at this point and predict() cannot run.
model = linear_model.LogisticRegression(max_iter=1000)
model.fit(X, y)

# Predict the class of every row and show the first five predictions.
predictions = model.predict(X)
print(predictions[:5])
[2 2 2 2 2]

# Mean accuracy of the model's predictions for X against the labels y.
model.score(X,y)
0.7823529411764706

# Hold out 20% of the rows as a validation set; the fixed seed keeps the
# split reproducible between runs.
validation_size = 0.20
seed = 7
X_train, X_validation, Y_train, Y_validation = model_selection.train_test_split(
    X, y, test_size=validation_size, random_state=seed
)

name = 'Logistic Regression'

# Estimate accuracy on the training split with shuffled 10-fold
# cross-validation, then report the mean and standard deviation of the
# per-fold scores.
model = linear_model.LogisticRegression(max_iter=1000)
kfold = model_selection.KFold(n_splits=10, shuffle=True, random_state=seed)
cv_results = model_selection.cross_val_score(
    model, X_train, Y_train, cv=kfold, scoring='accuracy'
)
msg = f"{name}: {cv_results.mean():f} ({cv_results.std():f})"
print(msg)
Logistic Regression: 0.720330 (0.151123)

# Fit the model to the training data.
model.fit(X_train, Y_train)

# Make predictions on the validation set.
predictions = model.predict(X_validation)

# Compute the accuracy of the predictions against the validation labels.
print(accuracy_score(Y_validation, predictions))

4 de 5 28/05/2024, 21:20
Untitled10.ipynb - Colab https://colab.research.google.com/drive/15LGs5x72u4a8FcApdQ5Vs...

0.8529411764705882

# Confusion matrix for the validation set: rows are the true classes,
# columns are the predicted classes.
print(confusion_matrix(Y_validation, predictions))

[[16 0 2]
[ 3 3 0]
[ 0 0 10]]

# Per-class precision, recall, F1-score and support on the validation set.
print(classification_report(Y_validation, predictions))

precision recall f1-score support

0 0.84 0.89 0.86 18


1 1.00 0.50 0.67 6
2 0.83 1.00 0.91 10

accuracy 0.85 34
macro avg 0.89 0.80 0.81 34
weighted avg 0.87 0.85 0.84 34

5 de 5 28/05/2024, 21:20

You might also like