ml-exp-3 - Jupyter Notebook

In [1]:  # Importing necessary libraries


import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Step 1: Load the dataset
path = '/kaggle/input/bank-customer-churn-modeling/Churn_Modelling.csv'
df = pd.read_csv(path)

# Display the first few rows to check the dataset
df.head()

Out[1]:    RowNumber  CustomerId   Surname  CreditScore Geography  Gender  Age  Tenure
        0          1    15634602  Hargrave          619    France  Female   42       2
        1          2    15647311      Hill          608     Spain  Female   41       1
        2          3    15619304      Onio          502    France  Female   42       8
        3          4    15701354      Boni          699    France  Female   39       1
        4          5    15737888  Mitchell          850     Spain  Female   43       2

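The head confirms the file loads, but it is also worth checking dtypes and missing values before preprocessing. A minimal sanity check (not part of the original run) might be:

# Optional sanity check: column dtypes and null counts
df.info()
print(df.isnull().sum())

The Kaggle copy of this dataset typically has no missing values, so preprocessing can proceed directly.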

In [2]:  #STEP 2 Dropping irrelevant columns (CustomerId, Surname) and defining


X = df.drop(columns=['CustomerId', 'Surname', 'Exited']) # Features
y = df['Exited'] # Target

# Handle categorical data using one-hot encoding for 'Geography' and 'Gender'
X = pd.get_dummies(X, drop_first=True)

# Display the processed features
X.head(), y.head()

Out[2]: (   RowNumber  CreditScore  Age  Tenure    Balance  NumOfProducts  HasCrCard  \
         0          1          619   42       2       0.00              1          1
         1          2          608   41       1   83807.86              1          0
         2          3          502   42       8  159660.80              3          1
         3          4          699   39       1       0.00              2          0
         4          5          850   43       2  125510.82              1          1

            IsActiveMember  EstimatedSalary  Geography_Germany  Geography_Spain  \
         0               1        101348.88              False            False
         1               1        112542.58              False             True
         2               0        113931.57              False            False
         3               0         93826.63              False            False
         4               1         79084.10              False             True

            Gender_Male
         0        False
         1        False
         2        False
         3        False
         4        False ,
         0    1
         1    0
         2    1
         3    0
         4    0
         Name: Exited, dtype: int64)

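For reference, `drop_first=True` keeps k-1 indicator columns for a k-category feature, which is why France and Female have no columns of their own above: they are the all-False baselines. A minimal sketch on a hypothetical column:

# Hypothetical example: 3 categories -> 2 indicator columns
demo = pd.DataFrame({'Geography': ['France', 'Spain', 'Germany']})
print(pd.get_dummies(demo, drop_first=True))
# Prints Geography_Germany and Geography_Spain; a France row is False in both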

In [3]:  # Step 3: Normalize the feature data


scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Show the normalized data
pd.DataFrame(X_scaled, columns=X.columns).head()

Out[3]:    RowNumber  CreditScore       Age    Tenure   Balance  NumOfProducts  HasCrCard
        0  -1.731878    -0.326221  0.293517 -1.041760 -1.225848      -0.911583   0.646092
        1  -1.731531    -0.440036  0.198164 -1.387538  0.117350      -0.911583  -1.547768
        2  -1.731185    -1.536794  0.293517  1.032908  1.333053       2.527057   0.646092
        3  -1.730838     0.501521  0.007457 -1.387538 -1.225848       0.807737  -1.547768
        4  -1.730492     2.063884  0.388871 -1.041760  0.785728      -0.911583   0.646092

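StandardScaler transforms each column as z = (x - mean) / std, computed column-wise over the full feature matrix. A quick hand check against the first CreditScore value above (a sketch reusing the fitted scaler):

# Verify -0.326221 for CreditScore in row 0: z = (x - mean) / std
col = list(X.columns).index('CreditScore')
z = (X.iloc[0, col] - scaler.mean_[col]) / scaler.scale_[col]
print(z)  # ~ -0.326221, matching the table above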
In [4]:  # Step 4: Split the dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_s

# Display the shapes of training and testing data to verify the split
print(f"X_train shape: {X_train.shape}")
print(f"X_test shape: {X_test.shape}")
print(f"y_train shape: {y_train.shape}")
print(f"y_test shape: {y_test.shape}")

X_train shape: (8000, 12)
X_test shape: (2000, 12)
y_train shape: (8000,)
y_test shape: (2000,)

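Churners are a minority class (roughly 20% of customers, as the test-set confusion matrix below also shows), so a stratified split that preserves the class ratio in both sets is a common refinement. The run above did not use it; a sketch:

# Optional: stratify on y so train and test keep the same churn ratio
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42, stratify=y)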

In [5]:  # Step 5: Build the neural network model


model = Sequential()

# Adding the input layer and first hidden layer
model.add(Dense(units=10, activation='relu', input_dim=X_train.shape[1]))

# Adding the second hidden layer
model.add(Dense(units=10, activation='relu'))

# Adding the output layer
model.add(Dense(units=1, activation='sigmoid'))

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
history = model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_test, y_test))

# Print training results
history_df = pd.DataFrame(history.history)
history_df[['accuracy', 'val_accuracy']].plot()

Epoch 1/20
/opt/conda/lib/python3.10/site-packages/keras/src/layers/core/dense.py:87: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
250/250 ━━━━━━━━━━━━━━━━━━━━ 2s 2ms/step - accuracy: 0.6186 - loss: 0.6393 - val_accuracy: 0.8035 - val_loss: 0.4613
Epoch 2/20
250/250 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step - accuracy: 0.7970 - loss: 0.4668 - val_accuracy: 0.8130 - val_loss: 0.4255
Epoch 3/20
250/250 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step - accuracy: 0.8140 - loss: 0.4395 - val_accuracy: 0.8255 - val_loss: 0.4126
Epoch 4/20
250/250 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step - accuracy: 0.8303 - loss: 0.4179 - val_accuracy: 0.8270 - val_loss: 0.4073
Epoch 5/20
250/250 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step - accuracy: 0.8318 - loss: 0.4134 - val_accuracy: 0.8300 - val_loss: 0.4028
Epoch 6/20
250/250 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step - accuracy: 0.8303 - loss: 0.4114 - val_accuracy: 0.8375 - val_loss: 0.3979
Epoch 7/20
250/250 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step - accuracy: 0.8284 - loss: 0.4083 - val_accuracy: 0.8385 - val_loss: 0.3937
Epoch 8/20
250/250 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step - accuracy: 0.8262 - loss: 0.4152 - val_accuracy: 0.8415 - val_loss: 0.3881
Epoch 9/20
250/250 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step - accuracy: 0.8406 - loss: 0.3869 - val_accuracy: 0.8470 - val_loss: 0.3773
Epoch 10/20
250/250 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step - accuracy: 0.8502 - loss: 0.3699 - val_accuracy: 0.8530 - val_loss: 0.3679
Epoch 11/20
250/250 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step - accuracy: 0.8451 - loss: 0.3718 - val_accuracy: 0.8560 - val_loss: 0.3609
Epoch 12/20
250/250 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step - accuracy: 0.8406 - loss: 0.3821 - val_accuracy: 0.8565 - val_loss: 0.3532
Epoch 13/20
250/250 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step - accuracy: 0.8538 - loss: 0.3614 - val_accuracy: 0.8570 - val_loss: 0.3502
Epoch 14/20
250/250 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step - accuracy: 0.8555 - loss: 0.3472 - val_accuracy: 0.8580 - val_loss: 0.3486
Epoch 15/20
250/250 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step - accuracy: 0.8572 - loss: 0.3521 - val_accuracy: 0.8565 - val_loss: 0.3484
Epoch 16/20
250/250 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step - accuracy: 0.8670 - loss: 0.3293 - val_accuracy: 0.8595 - val_loss: 0.3462
Epoch 17/20
250/250 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step - accuracy: 0.8618 - loss: 0.3443 - val_accuracy: 0.8580 - val_loss: 0.3448
Epoch 18/20
250/250 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step - accuracy: 0.8653 - loss: 0.3386 - val_accuracy: 0.8575 - val_loss: 0.3446
Epoch 19/20
250/250 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step - accuracy: 0.8521 - loss: 0.3457 - val_accuracy: 0.8585 - val_loss: 0.3457
Epoch 20/20
250/250 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step - accuracy: 0.8623 - loss: 0.3456 - val_accuracy: 0.8570 - val_loss: 0.3438

Out[5]: <Axes: >

[Figure: training vs. validation accuracy over the 20 epochs]


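The UserWarning printed at the start of training comes from passing `input_dim` to the first Dense layer. A warning-free sketch of the same architecture, using the explicit `Input` object that Keras now recommends:

from tensorflow.keras.layers import Input

# Same 10-10-1 architecture, declared with an explicit Input layer
model = Sequential([
    Input(shape=(X_train.shape[1],)),
    Dense(units=10, activation='relu'),
    Dense(units=10, activation='relu'),
    Dense(units=1, activation='sigmoid'),
])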

In [6]:  # Step 6: Evaluate the model


# Predicting the test set results
y_pred = (model.predict(X_test) > 0.5).astype("int32")

# Print accuracy score
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")

# Print confusion matrix
cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(cm)

63/63 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step
Accuracy: 85.70%
Confusion Matrix:
[[1533   74]
 [ 212  181]]

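The confusion matrix shows the model is much weaker on the minority class: 212 of the 393 actual churners are missed (recall of roughly 46%), so the 85.70% accuracy overstates performance where it matters most. A sketch of per-class metrics with scikit-learn:

from sklearn.metrics import classification_report

# Per-class precision/recall; recall on 'Exited' is the key figure for churn
print(classification_report(y_test, y_pred, target_names=['Stayed', 'Exited']))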