#Group: B (ML) : Numpy NP Pandas PD
#Group: B (ML) : Numpy NP Pandas PD
[2]:
import numpy as np
import pandas as pd
[3]: df = pd.read_csv("Churn_Modelling.csv")  # read the dataset from a CSV file given by relative path
df  # bare last expression: display the loaded frame
1
EstimatedSalary Exited
0 101348.88 1
1 112542.58 0
2 113931.57 1
3 93826.63 0
4 79084.10 0
... ... ...
9995 96270.64 0
9996 101699.77 0
9997 42085.58 1
9998 92888.52 1
9999 38190.78 0
[4]:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 14 columns):
# Column Non-Null Count Dtype
[5]:
2
Tenure Balance NumOfProducts HasCrCard IsActiveMember \
0 2 0.00 1 1 1
1 1 83807.86 1 0 1
2 8 159660.80 3 1 0
3 1 0.00 2 0 0
4 2 125510.82 1 1 1
EstimatedSalary Exited
0 101348.88 1
1 112542.58 0
2 113931.57 1
3 93826.63 0
4 79084.10 0
[6]:
EstimatedSalary Exited
9995 96270.64 0
9996 101699.77 0
9997 42085.58 1
9998 92888.52 1
9999 38190.78 0
[7]:
[8]:
3
min 1.00000 1.556570e+07 350.000000 18.000000 0.000000
25% 2500.75000 1.562853e+07 584.000000 32.000000 3.000000
50% 5000.50000 1.569074e+07 652.000000 37.000000 5.000000
75% 7500.25000 1.575323e+07 718.000000 44.000000 7.000000
max 10000.00000 1.581569e+07 850.000000 92.000000 10.000000
EstimatedSalary Exited
count 10000.000000 10000.000000
mean 100090.239881 0.203700
std 57510.492818 0.402769
min 11.580000 0.000000
25% 51002.110000 0.000000
50% 100193.915000 0.000000
75% 149388.247500 0.000000
max 199992.480000 1.000000
[9]:
[10]:
4
[10] : RowNumber 0
CustomerId 0
Surname 0
CreditScore 0
Geography 0
Gender 0
Age 0
Tenure 0
Balance 0
NumOfProducts 0
HasCrCard 0
IsActiveMember 0
EstimatedSalary 0
Exited 0
dtype: int64
[11]: df.columns  # list the column labels of the loaded frame
[12]:
EstimatedSalary Exited
0 101348.88 1
1 112542.58 0
2 113931.57 1
3 93826.63 0
4 79084.10 0
5
[13]: # Distinguish the feature and target set, and divide the data into training and test sets
# Input data: the eight feature columns used as predictors.
# The original statement was split by line-wrap artifacts and cut off at
# 'Esti; the last name is restored as 'EstimatedSalary', which is the final
# column of the displayed feature frame and makes the width 8, matching the
# reported x.shape of (10000, 8).
x = df[['CreditScore', 'Age', 'Tenure', 'Balance', 'NumOfProducts',
        'HasCrCard', 'IsActiveMember', 'EstimatedSalary']]
# Output data: the target column, whose value counts show labels 0 and 1.
y = df['Exited']
[14]:
IsActiveMember EstimatedSalary
0 1 101348.88
1 1 112542.58
2 0 113931.57
3 0 93826.63
4 1 79084.10
... ... ...
9995 0 96270.64
9996 1 101699.77
9997 1 42085.58
9998 0 92888.52
9999 0 38190.78
[15]: =
6
[16]:
[16]: Exited
0 7963
1 2037
Name: count, dtype: int64
[29]:
ros
[30]:
from sklearn.preprocessing import StandardScaler

# Standardise the feature columns: learn the per-column statistics from x,
# then apply them to the same data.
# NOTE(review): the scaler is fitted on all rows of x, and the split shapes
# only appear in later cells -- confirm whether fitting on the training
# subset alone was intended.
scaler = StandardScaler()
x_scaled = scaler.fit(x).transform(x)
x_scaled
7
-1.03067011, 0.2406869 ],
...,
[ 0.60498839, -0.27860412, 0.68712986, ..., -1.54776799,
0.97024255, -1.00864308],
[ 1.25683526, 0.29351742, -0.69598177, ..., 0.64609167,
-1.03067011, -0.12523071],
[ 1.46377078, -1.04143285, -0.35020386, ..., 0.64609167,
-1.03067011, -1.07636976]])
[31]:
# Imported for the train/test split announced in the earlier cell comment;
# the call itself is not visible in this cell.
from sklearn.model_selection import train_test_split
# Display the dimensions of the feature frame.
x.shape
[31]: (10000, 8)
[32]:
[32]: (2500, 8)
[33]:
[33]: (7500, 8)
[35]:
from sklearn.metrics import ConfusionMatrixDisplay, classification_report
from sklearn.metrics import accuracy_score
[37]:
[37] : Exited
0 1991
1 509
Name: count, dtype: int64
8
[39]:
[39]: 0.7968
[40]:
[ ]:
[ ]: