0% found this document useful (0 votes)

5 views

Sanket ML Assign1

Uploaded by

21102092.karanpatel

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as PDF, TXT or read online on Scribd

0% found this document useful (0 votes)

5 views

Sanket ML Assign1

Uploaded by

21102092.karanpatel

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as PDF, TXT or read online on Scribd

You are on page 1/ 9

NAME : Sanket Sarode

DIV : B BATCH : B2

ROLL NO : 130

ID : 21102045

import pandas as pd

import matplotlib.pyplot as plt

import seaborn as sns

# Load dataset
data = pd.read_csv('target.csv')

# Descriptive statistics
print(data.describe())

# Check for null values
print(data.isnull().sum())

# Drop rows with null values (if any)
data = data.dropna()

# Convert categorical columns to numerical.
# 'salary' is ordinal, so it gets an explicit low < medium < high mapping;
# any value outside these three labels becomes NaN under Series.map.
data['salary'] = data['salary'].map({'low': 0, 'medium': 1, 'high': 2})
# 'Department' has no natural order; category codes are arbitrary integer ids.
data['Department'] = data['Department'].astype('category').cat.codes

       satisfaction_level  last_evaluation  number_project  \
count        14999.000000     14999.000000    14999.000000
mean             0.612834         0.716102        3.803054
std              0.248631         0.171169        1.232592
min              0.090000         0.360000        2.000000
25%              0.440000         0.560000        3.000000
50%              0.640000         0.720000        4.000000
75%              0.820000         0.870000        5.000000
max              1.000000         1.000000        7.000000
       average_montly_hours  time_spend_company  Work_accident          left  \
count          14999.000000        14999.000000   14999.000000  14999.000000
mean             201.050337            3.498233       0.144610      0.238083
std               49.943099            1.460136       0.351719      0.425924
min               96.000000            2.000000       0.000000      0.000000
25%              156.000000            3.000000       0.000000      0.000000
50%              200.000000            3.000000       0.000000      0.000000
75%              245.000000            4.000000       0.000000      0.000000
max              310.000000           10.000000       1.000000      1.000000

       promotion_last_5years
count           14999.000000
mean                0.021268
std                 0.144281
min                 0.000000
25%                 0.000000
50%                 0.000000
75%                 0.000000
max                 1.000000
satisfaction_level 0
last_evaluation 0
number_project 0
average_montly_hours 0
time_spend_company 0
Work_accident 0
left 0
promotion_last_5years 0
Department 0
salary 0
dtype: int64

# Boxplot to detect outliers
sns.boxplot(data=data[['satisfaction_level', 'last_evaluation',
                       'number_project', 'average_montly_hours',
                       'time_spend_company']])
plt.show()

# Optional: Remove outliers based on some criteria (e.g., z-score, IQR method)

# Correlation matrix
# NOTE: computed over every column of `data`, so 'salary' and 'Department'
# must already be numerically encoded at this point.
corr_matrix = data.corr()
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm')
plt.title("Correlation Matrix")
plt.show()

# Bar chart: Salary vs Retention
# 'left' is 0/1, so each bar's height is the fraction of employees who left.
sns.barplot(x='salary', y='left', data=data)
plt.title("Impact of Salary on Employee Retention")
plt.xlabel("Salary Level")
plt.ylabel("Employee Left (1 = Left, 0 = Stayed)")
plt.show()

# Bar chart: Department vs Retention
sns.barplot(x='Department', y='left', data=data)
plt.title("Correlation Between Department and Employee Retention")
plt.xlabel("Department")
plt.ylabel("Employee Left (1 = Left, 0 = Stayed)")
plt.show()
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    accuracy_score,
    confusion_matrix,
    precision_score,
    recall_score,
    roc_auc_score,
    roc_curve,
)
from sklearn.model_selection import train_test_split

# Load and prepare your dataset (assuming it's preprocessed)
# data = pd.read_csv('employee_data.csv')  # If not already loaded

# Define features and target variable
X = data.drop('left', axis=1)  # Features (excluding the target column 'left')
y = data['left']  # Target (employee retention)

# Split data into training and testing sets (70% train, 30% test)
# random_state is fixed so the split is reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Initialize and train the logistic regression model
log_reg = LogisticRegression(max_iter=1000)
log_reg.fit(X_train, y_train)

# Predict on the test set
y_pred = log_reg.predict(X_test)
# Column 1 of predict_proba is the probability of the second class
# (left == 1 for a 0/1 target); it is what the ROC curve is drawn from.
y_pred_proba = log_reg.predict_proba(X_test)[:, 1]

# Confusion matrix
conf_matrix = confusion_matrix(y_test, y_pred)

# Plotting confusion matrix
plt.figure(figsize=(8, 6))
# fmt='d' prints the cell counts as plain integers.
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues')
plt.title("Confusion Matrix")
plt.ylabel('Actual')
plt.xlabel('Predicted')
plt.show()

# Calculate ROC curve and AUC score
fpr, tpr, thresholds = roc_curve(y_test, y_pred_proba)
roc_auc = roc_auc_score(y_test, y_pred_proba)

# Plotting ROC Curve
plt.figure(figsize=(8, 6))
plt.plot(fpr, tpr, color='orange', label=f'ROC Curve (AUC = {roc_auc:.2f})')
# Diagonal reference line: the ROC of a classifier that guesses at random.
plt.plot([0, 1], [0, 1], color='darkblue', linestyle='--')
plt.title('Receiver Operating Characteristic (ROC) Curve')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend()
plt.show()

# Calculate accuracy, precision, and recall
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)

# Sensitivity (same as recall) and Specificity
# For a binary target, ravel() flattens the 2x2 confusion matrix in the
# order tn, fp, fn, tp.
tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()
sensitivity = tp / (tp + fn)  # Sensitivity = TP / (TP + FN)
specificity = tn / (tn + fp)  # Specificity = TN / (TN + FP)

# Print evaluation metrics
print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall (Sensitivity): {recall:.2f}")
print(f"Specificity: {specificity:.2f}")

Accuracy: 0.78
Precision: 0.59
Recall (Sensitivity): 0.33
Specificity: 0.93

Supervised Learning
100% (1)
Supervised Learning
15 pages
Regression Analysis - Cheatsheet
No ratings yet
Regression Analysis - Cheatsheet
9 pages
Guideline OPC UA
No ratings yet
Guideline OPC UA
32 pages
Srushti ML Assign1
No ratings yet
Srushti ML Assign1
9 pages
Komal ML Assg1
No ratings yet
Komal ML Assg1
9 pages
Data Analysis in Python-3
No ratings yet
Data Analysis in Python-3
4 pages
Logistic Binary Classification
No ratings yet
Logistic Binary Classification
3 pages
python 1
No ratings yet
python 1
3 pages
Simple Linear Regression
No ratings yet
Simple Linear Regression
30 pages
ml_6_7_8 (1)
No ratings yet
ml_6_7_8 (1)
10 pages
Appendix B: Source Code
No ratings yet
Appendix B: Source Code
5 pages
Code Book
No ratings yet
Code Book
20 pages
DADM Unit 5 Programs
No ratings yet
DADM Unit 5 Programs
63 pages
Iot Da3
No ratings yet
Iot Da3
12 pages
Task1
No ratings yet
Task1
5 pages
Logistic Regression
100% (1)
Logistic Regression
10 pages
2022UCD2164-1-2
No ratings yet
2022UCD2164-1-2
35 pages
Project paarth (1) (1)
No ratings yet
Project paarth (1) (1)
21 pages
Ml Projects
No ratings yet
Ml Projects
22 pages
C: Users Dell Downloads Salary - Data - CSV
No ratings yet
C: Users Dell Downloads Salary - Data - CSV
2 pages
EXP 2 ML
No ratings yet
EXP 2 ML
4 pages
Machine Learning Hands-On
100% (1)
Machine Learning Hands-On
18 pages
EXP-4 DMusingPYTHON
No ratings yet
EXP-4 DMusingPYTHON
7 pages
Linear Regression2
No ratings yet
Linear Regression2
9 pages
DA_Programs
No ratings yet
DA_Programs
44 pages
Data Preprocessing & Visualization1
No ratings yet
Data Preprocessing & Visualization1
2 pages
Regression Dataset Example
No ratings yet
Regression Dataset Example
14 pages
ML Capacity Career Choice Prediction Annotation
No ratings yet
ML Capacity Career Choice Prediction Annotation
20 pages
Btech1007022_lab5.1
No ratings yet
Btech1007022_lab5.1
9 pages
Regression Demo
No ratings yet
Regression Demo
8 pages
Lab 1
No ratings yet
Lab 1
3 pages
Simple Linear Regression in Machine Learning
No ratings yet
Simple Linear Regression in Machine Learning
7 pages
HR Analytic Using Logistic Regression
No ratings yet
HR Analytic Using Logistic Regression
12 pages
Home Work
No ratings yet
Home Work
12 pages
Classification Problems
100% (1)
Classification Problems
25 pages
Btech1007022_lab5
No ratings yet
Btech1007022_lab5
14 pages
Data Science Record_05
No ratings yet
Data Science Record_05
20 pages
Supervised Learning For Data Science...
No ratings yet
Supervised Learning For Data Science...
14 pages
linear regression program
No ratings yet
linear regression program
2 pages
Practical # 10
No ratings yet
Practical # 10
5 pages
2.1 ML (Implementation of Simple Linear Regression in Python)
No ratings yet
2.1 ML (Implementation of Simple Linear Regression in Python)
8 pages
Salary Prediction LinearRegression
100% (1)
Salary Prediction LinearRegression
7 pages
Credit_defaulters_prediction-using-logostic-regression
No ratings yet
Credit_defaulters_prediction-using-logostic-regression
17 pages
Assignment 03
No ratings yet
Assignment 03
6 pages
5 Logistic Regression Social Nw
No ratings yet
5 Logistic Regression Social Nw
5 pages
ML Manual Final
No ratings yet
ML Manual Final
35 pages
Python Cod1
No ratings yet
Python Cod1
3 pages
Absenteeism_module
No ratings yet
Absenteeism_module
2 pages
AI CODE
No ratings yet
AI CODE
2 pages
Aquif Ibrar 1212
No ratings yet
Aquif Ibrar 1212
9 pages
Aiml Ex 4-7
No ratings yet
Aiml Ex 4-7
8 pages
Binary logistic regression
No ratings yet
Binary logistic regression
1 page
Group Work Assignment Supervised and Unsupervised Learning
No ratings yet
Group Work Assignment Supervised and Unsupervised Learning
10 pages
23UCC554
No ratings yet
23UCC554
9 pages
Unit5 - Linear Regression
No ratings yet
Unit5 - Linear Regression
4 pages
ML Activity Kalyan
No ratings yet
ML Activity Kalyan
21 pages
Logistic Regression
No ratings yet
Logistic Regression
4 pages
FYMCA IDSLab A6 Submission
No ratings yet
FYMCA IDSLab A6 Submission
9 pages
Regression
No ratings yet
Regression
16 pages
Exp3 ML
No ratings yet
Exp3 ML
4 pages
MCS-011: Problem Solving and Programming
From Everand
MCS-011: Problem Solving and Programming
Dr. DK Sukhani
No ratings yet
Lecturer Notes Cte 214 Computer Architecture
No ratings yet
Lecturer Notes Cte 214 Computer Architecture
33 pages
Review Questions: Draw and Explain The Process of Communication System Model
No ratings yet
Review Questions: Draw and Explain The Process of Communication System Model
22 pages
Table Common Distributions
No ratings yet
Table Common Distributions
3 pages
Accelerating Digital Transformation Understanding and Setting Up A Digital Services Unit
No ratings yet
Accelerating Digital Transformation Understanding and Setting Up A Digital Services Unit
12 pages
Computer Science Engineering (Level 1) Syllabus
No ratings yet
Computer Science Engineering (Level 1) Syllabus
1 page
Fuzzy Logic in Control Systems: Fuzzy Logic Controller, Part I1
No ratings yet
Fuzzy Logic in Control Systems: Fuzzy Logic Controller, Part I1
17 pages
Reliability and Credibility
No ratings yet
Reliability and Credibility
1 page
BD FACSVia Quick Reference Guide PDF
No ratings yet
BD FACSVia Quick Reference Guide PDF
6 pages
Electrical Network, Graph Theory, Incidence Matrix, Topology
100% (1)
Electrical Network, Graph Theory, Incidence Matrix, Topology
53 pages
Hefshine-Kairee Systems Pvt. Ltd. Candidates Details Tracker 16 Dec 2021
No ratings yet
Hefshine-Kairee Systems Pvt. Ltd. Candidates Details Tracker 16 Dec 2021
9 pages
Narayana Engineering College::Nellore: Permanently Affiliated To JNTUA, Ananthapuramu
No ratings yet
Narayana Engineering College::Nellore: Permanently Affiliated To JNTUA, Ananthapuramu
3 pages
IBM AIX Enhancements: and Modernization
No ratings yet
IBM AIX Enhancements: and Modernization
188 pages
Introduction
No ratings yet
Introduction
3 pages
Deleting .DWH Files
No ratings yet
Deleting .DWH Files
5 pages
10 Plus - Multi DOF - Modal Analysis
No ratings yet
10 Plus - Multi DOF - Modal Analysis
15 pages
ABCVU - All Bengal CSC Vle Union
No ratings yet
ABCVU - All Bengal CSC Vle Union
2 pages
Course - Ictprg407 Write Script For Software Applications - 2020
No ratings yet
Course - Ictprg407 Write Script For Software Applications - 2020
5 pages
Running Head: Annotated Bibliography of Viacom Media Company 1
No ratings yet
Running Head: Annotated Bibliography of Viacom Media Company 1
5 pages
Python Recap: Tata Consultancy Services
No ratings yet
Python Recap: Tata Consultancy Services
21 pages
Checklist For Appointment
No ratings yet
Checklist For Appointment
1 page
Mock Exam 3 Answers
No ratings yet
Mock Exam 3 Answers
25 pages
Contents of A Research Report
100% (1)
Contents of A Research Report
4 pages
X10 KR22 RF Remote
No ratings yet
X10 KR22 RF Remote
1 page
The C Preprocessor: Richard M. Stallman, Zachary Weinberg
No ratings yet
The C Preprocessor: Richard M. Stallman, Zachary Weinberg
84 pages
Gsma - Iot Capacity Building
No ratings yet
Gsma - Iot Capacity Building
10 pages
Digital Signal Processing Notes
No ratings yet
Digital Signal Processing Notes
12 pages
Networking - 2019
No ratings yet
Networking - 2019
19 pages
Optimisation of Short-Term Scheduling For Cement Limestone Quarry Production
No ratings yet
Optimisation of Short-Term Scheduling For Cement Limestone Quarry Production
28 pages
PLSQL
100% (2)
PLSQL
4 pages