0% found this document useful (0 votes)
6 views21 pages

AI Lab9 22it3044

Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
6 views21 pages

AI Lab9 22it3044

Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 21

Name:Shivansh

Name:- Saurabh Pandey


Singh
Roll No: 22IT3044
Roll No. 22it3047

Q.1
# Importing necessary libraries
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.tree import plot_tree
import matplotlib.pyplot as plt

# Sample dataset: 10 samples
data = {
    'Dose': [47, 50, 10, 32, 41, 15, 29, 33, 47, 55],
    'Age': [25, 33, 37, 40, 29, 34, 46, 53, 64, 65],
    'Sex': [0, 1, 0, 1, 1, 0, 1, 0, 1, 0],  # 0: Female, 1: Male
    'Effectiveness': [75, 85, 68, 82, 72, 62, 78, 89, 85, 97],
}

# Creating a DataFrame
df = pd.DataFrame(data)

# Defining features (Dose, Age, Sex) and target (Effectiveness)
X = df[['Dose', 'Age', 'Sex']]
y = df['Effectiveness']

# Splitting the data into training and test sets (80% train, 20% test).
# A fixed random_state keeps the shuffle, and therefore the split, repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Initializing the Decision Tree Regressor
regressor = DecisionTreeRegressor(random_state=42)

# Training the model
regressor.fit(X_train, y_train)

# Predicting the effectiveness on test data
y_pred = regressor.predict(X_test)

# Calculating the Mean Squared Error for evaluation
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)

# Printing the predictions vs actual values.
# y_pred and y_test are aligned by position, so they are walked in lockstep.
print("\nPredicted Effectiveness vs Actual Effectiveness:")
for predicted, actual in zip(y_pred, y_test):
    print(f"Predicted: {predicted:.2f}, Actual: {actual}")

# Visualizing the Decision Tree using matplotlib.
# Increase figsize and dpi to make the plot clearer.
plt.figure(figsize=(16, 10), dpi=300)
plot_tree(
    regressor,
    feature_names=['Dose', 'Age', 'Sex'],
    filled=True,
    rounded=True,
    fontsize=8,  # Reduce the font size to avoid overlapping
)
plt.title("Decision Tree Regressor")
plt.show()

Mean Squared Error: 156.5

Predicted Effectiveness vs Actual Effectiveness:


Predicted: 97.00, Actual: 85
Predicted: 72.00, Actual: 85
Q.1 Splitting the data into training and test sets (60% train, 40% test)

# Importing necessary libraries


import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.tree import plot_tree
import matplotlib.pyplot as plt

# Sample dataset: 10 samples
data = {
    'Dose': [30, 55, 9, 29, 45, 16, 28, 37, 50, 63],
    'Age': [28, 32, 39, 42, 30, 33, 47, 52, 62, 64],
    'Sex': [0, 1, 0, 1, 1, 0, 1, 0, 1, 0],  # 0: Female, 1: Male
    'Effectiveness': [72, 83, 66, 87, 76, 60, 79, 91, 89, 94],
}

# Creating a DataFrame
df = pd.DataFrame(data)

# Defining features (Dose, Age, Sex) and target (Effectiveness)
X = df[['Dose', 'Age', 'Sex']]
y = df['Effectiveness']

# Splitting the data into training and test sets (60% train, 40% test).
# A fixed random_state keeps the shuffle, and therefore the split, repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.4, random_state=42
)

# Initializing the Decision Tree Regressor
regressor = DecisionTreeRegressor(random_state=42)

# Training the model
regressor.fit(X_train, y_train)

# Predicting the effectiveness on test data
y_pred = regressor.predict(X_test)

# Calculating the Mean Squared Error for evaluation
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)

# Printing the predictions vs actual values.
# y_pred and y_test are aligned by position, so they are walked in lockstep.
print("\nPredicted Effectiveness vs Actual Effectiveness:")
for predicted, actual in zip(y_pred, y_test):
    print(f"Predicted: {predicted:.2f}, Actual: {actual}")

# Visualizing the Decision Tree using matplotlib.
# Increase figsize and dpi to make the plot clearer.
plt.figure(figsize=(16, 10), dpi=300)
plot_tree(
    regressor,
    feature_names=['Dose', 'Age', 'Sex'],
    filled=True,
    rounded=True,
    fontsize=8,  # Reduce the font size to avoid overlapping
)
plt.title("Decision Tree Regressor")
plt.show()

Mean Squared Error: 31.25

Predicted Effectiveness vs Actual Effectiveness:


Predicted: 87.00, Actual: 89
Predicted: 76.00, Actual: 83
Predicted: 66.00, Actual: 60
Predicted: 66.00, Actual: 72

Q.1
# Importing necessary libraries
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.tree import plot_tree
import matplotlib.pyplot as plt

# Sample dataset: 10 samples
data = {
    'Dose': [35, 52, 14, 34, 49, 20, 32, 39, 53, 60],
    'Age': [30, 35, 40, 45, 33, 38, 50, 55, 65, 70],
    'Sex': [0, 1, 1, 0, 1, 0, 1, 0, 1, 0],  # 0: Female, 1: Male
    'Effectiveness': [71, 82, 67, 85, 74, 61, 80, 88, 90, 95],
}

# Creating a DataFrame
df = pd.DataFrame(data)

# Defining features (Dose, Age, Sex) and target (Effectiveness)
X = df[['Dose', 'Age', 'Sex']]
y = df['Effectiveness']

# Splitting the data into training and test sets (80% train, 20% test).
# A fixed random_state keeps the shuffle, and therefore the split, repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Initializing the Decision Tree Regressor
regressor = DecisionTreeRegressor(random_state=42)

# Training the model
regressor.fit(X_train, y_train)

# Predicting the effectiveness on test data
y_pred = regressor.predict(X_test)

# Calculating the Mean Squared Error for evaluation
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)

# Printing the predictions vs actual values.
# y_pred and y_test are aligned by position, so they are walked in lockstep.
print("\nPredicted Effectiveness vs Actual Effectiveness:")
for predicted, actual in zip(y_pred, y_test):
    print(f"Predicted: {predicted:.2f}, Actual: {actual}")

# Visualizing the Decision Tree using matplotlib.
# Increase figsize and dpi to make the plot clearer.
plt.figure(figsize=(16, 10), dpi=300)
plot_tree(
    regressor,
    feature_names=['Dose', 'Age', 'Sex'],
    filled=True,
    rounded=True,
    fontsize=8,  # Reduce the font size to avoid overlapping
)
plt.title("Decision Tree Regressor")
plt.show()

Mean Squared Error: 44.5

Predicted Effectiveness vs Actual Effectiveness:


Predicted: 95.00, Actual: 90
Predicted: 74.00, Actual: 82

Q.2
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.tree import plot_tree
import matplotlib.pyplot as plt

# Sample dataset: 10 samples
data = {
    'Dose': [20, 50, 10, 30, 40, 15, 25, 35, 45, 55],
    'Age': [25, 30, 35, 40, 28, 32, 45, 50, 60, 65],
    'Sex': [0, 1, 0, 1, 1, 0, 1, 0, 1, 0],  # 0: Female, 1: Male
    # % effectiveness to cure
    'Effectiveness': [70, 80, 65, 85, 75, 60, 77, 90, 88, 95],
}

# Creating a DataFrame
df = pd.DataFrame(data)

# Defining features (Dose, Age, Sex) and target (Effectiveness)
X = df[['Dose', 'Age', 'Sex']]
y = df['Effectiveness']

# Splitting the data into training and test sets (80% train, 20% test).
# A fixed random_state keeps the shuffle, and therefore the split, repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Initializing the Decision Tree Regressor
regressor = DecisionTreeRegressor(random_state=42)

# Training the model
regressor.fit(X_train, y_train)

# Predicting the effectiveness on test data
y_pred = regressor.predict(X_test)

# Calculating the Mean Squared Error for evaluation
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)

# Calculating R² score
r2 = r2_score(y_test, y_pred)
print("R² Score:", r2)

# Calculating adjusted R² score.
# The formula divides by (n - p - 1), so it is only meaningful when the
# number of evaluated samples exceeds the number of predictors plus one.
# With 2 test samples and 3 features that denominator is negative, and the
# unguarded formula yields a nonsensical value above 1.
n = X_test.shape[0]  # Number of test samples
p = X_test.shape[1]  # Number of predictors (features)
degrees_of_freedom = n - p - 1
if degrees_of_freedom > 0:
    adjusted_r2 = 1 - (1 - r2) * (n - 1) / degrees_of_freedom
    print("Adjusted R² Score:", adjusted_r2)
else:
    adjusted_r2 = float("nan")
    print(
        "Adjusted R² Score: undefined "
        f"(needs more than {p + 1} test samples, got {n})"
    )

# Printing the predictions vs actual values.
# y_pred and y_test are aligned by position, so they are walked in lockstep.
print("\nPredicted Effectiveness vs Actual Effectiveness:")
for predicted, actual in zip(y_pred, y_test):
    print(f"Predicted: {predicted:.2f}, Actual: {actual}")

# Feature importance from the Decision Tree
importance = regressor.feature_importances_
features = ['Dose', 'Age', 'Sex']

print("\nFeature Importances:")
for feature, score in zip(features, importance):
    print(f"{feature}: {score:.4f}")

# Visualizing the Decision Tree using matplotlib.
# Increase figsize and dpi to make the plot clearer.
plt.figure(figsize=(16, 10), dpi=300)
plot_tree(
    regressor,
    feature_names=features,
    filled=True,
    rounded=True,
    fontsize=8,  # Reduce the font size to avoid overlapping
)
plt.title("Decision Tree Regressor")
plt.show()

Mean Squared Error: 17.0


R² Score: -0.0625
Adjusted R² Score: 1.53125
Predicted Effectiveness vs Actual Effectiveness:
Predicted: 85.00, Actual: 88
Predicted: 75.00, Actual: 80

Feature Importances:
Dose: 0.0427
Age: 0.8305
Sex: 0.1268

Q.2–
# Importing necessary libraries
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.tree import plot_tree
import matplotlib.pyplot as plt

# Sample dataset: 10 samples
data = {
    'Dose': [47, 50, 10, 32, 41, 15, 29, 33, 47, 55],
    'Age': [25, 33, 37, 40, 29, 34, 46, 53, 64, 65],
    'Sex': [0, 1, 0, 1, 1, 0, 1, 0, 1, 0],  # 0: Female, 1: Male
    'Effectiveness': [75, 85, 68, 82, 72, 62, 78, 89, 85, 97],
}

# Creating a DataFrame
df = pd.DataFrame(data)

# Defining features (Dose, Age, Sex) and target (Effectiveness)
X = df[['Dose', 'Age', 'Sex']]
y = df['Effectiveness']

# Splitting the data into training and test sets (60% train, 40% test).
# A fixed random_state keeps the shuffle, and therefore the split, repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.4, random_state=42
)

# Initializing the Decision Tree Regressor
regressor = DecisionTreeRegressor(random_state=42)

# Training the model
regressor.fit(X_train, y_train)

# Predicting the effectiveness on test data
y_pred = regressor.predict(X_test)

# Calculating the Mean Squared Error for evaluation
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)

# Printing the predictions vs actual values.
# y_pred and y_test are aligned by position, so they are walked in lockstep.
print("\nPredicted Effectiveness vs Actual Effectiveness:")
for predicted, actual in zip(y_pred, y_test):
    print(f"Predicted: {predicted:.2f}, Actual: {actual}")

# Visualizing the Decision Tree using matplotlib.
# Increase figsize and dpi to make the plot clearer.
plt.figure(figsize=(16, 10), dpi=300)
plot_tree(
    regressor,
    feature_names=['Dose', 'Age', 'Sex'],
    filled=True,
    rounded=True,
    fontsize=8,  # Reduce the font size to avoid overlapping
)
plt.title("Decision Tree Regressor")
plt.show()

Mean Squared Error: 89.5

Predicted Effectiveness vs Actual Effectiveness:


Predicted: 97.00, Actual: 85
Predicted: 72.00, Actual: 85
Predicted: 68.00, Actual: 62
Predicted: 72.00, Actual: 75

Q.3 –
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.tree import plot_tree
import matplotlib.pyplot as plt

# Sample dataset: 10 samples
data = {
    'Dose': [20, 50, 10, 30, 40, 15, 25, 35, 45, 55],
    'Age': [25, 30, 35, 40, 28, 32, 45, 50, 60, 65],
    'Sex': [0, 1, 0, 1, 1, 0, 1, 0, 1, 0],  # 0: Female, 1: Male
    'Disease': [0, 1, 0, 1, 1, 0, 1, 1, 1, 1],  # 0: No disease, 1: Disease
}

# Creating a DataFrame
df = pd.DataFrame(data)

# Defining features (Dose, Age, Sex) and target (Disease)
X = df[['Dose', 'Age', 'Sex']]
y = df['Disease']

# Splitting the data into training and test sets (80% train, 20% test).
# A fixed random_state keeps the shuffle, and therefore the split, repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Initializing the Decision Tree Classifier
classifier = DecisionTreeClassifier(random_state=42)

# Training the model
classifier.fit(X_train, y_train)

# Predicting the disease outcome on test data
y_pred = classifier.predict(X_test)

# Calculating the accuracy of the model
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Displaying a detailed classification report
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

# Visualizing the Decision Tree using matplotlib.
# class_names are listed in ascending label order (0, then 1).
plt.figure(figsize=(16, 10), dpi=300)
plot_tree(
    classifier,
    feature_names=['Dose', 'Age', 'Sex'],
    class_names=['No Disease', 'Disease'],
    filled=True,
    rounded=True,
    fontsize=8,
)
plt.title("Decision Tree Classifier")
plt.show()

Accuracy: 1.0

Classification Report:
precision recall f1-score support

1 1.00 1.00 1.00 2

accuracy 1.00 2
macro avg 1.00 1.00 1.00 2
weighted avg 1.00 1.00 1.00 2
Q.4 –
# Importing necessary libraries
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.tree import plot_tree
import matplotlib.pyplot as plt

# Modified dataset: 12 samples to better showcase multistage classification
data = {
    'Dose': [20, 50, 10, 30, 40, 15, 25, 35, 45, 55, 60, 65],
    'Age': [25, 30, 35, 40, 28, 32, 45, 50, 60, 65, 48, 33],
    'Sex': [0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1],  # 0: Female, 1: Male
    # 0: Non-smoker, 1: Smoker
    'Smoking': [0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0],
    # 0: No Disease, 1: Disease
    'Disease': [0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0],
}

# Creating a DataFrame
df = pd.DataFrame(data)

# Defining features (Dose, Age, Sex, Smoking) and target (Disease)
X = df[['Dose', 'Age', 'Sex', 'Smoking']]
y = df['Disease']

# Splitting the data into training and test sets with test_size=0.2.
# With 12 samples the test share rounds up to 3 rows, so the effective split
# is 9 train / 3 test rather than an exact 80/20.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Initializing the Decision Tree Classifier; max_depth=4 caps how many
# successive split stages the tree may use.
classifier = DecisionTreeClassifier(random_state=42, max_depth=4)

# Training the model
classifier.fit(X_train, y_train)

# Predicting the disease outcome on test data
y_pred = classifier.predict(X_test)

# Calculating the accuracy of the model
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Displaying a detailed classification report
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

# Printing the test data along with the predictions.
# Rows of X_test, y_test and y_pred share the same positional order.
print("\nSample Predictions:")
for idx, (actual, predicted) in enumerate(zip(y_test, y_pred)):
    sample = X_test.iloc[idx].to_dict()
    print(
        f"Test Sample {idx + 1} -> Features: {sample} | "
        f"Actual: {actual} | Predicted: {predicted}"
    )

# Visualizing the Decision Tree using matplotlib.
# class_names are listed in ascending label order (0, then 1).
plt.figure(figsize=(16, 10), dpi=300)
plot_tree(
    classifier,
    feature_names=['Dose', 'Age', 'Sex', 'Smoking'],
    class_names=['No Disease', 'Disease'],
    filled=True,
    rounded=True,
    fontsize=8,
)
plt.title("Multistage Decision Tree Classifier")
plt.show()

Accuracy: 0.6666666666666666

Classification Report:
precision recall f1-score support

0 1.00 0.50 0.67 2


1 0.50 1.00 0.67 1

accuracy 0.67 3
macro avg 0.75 0.75 0.67 3
weighted avg 0.83 0.67 0.67 3

Sample Predictions:
Test Sample 1 -> Features: {'Dose': 60, 'Age': 48, 'Sex': 1, 'Smoking': 1}
| Actual: 0 | Predicted: 1
Test Sample 2 -> Features: {'Dose': 55, 'Age': 65, 'Sex': 0, 'Smoking': 1}
| Actual: 1 | Predicted: 1
Test Sample 3 -> Features: {'Dose': 20, 'Age': 25, 'Sex': 0, 'Smoking': 0}
| Actual: 0 | Predicted: 0
Q.5–

# Importing necessary libraries


import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    accuracy_score,
    f1_score,
    classification_report,
)
import matplotlib.pyplot as plt
import seaborn as sns

# Sample dataset: 10 samples
data = {
    'Dose': [20, 50, 10, 30, 40, 15, 25, 35, 45, 55],
    'Age': [25, 30, 35, 40, 28, 32, 45, 50, 60, 65],
    'Sex': [0, 1, 0, 1, 1, 0, 1, 0, 1, 0],  # 0: Female, 1: Male
    # % effectiveness to cure
    'Effectiveness': [70, 80, 65, 85, 75, 60, 77, 90, 88, 95],
}

# Creating a DataFrame
df = pd.DataFrame(data)

# Introducing a binary classification target 'Cure'
# (0: Less effective, 1: More effective).
# Threshold: if Effectiveness >= 80, label it as 1 (effective),
# else 0 (not effective).
df['Cure'] = np.where(df['Effectiveness'] >= 80, 1, 0)

# Defining features (Dose, Age, Sex) and target (Cure)
X = df[['Dose', 'Age', 'Sex']]
y = df['Cure']

# Splitting the data into training and test sets (80% train, 20% test).
# A fixed random_state keeps the shuffle, and therefore the split, repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Initializing the Random Forest Classifier
classifier = RandomForestClassifier(random_state=42, n_estimators=100)

# Training the model
classifier.fit(X_train, y_train)

# Predicting the 'Cure' on test data
y_pred = classifier.predict(X_test)

# Calculating accuracy and F1 score for evaluation
accuracy = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

print("Accuracy:", accuracy)
print("F1 Score:", f1)

# Detailed classification report.
# With only two test rows a class can have no true or no predicted samples;
# zero_division=0 reports those undefined scores as 0 without a warning.
print(
    "\nClassification Report:\n",
    classification_report(y_test, y_pred, zero_division=0),
)

# Showing the predictions for each test sample.
# y_pred and y_test are aligned by position, so they are walked in lockstep.
print("\nTest Sample Classification Results:")
for number, (predicted, actual) in enumerate(zip(y_pred, y_test), start=1):
    print(f"Sample {number}: Predicted Cure = {predicted}, Actual Cure = {actual}")

# Visualizing feature importance, most important feature first
importances = classifier.feature_importances_
features = X.columns
indices = np.argsort(importances)[::-1]

plt.figure(figsize=(8, 6))
# NOTE(review): newer seaborn releases may warn about `palette` without
# `hue` - confirm against the installed seaborn version.
sns.barplot(
    x=importances[indices],
    y=features[indices],
    palette="viridis",
)
plt.title("Feature Importances in Random Forest")
plt.xlabel('Importance')
plt.ylabel('Feature')
plt.show()

Accuracy: 0.5
F1 Score: 0.6666666666666666

Classification Report:
precision recall f1-score support

0 0.00 0.00 0.00 0


1 1.00 0.50 0.67 2

accuracy 0.50 2
macro avg 0.50 0.25 0.33 2
weighted avg 1.00 0.50 0.67 2

Test Sample Classification Results:


Sample 1: Predicted Cure = 1, Actual Cure = 1
Sample 2: Predicted Cure = 0, Actual Cure = 1

You might also like