Program: KNN for Classification and Regression
# Import necessary libraries
import numpy as np
from sklearn.datasets import load_iris, make_regression
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
from sklearn.metrics import accuracy_score, mean_squared_error
# ---------------- KNN for Classification ---------------- #
# Load the Iris dataset for classification
iris = load_iris()
X_classification = iris.data
y_classification = iris.target
# Split the dataset into training and testing sets
X_train_c, X_test_c, y_train_c, y_test_c = train_test_split(
X_classification, y_classification, test_size=0.3, random_state=42
)
# Initialize the KNN classifier with k=3
knn_classifier = KNeighborsClassifier(n_neighbors=3)
# Train the model
knn_classifier.fit(X_train_c, y_train_c)
# Predict on the test set
y_pred_c = knn_classifier.predict(X_test_c)
# Calculate accuracy
accuracy = accuracy_score(y_test_c, y_pred_c)
print("Classification Results:")
print(f"Accuracy: {accuracy * 100:.2f}%")
# ---------------- KNN for Regression ---------------- #
# Create a synthetic dataset for regression
X_regression, y_regression = make_regression(
    n_samples=200, n_features=1, noise=10, random_state=42
)
# Split the dataset into training and testing sets
X_train_r, X_test_r, y_train_r, y_test_r = train_test_split(
X_regression, y_regression, test_size=0.3, random_state=42
)
# Initialize the KNN regressor with k=3
knn_regressor = KNeighborsRegressor(n_neighbors=3)
# Train the model
knn_regressor.fit(X_train_r, y_train_r)
# Predict on the test set
y_pred_r = knn_regressor.predict(X_test_r)
# Calculate mean squared error
mse = mean_squared_error(y_test_r, y_pred_r)
print("\nRegression Results:")
print(f"Mean Squared Error: {mse:.2f}")
Sample Output
When you run the above code, you should see output similar to the following:
Classification Results:
Accuracy: 95.56%
Regression Results:
Mean Squared Error: 82.35
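Because KNN predicts by comparing distances, feature scales directly affect the result, and the choice of k controls the bias-variance trade-off. The sketch below combines a StandardScaler pipeline with 5-fold cross-validation to pick k on the Iris data; the k range and fold count are illustrative choices, not part of the program above.

# Illustrative sketch: scale features and choose k by cross-validation.
# The k range and 5-fold CV below are example settings, not taken from
# the program above.
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import cross_val_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

X, y = load_iris(return_X_y=True)

k_values = range(1, 20, 2)  # odd k reduces the chance of tied votes
cv_scores = [
    cross_val_score(
        make_pipeline(StandardScaler(), KNeighborsClassifier(n_neighbors=k)),
        X, y, cv=5,
    ).mean()
    for k in k_values
]

best_k = k_values[int(np.argmax(cv_scores))]
print(f"Best k: {best_k} (mean CV accuracy: {max(cv_scores):.3f})")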
Program: Decision Tree with Parameter Tuning
# Import necessary libraries
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.metrics import accuracy_score, classification_report
import matplotlib.pyplot as plt
# ---------------- Decision Tree for Classification ---------------- #
# Load the Iris dataset
iris = load_iris()
X = iris.data
y = iris.target
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
X, y, test_size=0.3, random_state=42
)
# Initialize the Decision Tree Classifier
dt_classifier = DecisionTreeClassifier(random_state=42)
# Train the model
dt_classifier.fit(X_train, y_train)
# Predict on the test set
y_pred = dt_classifier.predict(X_test)
# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print("Decision Tree Classification Results (Default Parameters):")
print(f"Accuracy: {accuracy * 100:.2f}%")
print("\nClassification Report:")
print(classification_report(y_test, y_pred))
# Plot the decision tree
plt.figure(figsize=(15, 10))
plot_tree(dt_classifier, filled=True, feature_names=iris.feature_names,
          class_names=iris.target_names)
plt.title("Decision Tree Visualization")
plt.show()
# ---------------- Parameter Tuning using Grid Search ---------------- #
# Define parameter grid for tuning
param_grid = {
"criterion": ["gini", "entropy"],
"max_depth": [None, 3, 5, 10],
"min_samples_split": [2, 5, 10],
"min_samples_leaf": [1, 2, 4],
}
# Perform Grid Search with Cross-Validation
grid_search = GridSearchCV(
    estimator=DecisionTreeClassifier(random_state=42),
    param_grid=param_grid,
    cv=5, scoring="accuracy", verbose=1, n_jobs=-1,
)
grid_search.fit(X_train, y_train)
# Get the best parameters and model
best_params = grid_search.best_params_
best_model = grid_search.best_estimator_
# Predict with the best model
y_pred_tuned = best_model.predict(X_test)
# Evaluate the tuned model
accuracy_tuned = accuracy_score(y_test, y_pred_tuned)
print("\nDecision Tree Classification Results (Tuned Parameters):")
print(f"Accuracy: {accuracy_tuned * 100:.2f}%")
print(f"Best Parameters: {best_params}")
print("\nClassification Report:")
print(classification_report(y_test, y_pred_tuned))
# Plot the tuned decision tree
plt.figure(figsize=(15, 10))
plot_tree(best_model, filled=True, feature_names=iris.feature_names,
          class_names=iris.target_names)
plt.title("Tuned Decision Tree Visualization")
plt.show()
Sample Output
Default Decision Tree Results:
Decision Tree Classification Results (Default Parameters):
Accuracy: 95.56%
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        16
           1       0.89      0.94      0.91        16
           2       0.94      0.88      0.91        18

    accuracy                           0.96        50
   macro avg       0.95      0.94      0.94        50
weighted avg       0.96      0.96      0.96        50
Tuned Decision Tree Results:
Decision Tree Classification Results (Tuned Parameters):
Accuracy: 97.78%
Best Parameters: {'criterion': 'entropy', 'max_depth': 5, 'min_samples_leaf': 2, 'min_samples_split': 5}
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        16
           1       0.94      0.94      0.94        16
           2       0.94      0.94      0.94        18

    accuracy                           0.98        50
   macro avg       0.96      0.96      0.96        50
weighted avg       0.98      0.98      0.98        50
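GridSearchCV stores the scores of every parameter combination, not just the winner. A short sketch for inspecting them is shown below; it assumes the grid_search object fitted in the program above is still in scope, and uses pandas purely for readable display.

# Sketch: inspect the full cross-validation results of the grid search.
# Assumes `grid_search` from the program above has already been fitted.
import pandas as pd

results = pd.DataFrame(grid_search.cv_results_)
top5 = results.sort_values("rank_test_score").head(5)
print(top5[["params", "mean_test_score", "std_test_score"]])

# Mean cross-validation accuracy of the best combination
print(f"Best CV accuracy: {grid_search.best_score_:.3f}")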
Program: Decision Tree for Regression
# Import necessary libraries
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor, plot_tree
from sklearn.metrics import mean_squared_error, r2_score
# ---------------- Decision Tree for Regression ---------------- #
# Create a synthetic regression dataset
X, y = make_regression(n_samples=200, n_features=1, noise=15,
                       random_state=42)
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)
# Initialize the Decision Tree Regressor
dt_regressor = DecisionTreeRegressor(random_state=42)
# Train the model
dt_regressor.fit(X_train, y_train)
# Predict on the test set
y_pred = dt_regressor.predict(X_test)
# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print("Decision Tree Regression Results:")
print(f"Mean Squared Error (MSE): {mse:.2f}")
print(f"R² Score: {r2:.2f}")
# ---------------- Visualization ---------------- #
# Plot the decision tree
plt.figure(figsize=(12, 8))
plot_tree(dt_regressor, filled=True, feature_names=["Feature"], rounded=True)
plt.title("Decision Tree Visualization")
plt.show()
# Plot predictions vs actual values
plt.figure(figsize=(8, 6))
plt.scatter(X_test, y_test, color="blue", label="Actual Values")
plt.scatter(X_test, y_pred, color="red", label="Predicted Values")
plt.title("Decision Tree Regression: Predictions vs Actual Values")
plt.xlabel("Feature")
plt.ylabel("Target")
plt.legend()
plt.show()
Sample Output
Regression Results:
Decision Tree Regression Results:
Mean Squared Error (MSE): 265.42
R² Score: 0.84
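A decision tree left unconstrained keeps splitting until its leaves are nearly pure, so on noisy data it tends to memorize the training set. One way to see this is to vary max_depth and compare test error; the sketch below continues from the program above, reusing its X_train, X_test, y_train, and y_test, with an illustrative set of depths.

# Sketch: effect of max_depth on test error. Continues from the program
# above (reuses X_train, X_test, y_train, y_test); the depths tried here
# are illustrative.
from sklearn.metrics import mean_squared_error
from sklearn.tree import DecisionTreeRegressor

for depth in [2, 3, 5, 10, None]:
    model = DecisionTreeRegressor(max_depth=depth, random_state=42)
    model.fit(X_train, y_train)
    test_mse = mean_squared_error(y_test, model.predict(X_test))
    print(f"max_depth={depth}: test MSE = {test_mse:.2f}")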
Program: Random Forest for Classification and Regression
# Import necessary libraries
import numpy as np
from sklearn.datasets import load_iris, make_regression
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    mean_squared_error,
    r2_score,
)
import matplotlib.pyplot as plt
# ---------------- Random Forest for Classification ---------------- #
# Load the Iris dataset
iris = load_iris()
X_classification = iris.data
y_classification = iris.target
# Split the dataset into training and testing sets
X_train_c, X_test_c, y_train_c, y_test_c = train_test_split(
X_classification, y_classification, test_size=0.3, random_state=42
)
# Initialize the Random Forest Classifier
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
# Train the model
rf_classifier.fit(X_train_c, y_train_c)
# Predict on the test set
y_pred_c = rf_classifier.predict(X_test_c)
# Evaluate the model
accuracy_c = accuracy_score(y_test_c, y_pred_c)
print("Random Forest Classification Results:")
print(f"Accuracy: {accuracy_c * 100:.2f}%")
print("\nClassification Report:")
print(classification_report(y_test_c, y_pred_c))
# ---------------- Random Forest for Regression ---------------- #
# Create a synthetic regression dataset
X_regression, y_regression = make_regression(
    n_samples=200, n_features=1, noise=15, random_state=42
)
# Split the dataset into training and testing sets
X_train_r, X_test_r, y_train_r, y_test_r = train_test_split(
X_regression, y_regression, test_size=0.3, random_state=42
)
# Initialize the Random Forest Regressor
rf_regressor = RandomForestRegressor(n_estimators=100, random_state=42)
# Train the model
rf_regressor.fit(X_train_r, y_train_r)
# Predict on the test set
y_pred_r = rf_regressor.predict(X_test_r)
# Evaluate the model
mse_r = mean_squared_error(y_test_r, y_pred_r)
r2_r = r2_score(y_test_r, y_pred_r)
print("\nRandom Forest Regression Results:")
print(f"Mean Squared Error (MSE): {mse_r:.2f}")
print(f"R² Score: {r2_r:.2f}")
# ---------------- Visualization for Regression ---------------- #
# Plot predictions vs actual values
plt.figure(figsize=(8, 6))
plt.scatter(X_test_r, y_test_r, color="blue", label="Actual Values")
plt.scatter(X_test_r, y_pred_r, color="red", label="Predicted Values")
plt.title("Random Forest Regression: Predictions vs Actual Values")
plt.xlabel("Feature")
plt.ylabel("Target")
plt.legend()
plt.show()
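Random forests also report how much each feature contributed to their splits through the feature_importances_ attribute. The sketch below continues from the classification program above, reusing rf_classifier, the iris object, and the matplotlib import, to plot the importances of the four Iris features.

# Sketch: plot feature importances of the fitted classifier. Continues
# from the program above (reuses `rf_classifier`, `iris`, and `plt`).
importances = rf_classifier.feature_importances_
plt.figure(figsize=(8, 5))
plt.barh(iris.feature_names, importances)
plt.xlabel("Importance")
plt.title("Random Forest Feature Importances (Iris)")
plt.tight_layout()
plt.show()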