ML Lab Manual

Download as pdf or txt
Download as pdf or txt
You are on page 1 of 6

Department of CS & IT

MJP Rohilkhand University, Bareilly

Lab Manual
Machine Learning
(CS-401P)
1. Implement a Linear Regression model to predict house prices based on a dataset.
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Toy dataset: house size paired with its sale price
sizes = [650, 800, 1200, 1500, 1700, 2000]
prices = [70000, 85000, 130000, 160000, 185000, 210000]
data = pd.DataFrame({'Size': sizes, 'Price': prices})

# Single-column feature frame and the target series
X, y = data[['Size']], data['Price']

# Hold out a test_size=0.2 fraction of the rows (fixed seed for repeatability)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the linear model on the training rows only
model = LinearRegression().fit(X_train, y_train)

# Predict the held-out rows and score them against the true prices
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)

print("Predictions:", y_pred)
print("Mean Squared Error:", mse)

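Output (illustrative; with 6 rows and test_size=0.2 the test split holds 2 rows, so two predictions are printed and the actual values differ):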
Predictions: [175000.]
Mean Squared Error: 625000000.0

2. Implement PCA to reduce the dimensionality of the Iris dataset.


from sklearn.datasets import load_iris
from sklearn.decomposition import PCA
import pandas as pd

# Only the feature matrix is needed; PCA does not use the labels
iris = load_iris()
X = iris.data

# Fit a 2-component PCA and project the features in a single step
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X)

# Show the projected samples
print("Transformed Data (PCA):", X_pca)

Output:
Transformed Data (PCA): [[-2.68412563  0.31939725]
 [-2.71414169 -0.17700123]
 ...
 [ 1.56939475 -0.55628366]]

3. Implement a K-Nearest Neighbors (KNN) classifier to classify species in the Iris dataset.
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Pull the feature matrix and species labels out of the Iris bunch
iris = load_iris()
X, y = iris.data, iris.target

# 30% of the samples are held out for testing (fixed seed)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Fit a 3-nearest-neighbours classifier on the training portion
knn = KNeighborsClassifier(n_neighbors=3).fit(X_train, y_train)

# Classify the held-out samples and measure the fraction that is correct
y_pred = knn.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print("Predicted labels:", y_pred)
print("Accuracy:", accuracy)

Output:
Predicted labels: [1 0 2 ...]
Accuracy: 0.97

4. Implement a Decision Tree classifier to classify species in the Iris dataset.


from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# Load dataset: feature matrix and species labels
iris = load_iris()
X = iris.data
y = iris.target

# Train-test split (30% held out, fixed seed so the split is repeatable)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Train the Decision Tree model.
# random_state is fixed because the tree builder permutes features at each
# split; without a seed, repeated runs may grow different trees and report
# different accuracy.
tree = DecisionTreeClassifier(random_state=42)
tree.fit(X_train, y_train)

# Predictions and accuracy on the held-out samples
y_pred = tree.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print("Predicted labels:", y_pred)
print("Accuracy:", accuracy)

Output:
Predicted labels: [1 0 2 ...]
Accuracy: 0.97

5. Implement a Naive Bayes classifier to classify species in the Iris dataset.


# Import necessary libraries
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Iris data: four measurements per flower (sepal length, sepal width,
# petal length, petal width) and the species label
# (setosa, versicolor, virginica)
iris = load_iris()
X, y = iris.data, iris.target

# Keep 30% of the samples aside for evaluation (fixed seed)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Fit a Gaussian Naive Bayes model on the training samples
nb_model = GaussianNB()
nb_model.fit(X_train, y_train)

# Predict the held-out samples, then compute every metric before printing
y_pred = nb_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred, target_names=iris.target_names)
matrix = confusion_matrix(y_test, y_pred)

# Display the results
print("Accuracy:", accuracy)
print("\nClassification Report:\n", report)
print("\nConfusion Matrix:\n", matrix)

Output:
Accuracy: 0.9777777777777777

Classification Report:
              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        16
  versicolor       0.94      1.00      0.97        16
   virginica       1.00      0.94      0.97        13

    accuracy                           0.98        45
   macro avg       0.98      0.98      0.98        45
weighted avg       0.98      0.98      0.98        45

Confusion Matrix:
[[16  0  0]
 [ 0 16  0]
 [ 0  1 12]]

6. Implement an SVM classifier on the Iris dataset.


from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Pull the feature matrix and class labels out of the Iris bunch
iris = load_iris()
X, y = iris.data, iris.target

# Split with test_size=0.3 and a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.3
)

# Support vector classifier with a linear kernel
svm = SVC(kernel='linear')
svm.fit(X_train, y_train)

# Score the classifier on the held-out samples
y_pred = svm.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print("Predicted labels:", y_pred)
print("Accuracy:", accuracy)
Output:
Predicted labels: [1 0 2 ...]
Accuracy: 0.97

7. Implement K-Means clustering to group similar data points in the Iris dataset.
from sklearn.datasets import load_iris
from sklearn.cluster import KMeans
import pandas as pd

# Load dataset (features only; clustering does not use the labels)
iris = load_iris()
X = iris.data

# Train the K-Means model.
# n_init is set explicitly: its default has changed between scikit-learn
# releases (10 -> 'auto'), and leaving it implicit triggers a FutureWarning on
# some versions and can change the clustering found. Ten restarts with a fixed
# seed keeps the result the same regardless of the installed version.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
kmeans.fit(X)

# Output cluster centers and the cluster index assigned to each sample
print("Cluster Centers:", kmeans.cluster_centers_)
print("Labels:", kmeans.labels_)

Output:
Cluster Centers: [[5.006 3.418 1.464 0.244]
 [5.901 2.748 4.393 1.433]
 [6.85  3.073 5.742 2.071]]
Labels: [0 0 1 2 ...]

You might also like