0% found this document useful (0 votes)
21 views

Experiment 11 Code

Experiment 11 code

Uploaded by

maasalamix
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
21 views

Experiment 11 Code

Experiment 11 code

Uploaded by

maasalamix
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/4

Code:

import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.cluster import KMeans
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from scipy.spatial.distance import cdist

# Load the Iris dataset: feature matrix in X, integer class codes in y.
iris = datasets.load_iris()
X, y = iris.data, iris.target

# Standardize every feature column. Note that the scaler is fitted on the
# complete dataset, i.e. before the train/test split below.
scaler = StandardScaler()
X_scaled = scaler.fit(X).transform(X)

# Hold out 30% of the samples for testing; stratify on y so both splits keep
# the same class proportions, and fix the seed for reproducibility.
_split = train_test_split(
    X_scaled, y, test_size=0.3, random_state=42, stratify=y
)
X_train, X_test, y_train, y_test = _split

# Function to compute accuracy of k-means clustering


def kmeans_clustering(k, X_train, X_test, y_train, y_test):
    """Fit k-means on the training split and score it as a classifier.

    Cluster ids carry no class meaning, so every cluster is relabelled with
    the most frequent true class among the samples assigned to it before the
    accuracy is computed.

    Note:
        The relabelling is done separately per split, each time using that
        split's own true labels. The test score is therefore computed with a
        cluster-to-class map derived from ``y_test`` itself.

    Args:
        k: Number of clusters passed to ``KMeans``.
        X_train: Training feature matrix used to fit the model.
        X_test: Test feature matrix.
        y_train: True integer class labels for ``X_train``.
        y_test: True integer class labels for ``X_test``.

    Returns:
        A ``(train_accuracy, test_accuracy)`` tuple.
    """

    def to_majority_class(cluster_ids, true_labels):
        # For each cluster id, pick the true class that occurs most often
        # among its members, then translate every prediction through that map.
        majority = {
            cid: np.bincount(true_labels[cluster_ids == cid]).argmax()
            for cid in np.unique(cluster_ids)
        }
        return np.array([majority[cid] for cid in cluster_ids])

    # n_init is passed explicitly to suppress the scikit-learn warning about
    # its changing default; the seed keeps runs reproducible.
    model = KMeans(n_clusters=k, n_init=10, random_state=42).fit(X_train)

    scores = []
    for features, labels in ((X_train, y_train), (X_test, y_test)):
        remapped = to_majority_class(model.predict(features), labels)
        scores.append(accuracy_score(labels, remapped))

    return tuple(scores)

# Sweep over several cluster counts and record the accuracy on both splits.
k_values = [2, 3, 4, 5]
train_accuracies, test_accuracies = [], []

for k in k_values:
    train_acc, test_acc = kmeans_clustering(k, X_train, X_test, y_train, y_test)
    print(f"K={k}: Train Accuracy={train_acc:.4f}, Test Accuracy={test_acc:.4f}")
    train_accuracies += [train_acc]
    test_accuracies += [test_acc]

# Plot train and test accuracy against the number of clusters.
plt.figure(figsize=(8, 5))
for series, caption in (
    (train_accuracies, 'Train Accuracy'),
    (test_accuracies, 'Test Accuracy'),
):
    plt.plot(k_values, series, label=caption, marker='o')
plt.xlabel('Number of Clusters (k)')
plt.ylabel('Accuracy')
plt.title('K-means Clustering Accuracy with Different k')
plt.legend()
plt.grid(True)
plt.show()

# Try other distance metrics: e.g., 'cityblock' (Manhattan), 'cosine'


def kmeans_with_different_distance(X_train, X_test, y_train, y_test,
                                   metric='euclidean'):
    """Score 3-cluster k-means on the test split under a chosen distance.

    The centroids are fitted with scikit-learn's ``KMeans``, which optimises
    a Euclidean objective and offers no metric option. The ``metric``
    argument is therefore applied at assignment time: each test sample is
    assigned to the centroid that is nearest under ``metric``. Previously the
    argument was accepted but ignored, so every metric produced the same
    score.

    As before, each cluster is relabelled with the most frequent true class
    among the *test* samples assigned to it, and accuracy is computed on
    those relabelled predictions.

    Args:
        X_train: Training feature matrix used to fit the centroids.
        X_test: Test feature matrix to assign and score.
        y_train: Unused; kept so the call signature stays unchanged.
        y_test: True integer class labels for ``X_test``.
        metric: Any metric name accepted by ``scipy.spatial.distance.cdist``
            (for example ``'euclidean'``, ``'cityblock'``, ``'cosine'``).

    Returns:
        Test accuracy as a float.
    """
    kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
    kmeans.fit(X_train)

    # Distance from every test sample (rows) to every centroid (columns);
    # the nearest centroid under the requested metric is the predicted cluster.
    distances = cdist(X_test, kmeans.cluster_centers_, metric=metric)
    y_test_pred = distances.argmin(axis=1)

    # Map each cluster id to the majority true class among its members.
    label_map = {
        cluster: np.bincount(y_test[y_test_pred == cluster]).argmax()
        for cluster in np.unique(y_test_pred)
    }
    y_test_pred_adj = np.array([label_map[cluster] for cluster in y_test_pred])

    return accuracy_score(y_test, y_test_pred_adj)

# Manhattan and Cosine distances


# Compute each score before formatting it: the original f-strings had their
# replacement fields wrapped across physical lines, which is a SyntaxError
# on Python versions before 3.12.
for title, metric_name in (("Manhattan", "cityblock"), ("Cosine", "cosine")):
    accuracy = kmeans_with_different_distance(
        X_train, X_test, y_train, y_test, metric=metric_name
    )
    print(f"Test Accuracy ({title}): {accuracy:.4f}")

Output:
K=2: Train Accuracy=0.6667, Test Accuracy=0.6667
K=3: Train Accuracy=0.8667, Test Accuracy=0.7556
K=4: Train Accuracy=0.8667, Test Accuracy=0.7556
K=5: Train Accuracy=0.8571, Test Accuracy=0.7778
Test Accuracy (Manhattan): 0.7556
Test Accuracy (Cosine): 0.7556

You might also like