School of Engineering: Lab Manual On Machine Learning Lab

Download as pdf or txt
Download as pdf or txt
You are on page 1of 23

School of Engineering

Lab Manual
on
Machine Learning Lab

Submitted By - Submitted To -
Ravi Kumawat Ms. Varsha Himthani
18BCON695

Department of Computer Science and Engineering


2021-2022
List of Practical

Lab 1. Implement the CANDIDATE – ELIMINATION algorithm. Show how it is used to learn
from training examples.

Lab 2. Write a program to implement Linear Regression and Logistic Regression

Lab 3. Implement the ID3 algorithm for learning Boolean–valued functions for classifying the
training examples by searching through the space of a Decision Tree.

Lab 4. Design and implement Naïve Bayes Algorithm for learning and classifying TEXT
DOCUMENTS.

Lab 5. Implement K-Nearest Neighbor algorithm to classify the iris data set. Also calculate the
score.

Lab 6. Write a program to implement Support Vector Machine. Also discuss the confusion matrix
and score of models.

Lab 7. Apply EM algorithm to cluster a set of data and also apply K-Means algorithm on the
same data set to compare two algorithms.

Lab 8. Build an Artificial Neural Network by implementing Back-Propagation algorithm and test
the same using an appropriate data set.

Lab 9. Implement the Non-Parametric Locally Weighted Regression Algorithm in order to fit
data points. Select the appropriate data set for your experiment and draw a graph.

Lab 10. Build a Face detection system to recognize faces in a frame or image. You can use
OpenCV for this task.
1. Implement the CANDIDATE – ELIMINATION algorithm. Show
how it is used to learn from training examples.

import numpy as np
import pandas as pd

# Read the training examples; every column but the last is an attribute,
# the last column is the target label.
data = pd.read_csv('CE.csv')
print(data.head())

attribute_columns = data.iloc[:, 0:-1]
label_column = data.iloc[:, -1]
concepts = np.array(attribute_columns)
target = np.array(label_column)
print(target)
print(concepts)

def learn(concepts, target):
    """Run the simplified Candidate-Elimination procedure over the examples.

    Parameters:
        concepts: indexable collection of rows, one row of attribute values
            per training example. Row 0 seeds the specific hypothesis.
        target: labels aligned with ``concepts``; only the exact strings
            "yes" and "no" are acted upon.

    Returns:
        (specific_h, general_h): the final specific hypothesis and the list
        of general hypotheses with the all-'?' rows removed.
    """
    specific_h = concepts[0].copy()
    n_attributes = len(specific_h)
    print("Initialization of specific_h and general_h")
    print("specific_h: ", specific_h)
    general_h = [["?" for _ in range(n_attributes)] for _ in range(n_attributes)]
    print("general_h: ", general_h)
    print("concepts: ", concepts)

    for i, h in enumerate(concepts):
        if target[i] == "yes":
            # Positive example: generalise every attribute that disagrees.
            for x in range(n_attributes):
                if h[x] != specific_h[x]:
                    specific_h[x] = '?'
                    general_h[x][x] = '?'
        if target[i] == "no":
            # Negative example: specialise the general boundary on the
            # attributes where the example differs from specific_h.
            for x in range(n_attributes):
                if h[x] != specific_h[x]:
                    general_h[x][x] = specific_h[x]
                else:
                    general_h[x][x] = '?'
        print("\nSteps of Candidate Elimination Algorithm: ", i + 1)
        print("Specific_h: ", i + 1)
        print(specific_h, "\n")
        print("general_h :", i + 1)
        print(general_h)

    # Build the all-wildcard row from the actual attribute count instead of
    # assuming exactly six attributes.
    all_wildcards = ['?'] * n_attributes
    indices = [i for i, val in enumerate(general_h) if list(val) == all_wildcards]
    print("\nIndices", indices)
    general_h = [row for row in general_h if list(row) != all_wildcards]
    return specific_h, general_h

# Learn both boundaries from the loaded examples, then report them.
s_final, g_final = learn(concepts, target)

print("\nFinal Specific_h:\n" + str(s_final))
print("Final General_h:\n" + str(g_final))

OUTPUT -

2. Write a program to implement Linear Regression and Logistic Regression.

Simple Linear Regression


# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
# Importing the dataset
dataset = pd.read_csv('Income_Data.csv')
features = dataset.iloc[:, :-1].values
labels = dataset.iloc[:, -1].values

# Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split
features_train, features_test, labels_train, labels_test = train_test_split(
    features, labels, test_size=0.2)

# Fitting Simple Linear Regression to the Training set
from sklearn.linear_model import LinearRegression
regressor = LinearRegression()
regressor.fit(features_train, labels_train)

# Predicting the Test set results
labels_pred = regressor.predict(features_test)
# predict() takes a 2-D (n_samples, n_features) input; the single value 6.5
# assumes the dataset has exactly one feature column -- TODO confirm.
print(regressor.predict([[6.5]]))

# Visualizing the Training set results
plt.scatter(features_train, labels_train, color='red')
plt.plot(features_train, regressor.predict(features_train), color='blue')
plt.title('Income vs ML-Experience (Training set)')
plt.xlabel('ML-Experience')
plt.ylabel('Income')
plt.show()

# Visualizing the Test set results
plt.scatter(features_train, labels_train, color='green')
plt.scatter(features_test, labels_test, color='red')
plt.plot(features_train, regressor.predict(features_train), color='blue')
plt.title('Income vs ML-Experience (Test set)')
plt.xlabel('ML-Experience')
plt.ylabel('Income')
plt.show()

# score() is called on the held-out data first, then on the training data.
print(regressor.score(features_test, labels_test))
print(regressor.score(features_train, labels_train))

Logistic Regression
# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Importing the dataset
dataset = pd.read_csv('Social_Network_Ads.csv')
features = dataset.iloc[:, [2, 3]].values
labels = dataset.iloc[:, 4].values

# Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split
features_train, features_test, labels_train, labels_test = train_test_split(
    features, labels, test_size=0.25, random_state=0)

# Feature Scaling (the scaler is fitted on the training split only)
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
features_train = sc.fit_transform(features_train)
features_test = sc.transform(features_test)

# Fitting Logistic Regression to the Training set
from sklearn.linear_model import LogisticRegression
classifier = LogisticRegression()
classifier.fit(features_train, labels_train)

# Predicting the Test set results
labels_pred = classifier.predict(features_test)

# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(labels_test, labels_pred)

from matplotlib.colors import ListedColormap


def _plot_decision_regions(model, features_set, labels_set, title):
    """Plot the model's predicted regions and the given points on one figure.

    The grid spans each feature's min - 1 to max + 1 in steps of 0.01.
    """
    palette = ListedColormap(('red', 'green'))
    features1, features2 = np.meshgrid(
        np.arange(start=features_set[:, 0].min() - 1,
                  stop=features_set[:, 0].max() + 1, step=0.01),
        np.arange(start=features_set[:, 1].min() - 1,
                  stop=features_set[:, 1].max() + 1, step=0.01))
    grid_points = np.array([features1.ravel(), features2.ravel()]).T
    plt.contourf(features1, features2,
                 model.predict(grid_points).reshape(features1.shape),
                 alpha=0.75, cmap=palette)
    plt.xlim(features1.min(), features1.max())
    plt.ylim(features2.min(), features2.max())
    for i, j in enumerate(np.unique(labels_set)):
        # color= takes a single RGBA tuple; c= would treat it as a sequence
        # of values to be colour-mapped.
        plt.scatter(features_set[labels_set == j, 0],
                    features_set[labels_set == j, 1],
                    color=palette(i), label=j)
    plt.title(title)
    plt.xlabel('Age')
    plt.ylabel('Estimated Salary')
    plt.legend()
    plt.show()


# Visualising the Training set results
_plot_decision_regions(classifier, features_train, labels_train,
                       'Logistic Regression (Training set)')

# Visualising the Test set results
_plot_decision_regions(classifier, features_test, labels_test,
                       'Logistic Regression (Test set)')
Output -
Linear Regression

Logistic Regression
4. Assuming a set of documents that need to be classified, use the naïve
Bayesian Classifier model to perform this task. Built-in Java classes/API
can be used to write the program. Calculate the accuracy, precision, and
recall for your data set.

import pandas as pd
msg = pd.read_csv('naivetext1.csv', names=['message', 'label'])
print('The dimensions of the dataset', msg.shape)
# Encode the textual labels as integers for the classifier.
label_to_num = {'pos': 1, 'neg': 0}
msg['labelnum'] = msg.label.map(label_to_num)
X = msg.message
y = msg.labelnum
print(X)
print(y)

# splitting the dataset into train and test data
from sklearn.model_selection import train_test_split
xtrain, xtest, ytrain, ytest = train_test_split(X, y)
print(xtest.shape)
print(xtrain.shape)
print(ytest.shape)
print(ytrain.shape)

# output of count vectoriser is a sparse matrix
from sklearn.feature_extraction.text import CountVectorizer
count_vect = CountVectorizer()
xtrain_dtm = count_vect.fit_transform(xtrain)
xtest_dtm = count_vect.transform(xtest)
# get_feature_names() no longer exists in current scikit-learn releases;
# get_feature_names_out() is its replacement.
feature_names = count_vect.get_feature_names_out().tolist()
print(feature_names)

df = pd.DataFrame(xtrain_dtm.toarray(), columns=feature_names)
print(df)  # tabular representation
print(xtrain_dtm)  # sparse matrix representation

# Training Naive Bayes (NB) classifier on training data.
from sklearn.naive_bayes import MultinomialNB
clf = MultinomialNB().fit(xtrain_dtm, ytrain)
predicted = clf.predict(xtest_dtm)

# printing accuracy metrics
from sklearn import metrics
print('Accuracy metrics')
print('Accuracy of the classifer is', metrics.accuracy_score(ytest, predicted))
print('Confusion matrix')
print(metrics.confusion_matrix(ytest, predicted))
print('Recall and Precison ')
print(metrics.recall_score(ytest, predicted))
print(metrics.precision_score(ytest, predicted))

docs_new = ['I like this place', 'My boss is not my saviour']

X_new_counts = count_vect.transform(docs_new)
predictednew = clf.predict(X_new_counts)
# Map each predicted class number back to its label name. Indexing
# msg.labelnum by the prediction would instead return the label of
# dataset row 0 or row 1.
num_to_label = {num: name for name, num in label_to_num.items()}
for doc, category in zip(docs_new, predictednew):
    print('%s->%s' % (doc, num_to_label[int(category)]))

OUTPUT -
['about', 'am', 'amazing', 'an', 'and', 'awesome', 'beers', 'best', 'boss', 'can', 'deal',
'do', 'enemy', 'feel', 'fun', 'good', 'have', 'horrible', 'house', 'is', 'like', 'love', 'my',
'not', 'of', 'place', 'restaurant', 'sandwich', 'sick', 'stuff', 'these', 'this', 'tired', 'to',
'today', 'tomorrow', 'very', 'view', 'we', 'went', 'what', 'will', 'with', 'work'] about
am amazing an and awesome beers best boss can ... today]

tomorrow very view we went what


will with work 0 0 1 0 000 0
00
1 0 0 0 0 00 0 0 1
2 0 0 0 0 00 0 0 0
3 0 0 0 0 10 0 0 0
4 0 0 0 0 00 0 0 0
5 0 0 0 0 00 0 0 0
6 0 0 0 0 00 0 1 0
7 1 0 0 1 00 1 0 0
8 0 0 0 0 00 0 0 0

5. Write a program to implement k-Nearest Neighbour algorithm to classify the iris data set. Print both correct and wrong predictions.
Java/Python ML library classes can be used for this problem.

import csv
import random
import math
import operator

def loadDataset(filename, split, trainingSet=None, testSet=None):
    """Read a CSV file and randomly split its rows into train and test lists.

    Every column except the last is converted to ``float``; the last column
    is kept as the class label string. Blank lines are skipped, so a trailing
    empty line no longer has to be compensated for by dropping the final row.

    Parameters:
        filename: path of the CSV file to read.
        split: probability in [0, 1] that a row goes to ``trainingSet``.
        trainingSet: list to append training rows to (a new list if omitted).
        testSet: list to append test rows to (a new list if omitted).

    Returns:
        (trainingSet, testSet), the same list objects that were filled.
    """
    # Fresh lists per call; a mutable default would be shared across calls.
    if trainingSet is None:
        trainingSet = []
    if testSet is None:
        testSet = []
    # The csv module needs text mode with newline='' on Python 3.
    with open(filename, 'r', newline='') as csvfile:
        for row in csv.reader(csvfile):
            if not row:
                continue
            for y in range(len(row) - 1):
                row[y] = float(row[y])
            if random.random() < split:
                trainingSet.append(row)
            else:
                testSet.append(row)
    return trainingSet, testSet

def euclideanDistance(instance1, instance2, length):
    """Return the Euclidean distance over the first ``length`` components.

    Components at index ``length`` and beyond (e.g. a trailing class label)
    are ignored.
    """
    squared_sum = sum(
        pow(instance1[x] - instance2[x], 2) for x in range(length)
    )
    return math.sqrt(squared_sum)
def getNeighbors(trainingSet, testInstance, k):
    """Return the ``k`` training rows closest to ``testInstance``.

    The last element of each row is treated as the label and excluded from
    the distance. If ``k`` exceeds the number of training rows, all rows are
    returned, nearest first.
    """
    length = len(testInstance) - 1
    distances = [
        (row, euclideanDistance(testInstance, row, length))
        for row in trainingSet
    ]
    distances.sort(key=operator.itemgetter(1))
    # Slicing tolerates k > len(distances); indexing would raise IndexError.
    return [row for row, _ in distances[:k]]

def getResponse(neighbors):
    """Return the majority class label among ``neighbors``.

    Each neighbour's last element is its label. On a tie, the label that
    appears first in ``neighbors`` wins.

    Raises:
        ValueError: if ``neighbors`` is empty.
    """
    if not neighbors:
        raise ValueError("neighbors must not be empty")
    classVotes = {}
    for neighbor in neighbors:
        response = neighbor[-1]
        classVotes[response] = classVotes.get(response, 0) + 1
    # Select by vote count; dicts keep insertion order, so max() returns the
    # earliest-seen label among equally voted ones.
    return max(classVotes, key=classVotes.get)

def getAccuracy(testSet, predictions):
    """Return the percentage of test rows whose label equals its prediction.

    The label is the last element of each row in ``testSet``;
    ``predictions[x]`` is compared with row ``x``. An empty ``testSet``
    yields 0.0 instead of dividing by zero.
    """
    if not testSet:
        return 0.0
    correct = sum(
        1 for x in range(len(testSet)) if testSet[x][-1] == predictions[x]
    )
    return (correct / float(len(testSet))) * 100.0
def main():
    """Load the data file, classify every test row with k-NN and print results."""
    # prepare data
    trainingSet = []
    testSet = []
    split = 0.67
    loadDataset('knndat.data', split, trainingSet, testSet)
    print('Train set: ' + repr(len(trainingSet)))
    print('Test set: ' + repr(len(testSet)))
    # generate predictions
    predictions = []
    k = 3
    for x in range(len(testSet)):
        neighbors = getNeighbors(trainingSet, testSet[x], k)
        result = getResponse(neighbors)
        predictions.append(result)
        print('> predicted=' + repr(result) + ', actual=' + repr(testSet[x][-1]))
    accuracy = getAccuracy(testSet, predictions)
    print('Accuracy: ' + repr(accuracy) + '%')


if __name__ == "__main__":
    main()

OUTPUT -
Confusion matrix is as follows -
[
[11 0 0]
[0 9 1]
[0 1 8]
]

Accuracy metrics -
0 1.00 1.00 1.00 11
1 0.90 0.90 0.90 10
2 0.89 0.89 0.89 9

Avg/Total -
0.93 0.93 0.93 30
6. Write a program to implement Support Vector Machine. Also discuss
the confusion matrix and score of the model.

#Data Pre-processing Step


# importing libraries
import numpy as nm
import matplotlib.pyplot as mtp
import pandas as pd

# importing datasets
data_set = pd.read_csv('user_data.csv')

# Extracting Independent and dependent Variable
x = data_set.iloc[:, [2, 3]].values
y = data_set.iloc[:, 4].values

# Splitting the dataset into training and test set.
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.25, random_state=0)

# feature Scaling (the scaler is fitted on the training split only)
from sklearn.preprocessing import StandardScaler
st_x = StandardScaler()
x_train = st_x.fit_transform(x_train)
x_test = st_x.transform(x_test)

from sklearn.svm import SVC  # "Support vector classifier"
classifier = SVC(kernel='linear', random_state=0)
classifier.fit(x_train, y_train)

# Predicting the test set result
y_pred = classifier.predict(x_test)

# Creating the Confusion matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
# The exercise asks for the confusion matrix and the score, so show both.
print(cm)
print(classifier.score(x_test, y_test))

from matplotlib.colors import ListedColormap


def _plot_svm_regions(model, x_set, y_set, title):
    """Plot the model's predicted regions and the given points on one figure.

    The grid spans each feature's min - 1 to max + 1 in steps of 0.01.
    """
    palette = ListedColormap(('red', 'green'))
    x1, x2 = nm.meshgrid(
        nm.arange(start=x_set[:, 0].min() - 1, stop=x_set[:, 0].max() + 1, step=0.01),
        nm.arange(start=x_set[:, 1].min() - 1, stop=x_set[:, 1].max() + 1, step=0.01))
    grid_points = nm.array([x1.ravel(), x2.ravel()]).T
    mtp.contourf(x1, x2, model.predict(grid_points).reshape(x1.shape),
                 alpha=0.75, cmap=palette)
    mtp.xlim(x1.min(), x1.max())
    mtp.ylim(x2.min(), x2.max())
    for i, j in enumerate(nm.unique(y_set)):
        # color= takes a single RGBA tuple; c= would treat it as a sequence
        # of values to be colour-mapped.
        mtp.scatter(x_set[y_set == j, 0], x_set[y_set == j, 1],
                    color=palette(i), label=j)
    mtp.title(title)
    mtp.xlabel('Age')
    mtp.ylabel('Estimated Salary')
    mtp.legend()
    mtp.show()


# Visualizing the training set result
_plot_svm_regions(classifier, x_train, y_train, 'SVM classifier (Training set)')

# Visualizing the test set result
_plot_svm_regions(classifier, x_test, y_test, 'SVM classifier (Test set)')

OUTPUT -

7. Apply EM algorithm to cluster a set of data stored in a .CSV file. Use the same data set for clustering using k-Means algorithm. Compare the
results of these two algorithms and comment on the quality of clustering.
You can add Java/Python ML library classes/API in the program.

import numpy as np
import matplotlib.pyplot as plt
# make_blobs is imported from sklearn.datasets; the old samples_generator
# submodule no longer exists in current scikit-learn releases.
from sklearn.datasets import make_blobs
X, y_true = make_blobs(n_samples=100, centers=4, cluster_std=0.60, random_state=0)
X = X[:, ::-1]  # flip axes for better plotting

from sklearn.mixture import GaussianMixture
gmm = GaussianMixture(n_components=4).fit(X)
labels = gmm.predict(X)
plt.scatter(X[:, 0], X[:, 1], c=labels, s=40, cmap="viridis")
plt.show()

# Per-sample probability of belonging to each of the 4 components.
probs = gmm.predict_proba(X)
print(probs[:5].round(3))

size = 50 * probs.max(1) ** 2  # square emphasizes differences
plt.scatter(X[:, 0], X[:, 1], c=labels, cmap="viridis", s=size)
plt.show()

from matplotlib.patches import Ellipse


def draw_ellipse(position, covariance, ax=None, **kwargs):
    """Draw 1-, 2- and 3-sigma ellipses for a given position and covariance.

    Parameters:
        position: centre of the ellipses.
        covariance: a (2, 2) array is decomposed with SVD to get the
            orientation and axis lengths; any other shape is used as-is to
            produce an axis-aligned ellipse.
        ax: axes to draw on; the current axes are used when omitted.
        **kwargs: forwarded to ``Ellipse``.
    """
    ax = ax if ax is not None else plt.gca()

    # Convert covariance to principal axes
    if covariance.shape == (2, 2):
        U, s, Vt = np.linalg.svd(covariance)
        angle = np.degrees(np.arctan2(U[1, 0], U[0, 0]))
        width, height = 2 * np.sqrt(s)
    else:
        angle = 0
        width, height = 2 * np.sqrt(covariance)

    # Draw one patch per sigma level; angle is passed by keyword because
    # recent matplotlib releases no longer accept it positionally.
    for nsig in range(1, 4):
        ax.add_patch(Ellipse(position, nsig * width, nsig * height,
                             angle=angle, **kwargs))

def plot_gmm(gmm, X, label=True, ax=None):
    """Fit ``gmm`` on ``X``, scatter the points and overlay component ellipses.

    Parameters:
        gmm: mixture model exposing ``fit``, ``predict``, ``means_``,
            ``covariances_`` and ``weights_``.
        X: data whose first two columns are plotted.
        label: colour points by predicted component when true.
        ax: axes to draw on; the current axes are used when omitted.
    """
    ax = ax if ax is not None else plt.gca()
    labels = gmm.fit(X).predict(X)
    if label:
        ax.scatter(X[:, 0], X[:, 1], c=labels, s=40, cmap="viridis", zorder=2)
    else:
        ax.scatter(X[:, 0], X[:, 1], s=40, zorder=2)
    ax.axis('equal')

    # Scale ellipse opacity so the heaviest component gets alpha 0.2.
    w_factor = 0.2 / gmm.weights_.max()
    for pos, covar, w in zip(gmm.means_, gmm.covariances_, gmm.weights_):
        draw_ellipse(pos, covar, ax=ax, alpha=w * w_factor)
# Fit and plot with the default covariance type, then with 'full' spelled out.
gmm = GaussianMixture(n_components=4, random_state=42)
plot_gmm(gmm, X)
plt.show()

gmm = GaussianMixture(n_components=4, covariance_type='full', random_state=42)
plot_gmm(gmm, X)
plt.show()

OUTPUT -
[[1 ,0, 0, 0]
[0 ,0, 1, 0]
[1 ,0, 0, 0]
[1 ,0, 0, 0]
[1 ,0, 0, 0]]

8. Build an Artificial Neural Network by implementing the Backpropagation algorithm and test the same using appropriate data sets.
import numpy as np
X = np.array(([2, 9], [1, 5], [3, 6]), dtype=float)
y = np.array(([92], [86], [89]), dtype=float)
X = X / np.amax(X, axis=0)  # scale each column by its own maximum
y = y / 100  # scale targets into the (0, 1) range the sigmoid output can reach

#Sigmoid Function
def sigmoid(x):
    """Return the logistic function 1 / (1 + e^-x), element-wise for arrays."""
    return 1 / (1 + np.exp(-x))

#Derivative of Sigmoid Function


def derivatives_sigmoid(x):
    """Return x * (1 - x), the sigmoid derivative in terms of its output.

    ``x`` is expected to already be a sigmoid activation, not the raw input.
    """
    return x * (1 - x)

# Variable initialization
epoch = 7000  # Setting training iterations
lr = 0.1  # Setting learning rate
inputlayer_neurons = 2  # number of features in data set
hiddenlayer_neurons = 3  # number of hidden layers neurons
output_neurons = 1  # number of neurons at output layer

# weight and bias initialization
# draws a random range of numbers uniformly of dim x*y
wh = np.random.uniform(size=(inputlayer_neurons, hiddenlayer_neurons))
bh = np.random.uniform(size=(1, hiddenlayer_neurons))
wout = np.random.uniform(size=(hiddenlayer_neurons, output_neurons))
bout = np.random.uniform(size=(1, output_neurons))

for i in range(epoch):
    # Forward Propogation
    hinp1 = np.dot(X, wh)
    hinp = hinp1 + bh
    hlayer_act = sigmoid(hinp)
    outinp1 = np.dot(hlayer_act, wout)
    outinp = outinp1 + bout
    output = sigmoid(outinp)

    # Backpropagation
    EO = y - output
    outgrad = derivatives_sigmoid(output)
    d_output = EO * outgrad
    EH = d_output.dot(wout.T)
    # how much hidden layer wts contributed to error
    hiddengrad = derivatives_sigmoid(hlayer_act)
    d_hiddenlayer = EH * hiddengrad

    # dotproduct of nextlayererror and currentlayerop
    wout += hlayer_act.T.dot(d_output) * lr
    wh += X.T.dot(d_hiddenlayer) * lr
    # NOTE(review): the original listing has the bias updates for bout and bh
    # commented out, so both biases keep their random initial values. To
    # train them, add np.sum(<delta>, axis=0, keepdims=True) * lr to each.

print("Input: \n" + str(X))
print("Actual Output: \n" + str(y))
print("Predicted Output: \n", output)

OUTPUT -
Input:
[[ 0.66666667 1. ]
[ 0.33333333 0.55555556]
[ 1. 0.66666667]]

Actual Output:
[[ 0.92]
[ 0.86]
[ 0.89]]

Predicted Output:
[[ 0.89559591]
[ 0.88142069]
[ 0.8928407 ]]
9. Implement the non-parametric Locally Weighted Regression algorithm
in order to fit data points. Select appropriate data set for your
experiment and draw graphs.

import pandas as pd
import numpy as np1

def kernel(point, xmat, k):
    """Return the diagonal Gaussian weight matrix for ``point``.

    Entry (j, j) is exp(-||point - xmat[j]||^2 / (2 * k^2)); off-diagonal
    entries stay 0.

    Parameters:
        point: 1 x n row (``numpy.matrix``) to weight the samples around.
        xmat: m x n ``numpy.matrix`` of samples.
        k: bandwidth of the Gaussian.
    """
    m, n = np1.shape(xmat)
    # asmatrix replaces np.mat, which NumPy 2 removed.
    weights = np1.asmatrix(np1.eye(m))
    for j in range(m):
        # Use the xmat argument rather than a module-level X.
        diff = point - xmat[j]
        # diff * diff.T is a 1x1 matrix; take the scalar explicitly.
        squared_distance = (diff * diff.T)[0, 0]
        weights[j, j] = np1.exp(squared_distance / (-2.0 * k ** 2))
    return weights

def localWeight(point, xmat, ymat, k):
    """Solve the weighted least-squares fit centred on ``point``.

    Computes (X^T W X)^-1 (X^T W y^T), where W comes from ``kernel``.
    ``xmat`` and ``ymat`` are expected to be ``numpy.matrix`` objects, since
    ``*`` is used as the matrix product and ``.I`` as the inverse.
    """
    wei = kernel(point, xmat, k)
    # Use the xmat argument rather than a module-level X.
    W = (xmat.T * (wei * xmat)).I * (xmat.T * (wei * ymat.T))
    return W

def localWeightRegression(xmat, ymat, k):
    """Return the locally weighted prediction for every row of ``xmat``.

    A separate weighted fit is solved for each row, so the result is a 1-D
    array with one prediction per sample.
    """
    m, n = np1.shape(xmat)
    ypred = np1.zeros(m)
    for i in range(m):
        # The product is a 1x1 matrix; take the scalar explicitly.
        ypred[i] = (xmat[i] * localWeight(xmat[i], xmat, ymat, k))[0, 0]
    return ypred

# load data points
data = pd.read_csv('data10.csv')
bill = np1.array(data.total_bill)
tip = np1.array(data.tip)

# preparing and add 1 in bill
# asmatrix replaces np.mat, which NumPy 2 removed.
mbill = np1.asmatrix(bill)
mtip = np1.asmatrix(tip)
# mbill is 1 x m, so the sample count is its second dimension; the 1-D
# ``bill`` array has no second dimension to index.
m = np1.shape(mbill)[1]
one = np1.asmatrix(np1.ones(m))
X = np1.hstack((one.T, mbill.T))

# set k here
ypred = localWeightRegression(X, mtip, 2)

# Sort order of the bill column.
# NOTE(review): this listing contains no plotting code after this point,
# although the exercise asks for a graph.
SortIndex = X[:, 1].argsort(0)
xsort = X[SortIndex][:, 0]

OUTPUT -
10. Build a Face detection system to recognize faces in a frame or image.
You can use OpenCV for this task

import cv2
from matplotlib import pyplot as plt #To plot the image

image = cv2.imread('bts.jpg')  # Reading image
# imread returns None on failure instead of raising, so fail early and clearly.
if image is None:
    raise FileNotFoundError("could not read image 'bts.jpg'")

# converting into grayscale image
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
print(image.shape)
print(gray.shape)
print(gray)  # printing gray image

# create Cascade Classifier it contains all the face feature
haar_face_cascade = cv2.CascadeClassifier('face.xml')
# A missing cascade file yields an empty classifier rather than an error.
if haar_face_cascade.empty():
    raise FileNotFoundError("could not load cascade file 'face.xml'")

# Search for the co-ordinates of face (on the grayscale image prepared above)
faces = haar_face_cascade.detectMultiScale(gray, scaleFactor=1.06, minNeighbors=5)
print("face found", len(faces))

# create a rectangle outline on the face
for (x, y, w, h) in faces:
    cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)

# Convert BGR to RGB for display, matching the BGR assumption already made
# by the COLOR_BGR2GRAY conversion above.
plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))  # show the image
plt.axis('off')
plt.show()

You might also like