ML Lab Prog1-5 (5) College PDF
ML Lab Prog1-5 (5) College PDF
ML Lab Prog1-5 (5) College PDF
Implement and demonstrate the FIND-S algorithm for finding the most specific hypothesis based
on a given set of training data samples. Read the training data from a .CSV file.
import csv
_UNSET = object()  # private marker for "no positive example seen yet"


def find_s(rows, positive_label="yes"):
    """Return the most specific hypothesis consistent with the positive rows.

    Args:
        rows: Iterable of training examples; each example is a sequence whose
            last element is the class label and whose other elements are
            attribute values.
        positive_label: Label that marks a positive example.

    Returns:
        A list with one entry per attribute: the shared value, ``'?'`` where
        positive examples disagree, or ``'0'`` where no positive example has
        supplied a value. An empty list when ``rows`` has no non-empty row.
    """
    hypothesis = []
    width = None
    for row in rows:
        if not row:
            # csv.reader yields [] for blank lines; there is nothing to learn.
            continue
        if width is None:
            # Size the hypothesis from the data instead of assuming 6 columns.
            width = len(row) - 1
            hypothesis = [_UNSET] * width
        if row[-1] != positive_label:
            continue
        # Slice off the label by position, so an attribute whose value happens
        # to equal the label text is still processed.
        for j, value in enumerate(row[:-1]):
            current = hypothesis[j]
            if current is _UNSET:
                hypothesis[j] = value
            elif current != value:
                hypothesis[j] = '?'
    return ['0' if value is _UNSET else value for value in hypothesis]


if __name__ == "__main__":
    # newline='' is the csv-module recommendation for files passed to reader().
    with open('data.csv', 'r', newline='') as f:
        your_list = list(csv.reader(f))
    for i in your_list:
        print(i)
    # Kept as a one-element list of lists so the printed form is unchanged.
    h = [find_s(your_list)]
    print("final hypothesis is")
    print(h)
Data Set:
sunny,warm,normal,strong,warm,same,yes
sunny,warm,high,strong,warm,same,yes
rain,cold,high,strong,warm,change,no
sunny,warm,hgh,strong,cool,change,yes
output:
C:\Users\admin\PycharmProjects\1rr16cs181\venv\Scripts\python.exe
C:/Users/admin/PycharmProjects/1rr16cs181/finds.py
['sunny', 'warm', 'normal', 'strong', 'warm', 'same', 'yes']
['sunny', 'warm', 'high', 'strong', 'warm', 'same', 'yes']
['rain', 'cold', 'high', 'strong', 'warm', 'change', 'no']
['sunny', 'warm', 'hgh', 'strong', 'cool', 'change', 'yes']
final hypothesis is
[['sunny', 'warm', '?', 'strong', '?', '?']]
import numpy as np
import pandas as pd
# Read the training examples and split them into attribute columns and the
# trailing label column.
# NOTE(review): read_csv uses the first CSV line as the header by default, so
# a header-less data.csv loses its first example -- confirm the file layout.
data = pd.DataFrame(data=pd.read_csv('data.csv'))
feature_frame, label_series = data.iloc[:, 0:-1], data.iloc[:, -1]
concepts = np.array(feature_frame)
target = np.array(label_series)
def learn(concepts, target):
    """Run the candidate-elimination pass over the training examples.

    The specific boundary starts as a copy of the first example and is
    generalised by every "yes" example; "no" examples record, on the diagonal
    of ``general_h``, the attribute values that separate them from the
    specific boundary. Progress is printed after every example.

    Args:
        concepts: 2-D array of attribute values, one example per row.
        target: Sequence of labels ("yes"/"no") aligned with ``concepts``.

    Returns:
        ``(specific_h, general_h)``: the final specific hypothesis and the
        general hypotheses with rows consisting only of '?' removed.
    """
    specific_h = concepts[0].copy()
    print("intilization of specific_h and general_h")
    print(specific_h)
    n_attributes = len(specific_h)
    general_h = [["?"] * n_attributes for _ in range(n_attributes)]
    print(general_h)
    for i, h in enumerate(concepts):
        if target[i] == "yes":
            for x in range(n_attributes):
                if h[x] != specific_h[x]:
                    # Positive examples disagree here: generalise both bounds.
                    specific_h[x] = '?'
                    general_h[x][x] = '?'
        elif target[i] == "no":
            for x in range(n_attributes):
                if h[x] != specific_h[x]:
                    # This attribute tells the negative example apart.
                    general_h[x][x] = specific_h[x]
                else:
                    general_h[x][x] = '?'
        print("steps of candidate elimination algorithm", i + 1)
        print("specific_h", i + 1, "\n")
        print(specific_h)
        print("general_h", i + 1, "\n")
        print(general_h)
    # Rows that are still entirely '?' constrain nothing; drop them so that
    # only informative general hypotheses are handed back to the caller.
    general_h = [row for row in general_h if any(v != "?" for v in row)]
    return specific_h, general_h
Dataset:
sunny,warm,normal,strong,warm,same,yes
sunny,warm,high,strong,warm,same,yes
rain,cold,high,strong,warm,change,no
sunny,warm,hgh,strong,cool,change,yes
Output:
C:\Users\admin\PycharmProjects\1rr16cs181\venv\Scripts\python.exe
C:/Users/admin/PycharmProjects/1rr16cs181/candi.py
[['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'],
['?', '?', '?', '?', '?', '?']]
specific_h 3
general_h 3
[['sunny', '?', '?', '?', '?', '?'], ['?', 'warm', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?',
'?', '?', '?'], ['?', '?', '?', '?', '?', '?']]
Final specific_h:
final general_h:
[['sunny', '?', '?', '?', '?', '?'], ['?', 'warm', '?', '?', '?', '?']]
import numpy as np
import math
from data_loader import read_data
class Node:
    """A vertex of the decision tree.

    ``attribute`` is the label given at construction, ``children`` starts as
    an empty list and ``answer`` starts as the empty string.
    """

    def __init__(self, attribute):
        self.answer = ""
        self.children = []
        self.attribute = attribute
def sub(data, col, delete):
    """Partition ``data`` by the distinct values found in column ``col``.

    Args:
        data: 2-D NumPy array, one example per row.
        col: Index of the column to split on.
        delete: When true, column ``col`` is removed from every sub-table.

    Returns:
        ``(items, tables)``: ``items`` is the ``np.unique`` array of distinct
        values in the column and ``tables`` maps each of those values to a
        ``'|S32'`` array holding the matching rows in their original order.
    """
    column = data[:, col]
    items = np.unique(column)
    tables = {}
    for item in items:
        # A boolean mask picks the matching rows in one step; this avoids the
        # separate counting pass and the array-to-int conversion it needed.
        # The '|S32' cast is retained from the original, so sub-tables hold
        # byte strings.
        subset = data[column == item].astype('|S32')
        if delete:
            subset = np.delete(subset, col, 1)
        tables[item] = subset
    return items, tables
def entropy(s):
    """Return the base-2 Shannon entropy of the values in array ``s``.

    Args:
        s: NumPy array of labels.

    Returns:
        The entropy as a plain float; ``0.0`` when ``s`` is empty or holds a
        single distinct value.
    """
    items, counts = np.unique(s, return_counts=True)
    if items.size <= 1:
        return 0.0
    probabilities = counts / s.size
    # Vectorised sum of -p * log2(p); every p is > 0 because np.unique only
    # reports values that actually occur.
    return float(-np.sum(probabilities * np.log2(probabilities)))
def gain(data, col):
    """Return the gain ratio obtained by splitting ``data`` on column ``col``.

    The information gain (entropy of the last column minus the size-weighted
    entropy of each partition) is divided by the intrinsic value of the split.

    Args:
        data: 2-D NumPy array whose last column is the class label.
        col: Index of the attribute column to evaluate.

    Returns:
        The gain ratio as a float, or ``0.0`` when the intrinsic value is
        zero (the column has at most one distinct value), since such a split
        separates nothing.
    """
    items, subsets = sub(data, col, delete=False)
    total_size = data.shape[0]
    remainder = 0.0
    intrinsic_value = 0.0
    for item in items:
        subset = subsets[item]
        ratio = subset.shape[0] / (total_size * 1.0)
        # np.sum collapses either a plain number or a 1-element array to a
        # scalar, so the arithmetic below stays scalar in both cases.
        remainder += ratio * float(np.sum(entropy(subset[:, -1])))
        intrinsic_value -= ratio * math.log(ratio, 2)
    information_gain = float(np.sum(entropy(data[:, -1]))) - remainder
    if intrinsic_value == 0:
        # Avoid 0/0 -> nan, which np.argmax in the caller would select.
        return 0.0
    return information_gain / intrinsic_value
def create(data, metadata):
    """Recursively build a decision tree for ``data``.

    Args:
        data: 2-D NumPy array; the last column is the class label and the
            other columns are attributes.
        metadata: Attribute names aligned with the attribute columns.

    Returns:
        The root ``Node``. A leaf has ``attribute == ""`` and its label in
        ``answer``; an internal node carries the chosen attribute name and a
        list of ``(value, child)`` tuples in ``children``.
    """
    labels, label_counts = np.unique(data[:, -1], return_counts=True)
    if labels.shape[0] == 1:
        node = Node("")
        node.answer = labels[0]
        return node
    n_attributes = data.shape[1] - 1
    if n_attributes <= 0:
        # No attribute left to split on while labels still disagree: fall
        # back to the most frequent label instead of calling argmax on an
        # empty gains array.
        node = Node("")
        node.answer = labels[np.argmax(label_counts)]
        return node
    gains = np.zeros((n_attributes, 1))
    for col in range(n_attributes):
        gains[col] = gain(data, col)
    split = np.argmax(gains)
    node = Node(metadata[split])
    # The chosen column is removed from both the names and the sub-tables so
    # the indices stay aligned further down the tree.
    metadata = np.delete(metadata, split, 0)
    items, subsets = sub(data, split, delete=True)
    for item in items:
        child = create(subsets[item], metadata)
        node.children.append((item, child))
    return node
def empty(size):
    """Return the indentation string for tree depth ``size``.

    NOTE(review): the original loop appended an empty string on every
    iteration, so the result was always "" regardless of ``size``. That looks
    like padding lost when the listing was extracted; three spaces per level
    is an assumption -- confirm the intended width.

    Args:
        size: Number of indentation levels; values <= 0 give "".

    Returns:
        A string of spaces, three per level.
    """
    return "   " * size
def print_tree(node, level):
    """Print the tree rooted at ``node``, one item per line.

    Args:
        node: Tree node with ``answer``, ``attribute`` and ``children``.
        level: Depth passed to ``empty`` to build the line prefix.

    A node whose ``answer`` is not "" is printed as a leaf. Otherwise the
    attribute name is printed, followed by each branch value and the subtree
    beneath it.
    """
    if node.answer != "":
        print(empty(level), node.answer)
        return
    print(empty(level), node.attribute)
    # children holds (branch value, child node) tuples; without this loop
    # nothing below the root attribute would ever be shown.
    for value, child in node.children:
        print(empty(level + 1), value)
        print_tree(child, level + 2)
Data set:
outlook,temprature,humidity,wind,palytennis
sunny,hot,high,weak,no
sunny,hot,high,strong,no
overcast,hot,high,weak,yes
rain,mild,high,weak,yes
rain,cool,normal,weak,yes
rain,cool,normal,strong,no
overcast,cool,normal,strong,yes
sunny,mild,high,weak,no
sunny,cool,normal,weak,yes
rain,mild,normal,weak,yes
sunny,mild,normal,strong,yes
overcast,mild,high,strong,yes
overcast,hot,normal,weak,yes
rain,mild,high,strong,no
Output:
C:\Users\admin\PycharmProjects\1rr16cs181\venv\Scripts\python.exe
C:/Users/admin/PycharmProjects/1rr16cs181/id3.py
outlook
overcast
b'yes'
rain
wind
b'strong'
b'no'
b'weak'
b'yes'
sunny
humidity
b'high'
b'no'
b'normal'
b'yes'
import numpy as np
# Three training samples with two inputs each, and one target per sample.
x = np.array(([2, 9], [1, 5], [3, 6]), dtype=float)
y = np.array(([92], [86], [89]), dtype=float)
# Scale every input column by its own maximum and the targets by 100.
x /= x.max(axis=0)
y /= 100
def sigmoid(x):
    """Return the logistic function 1 / (1 + e^-x), element-wise for arrays."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator
def derivatives_sigmoid(x):
    """Return x * (1 - x); callers pass values already produced by sigmoid."""
    complement = 1 - x
    return x * complement
# Training configuration.
epoch = 7000  # number of full passes over the samples
lr = 0.1  # learning rate
inputlayer_neuron = 2
hiddenlayer_neuron = 3
output_neuron = 1

# Weights and biases start as uniform random values in [0, 1).
wh = np.random.uniform(size=(inputlayer_neuron, hiddenlayer_neuron))
bh = np.random.uniform(size=(1, hiddenlayer_neuron))
wout = np.random.uniform(size=(hiddenlayer_neuron, output_neuron))
bout = np.random.uniform(size=(1, output_neuron))

for i in range(epoch):
    # Forward pass.
    hinp1 = np.dot(x, wh)
    hinp = hinp1 + bh
    hlayer_act = sigmoid(hinp)
    outinp1 = np.dot(hlayer_act, wout)
    outinp = outinp1 + bout
    output = sigmoid(outinp)

    # Backward pass: propagate the output error to the hidden layer using
    # the weights as they were during the forward pass.
    EO = y - output
    outgrad = derivatives_sigmoid(output)
    d_output = EO * outgrad
    EH = d_output.dot(wout.T)
    hiddengrad = derivatives_sigmoid(hlayer_act)
    d_hiddenlayer = EH * hiddengrad

    # Parameter updates. The biases take part in the forward pass, so they
    # are trained too; a bias gradient is the per-sample delta summed over
    # the batch (keepdims keeps the (1, n) shape of bh / bout).
    wout += hlayer_act.T.dot(d_output * lr)
    bout += np.sum(d_output, axis=0, keepdims=True) * lr
    wh += x.T.dot(d_hiddenlayer) * lr
    bh += np.sum(d_hiddenlayer, axis=0, keepdims=True) * lr

print("input:\n" + str(x))
print("actual output:\n" + str(y))
print("predicted output:\n", output)
OUTPUT
C:\Users\admin\PycharmProjects\1rr16cs181\venv\Scripts\python.exe
C:/Users/admin/PycharmProjects/1rr16cs181/nueral.py
input:
[[0.66666667 1. ]
[0.33333333 0.55555556]
[1. 0.66666667]]
actual output:
[[0.92]
[0.86]
[0.89]]
predicted output:
[[0.89612636]
[0.87868519]
[0.89460368]]
5. Write a program to implement the naïve Bayesian classifier for a sample training data set stored
as a .CSV file. Compute the accuracy of the classifier, considering few test data sets.
def loadCsv(filename):
    """Load a purely numeric CSV file.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A list of rows, each a list of floats. Blank lines are skipped.

    Raises:
        ValueError: If a field cannot be converted to float.
    """
    # The context manager closes the file; newline='' is what the csv module
    # asks for on files handed to csv.reader.
    with open(filename, newline='') as handle:
        return [[float(x) for x in row] for row in csv.reader(handle) if row]
def separateByClass(dataset):
    """Group the rows of ``dataset`` by their last element.

    Returns a dict mapping each distinct last element to the list of rows
    ending with it, in the order the rows appear.
    """
    separated = {}
    for vector in dataset:
        separated.setdefault(vector[-1], []).append(vector)
    return separated
def mean(numbers):
    """Return sum(numbers) divided by their count, via ``safe_div``."""
    total = sum(numbers)
    count = float(len(numbers))
    return safe_div(total, count)
def stdev(numbers):
    """Return the standard deviation of ``numbers`` with an n - 1 divisor.

    The division goes through ``safe_div``.
    NOTE(review): presumably safe_div guards the single-value case where the
    divisor is 0 -- confirm against its definition.
    """
    avg = mean(numbers)
    squared_deviations = [pow(value - avg, 2) for value in numbers]
    divisor = float(len(numbers) - 1)
    variance = safe_div(sum(squared_deviations), divisor)
    return math.sqrt(variance)
def summarize(dataset):
    """Return a ``(mean, stdev)`` pair for every column except the last.

    Statistics are computed for all columns of ``dataset`` and the entry for
    the final column is then discarded.
    """
    summaries = []
    for attribute in zip(*dataset):
        summaries.append((mean(attribute), stdev(attribute)))
    # Drop the last column's statistics (raises IndexError on empty input).
    summaries.pop()
    return summaries
def summarizeByClass(dataset):
    """Return per-class column summaries.

    The rows are grouped with ``separateByClass`` and each group is reduced
    with ``summarize``; the result maps class value to that summary list.
    """
    return {
        classValue: summarize(instances)
        for classValue, instances in separateByClass(dataset).items()
    }
def main():
    """Load the data set, split it, fit the summaries and report accuracy.

    Prints the split sizes, the attribute encoding legend, both subsets, the
    actual and predicted class values of the test rows and the value returned
    by ``getAccuracy``.
    """
    filename = 'NaiveBayes ConceptLearning.csv'
    splitRatio = 0.75
    dataset = loadCsv(filename)
    trainingSet, testSet = splitDataset(dataset, splitRatio)

    print('Split {0} rows into'.format(len(dataset)))
    print('Number of Training data: ' + (repr(len(trainingSet))))
    print('Number of Test Data: ' + (repr(len(testSet))))
    print("\nThe values assumed for the concept learning attributes are\n")
    print(
        "OUTLOOK=> Sunny=1 Overcast=2 Rain=3\nTEMPERATURE=> Hot=1 Mild=2 Cool=3\nHUMIDITY=> High=1 Normal=2\nWIND=> Weak=1 Strong=2")
    print("TARGET CONCEPT:PLAY TENNIS=> Yes=10 No=5")

    # Dump both subsets, each under its own heading.
    sections = (
        ("\nThe Training set are:", trainingSet),
        ("\nThe Test data set are:", testSet),
    )
    for heading, rows in sections:
        print(heading)
        for row in rows:
            print(row)
    print("\n")

    # prepare model
    summaries = summarizeByClass(trainingSet)
    # test model
    predictions = getPredictions(summaries, testSet)
    # The class value is the last element of every test row.
    actual = [vector[-1] for vector in testSet]
    print('Actual values: {0}%'.format(actual))
    print('Predictions: {0}%'.format(predictions))
    accuracy = getAccuracy(testSet, predictions)
    print('Accuracy: {0}%'.format(accuracy))


main()
Data Set:
1,1,1,1,5
1,1,1,2,5
2,1,1,2,10
3,2,1,1,10
3,3,2,1,10
3,3,2,2,5
2,3,2,2,10
1,2,1,1,5
1,3,2,1,10
3,2,2,2,10
1,2,2,2,10
2,2,1,2,10
2,1,2,1,10
3,2,1,2,5
1,2,1,2,10
1,2,1,2,5
OUTPUT
C:\Users\admin\PycharmProjects\ss\venv\Scripts\python.exe
"C:/Users/admin/Desktop/RNSIT_ML_LAB_PROGRAMS_JULY2018/RNSIT_ML_LAB_PROGRAM
S_JULY2018/5-naive-bayes/NaiveBayes ConceptLearning.py"