Candidate Elimination Algorithm

The document describes an implementation of the Candidate-Elimination algorithm to learn hypotheses from a set of training examples stored in a CSV file. It defines functions for determining if one hypothesis is more general than another, generating the minimum generalizations and specializations of a hypothesis, and the core candidate-elimination algorithm. Running on sample training data, it iteratively updates the sets of possible general (G) and specific (S) hypotheses consistent with the examples.
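
For instance, the "more general than" relation described above treats "?" as matching any attribute value; a minimal sketch using the more_general function defined in the program below:

more_general(('sunny', '?', '?', '?', '?', '?'),
             ('sunny', 'warm', 'normal', 'strong', 'warm', 'same'))   # True
more_general(('sunny', 'warm', '?', '?', '?', '?'),
             ('rainy', 'cold', 'high', 'strong', 'warm', 'change'))   # False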

2. For a given set of training data examples stored in a .CSV file, implement and demonstrate the Candidate-Elimination algorithm to output a description of the set of all hypotheses consistent with the training examples.
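
In the program, a hypothesis is a tuple of attribute constraints: "?" accepts any value, "0" accepts no value, and a literal value must match exactly. A minimal sketch of the two boundary hypotheses for the six attributes used here:

# Most general hypothesis: matches every instance
g = ("?",) * 6    # ('?', '?', '?', '?', '?', '?')
# Most specific hypothesis: matches no instance
s = ('0',) * 6    # ('0', '0', '0', '0', '0', '0')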

import csv

def g_0(n):
    # Most general hypothesis: every attribute may take any value
    return ("?",) * n

def s_0(n):
    # Most specific hypothesis: no attribute value is acceptable
    return ('0',) * n

def more_general(h1, h2):
    # True if h1 is more general than (or equal to) h2
    more_general_parts = []
    for x, y in zip(h1, h2):
        mg = x == "?" or (x != "0" and (x == y or y == "0"))
        more_general_parts.append(mg)
    return all(more_general_parts)

def fulfills(example, hypothesis):
    # An example fulfills a hypothesis exactly when the hypothesis is more general than it
    return more_general(hypothesis, example)

def min_generalizations(h, x):
    # Minimal generalizations of h that cover the positive example x
    h_new = list(h)
    for i in range(len(h)):
        if not fulfills(x[i:i+1], h[i:i+1]):
            h_new[i] = '?' if h[i] != '0' else x[i]
    return [tuple(h_new)]

def min_specializations(h, domains, x):
    # Minimal specializations of h that exclude the negative example x
    results = []
    for i in range(len(h)):
        if h[i] == "?":
            # Replace '?' with every domain value the example does not have
            for val in domains[i]:
                if x[i] != val:
                    h_new = h[:i] + (val,) + h[i+1:]
                    results.append(h_new)
        elif h[i] != "0":
            # Tighten a literal constraint to the empty constraint '0'
            h_new = h[:i] + ('0',) + h[i+1:]
            results.append(h_new)
    return results

with open('data2.csv') as csvFile:
    examples = [tuple(line) for line in csv.reader(csvFile)]

def get_domains(examples):
    # Collect the set of observed values for every column
    d = [set() for i in examples[0]]
    for x in examples:
        for i, xi in enumerate(x):
            d[i].add(xi)
    return [list(sorted(x)) for x in d]

def candidate_elimination(examples):
    domains = get_domains(examples)[:-1]  # drop the domain of the class label

    G = set([g_0(len(domains))])
    S = set([s_0(len(domains))])
    i = 0
    print("\n G[{0}]:".format(i), G)
    print("\n S[{0}]:".format(i), S)
    for xcx in examples:
        i = i + 1
        x, cx = xcx[:-1], xcx[-1]  # split the row into attributes and decision
        if cx == 'Y':  # x is a positive example
            G = {g for g in G if fulfills(x, g)}
            S = generalize_S(x, G, S)
        else:          # x is a negative example
            S = {s for s in S if not fulfills(x, s)}
            G = specialize_G(x, domains, G, S)
        print("\n G[{0}]:".format(i), G)
        print("\n S[{0}]:".format(i), S)
    print()
    return

def generalize_S(x, G, S):
    # Generalize members of S that do not cover the positive example x
    S_prev = list(S)
    for s in S_prev:
        if s not in S:
            continue
        if not fulfills(x, s):
            S.remove(s)
            Splus = min_generalizations(s, x)
            ## keep only generalizations that have a counterpart in G
            S.update([h for h in Splus if any([more_general(g, h)
                                               for g in G])])
            ## remove hypotheses less specific than any other in S
            S.difference_update([h for h in S if
                                 any([more_general(h, h1)
                                      for h1 in S if h != h1])])
    return S

def specialize_G(x, domains, G, S):
    # Specialize members of G that cover the negative example x
    G_prev = list(G)
    for g in G_prev:
        if g not in G:
            continue
        if fulfills(x, g):
            G.remove(g)
            Gminus = min_specializations(g, domains, x)
            ## keep only specializations that have a counterpart in S
            G.update([h for h in Gminus if any([more_general(h, s)
                                                for s in S])])
            ## remove hypotheses less general than any other in G
            G.difference_update([h for h in G if
                                 any([more_general(g1, h)
                                      for g1 in G if h != g1])])
    return G

candidate_elimination(examples)

data2.csv
sunny,warm,normal,strong,warm,same,Y
sunny,warm,high,strong,warm,same,Y
rainy,cold,high,strong,warm,change,N
sunny,warm,high,strong,cool,change,Y
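
After parsing with csv.reader (default comma delimiter), each row becomes a tuple whose last element is the class label; a minimal sketch of what the first row above loads as:

row = ('sunny', 'warm', 'normal', 'strong', 'warm', 'same', 'Y')
attributes, label = row[:-1], row[-1]   # six attribute values and the label 'Y'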

OUTPUT
G[0]: {('?', '?', '?', '?', '?', '?')}
S[0]: {('0', '0', '0', '0', '0', '0')}

G[1]: {('?', '?', '?', '?', '?', '?')}
S[1]: {('sunny', 'warm', 'normal', 'strong', 'warm', 'same')}

G[2]: {('?', '?', '?', '?', '?', '?')}
S[2]: {('sunny', 'warm', '?', 'strong', 'warm', 'same')}

G[3]: {('sunny', '?', '?', '?', '?', '?'), ('?', 'warm', '?', '?', '?', '?'), ('?', '?', '?', '?', '?', 'same')}
S[3]: {('sunny', 'warm', '?', 'strong', 'warm', 'same')}

G[4]: {('sunny', '?', '?', '?', '?', '?'), ('?', 'warm', '?', '?', '?', '?')}
S[4]: {('sunny', 'warm', '?', 'strong', '?', '?')}
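
As an optional check (not part of the program above), the fulfills helper can confirm that the final specific hypothesis S[4] covers every positive training example and rejects the negative one; a minimal sketch that could be appended after the call to candidate_elimination(examples):

s4 = ('sunny', 'warm', '?', 'strong', '?', '?')
positives = [ex[:-1] for ex in examples if ex[-1] == 'Y']
negative = ('rainy', 'cold', 'high', 'strong', 'warm', 'change')
print(all(fulfills(p, s4) for p in positives))   # expected: True
print(fulfills(negative, s4))                    # expected: False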
