
ML Ex1


1. Implement and demonstrate the FIND-S algorithm for finding the most specific hypothesis based on a given set of training data samples. Read the training data from a .CSV file.

import csv

a = []
with open('enjoysport.csv', 'r') as csvfile:
    for row in csv.reader(csvfile):
        a.append(row)
print(a)

print("\nThe total number of training instances are :", len(a))
num_attribute = len(a[0]) - 1

print("\nThe initial hypothesis is :")
hypothesis = ['0'] * num_attribute
print(hypothesis)

# FIND-S: generalise the hypothesis over every positive training instance
for i in range(0, len(a)):
    if a[i][num_attribute] == 'yes':
        for j in range(0, num_attribute):
            if hypothesis[j] == '0' or hypothesis[j] == a[i][j]:
                hypothesis[j] = a[i][j]
            else:
                hypothesis[j] = '?'
        print("\nThe hypothesis for training instance {} is :\n".format(i + 1), hypothesis)

print("\nThe maximally specific hypothesis for the training instances is :")
print(hypothesis)
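With the enjoysport.csv contents listed below, the run should end with the maximally specific hypothesis ['sunny', 'warm', '?', 'strong', '?', '?'], since the three positive instances agree only on the sky, airtemp and wind attributes.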

csv files:

enjoysport.csv:-

sky,airtemp,humidity,wind,water,forcast,enjoysport
sunny,warm,normal,strong,warm,same,yes
sunny,warm,high,strong,warm,same,yes
rainy,cold,high,strong,warm,change,no
sunny,warm,high,strong,cool,change,yes
2. For a given set of training data examples stored in a .CSV file, implement and
demonstrate the Candidate-Elimination algorithm to output a description of the set of
all hypotheses consistent with the training examples.

import numpy as np
import pandas as pd

data = pd.read_csv('enjoysport.csv')
concepts = np.array(data.iloc[:, 0:-1])
print(concepts)
target = np.array(data.iloc[:, -1])
print(target)

def learn(concepts, target):
    specific_h = concepts[0].copy()
    print("initialization of specific_h and general_h")
    print(specific_h)
    general_h = [["?" for i in range(len(specific_h))] for i in range(len(specific_h))]
    print(general_h)

    for i, h in enumerate(concepts):
        print("For Loop Starts")
        if target[i] == "yes":
            print("If instance is Positive")
            for x in range(len(specific_h)):
                if h[x] != specific_h[x]:
                    specific_h[x] = '?'
                    general_h[x][x] = '?'

        if target[i] == "no":
            print("If instance is Negative")
            for x in range(len(specific_h)):
                if h[x] != specific_h[x]:
                    general_h[x][x] = specific_h[x]
                else:
                    general_h[x][x] = '?'

        print("Steps of Candidate Elimination Algorithm", i + 1)
        print(specific_h)
        print(general_h)
        print("\n")

    # drop the fully general rows left over in general_h
    indices = [i for i, val in enumerate(general_h) if val == ['?', '?', '?', '?', '?', '?']]
    for i in indices:
        general_h.remove(['?', '?', '?', '?', '?', '?'])
    return specific_h, general_h

s_final, g_final = learn(concepts, target)

print("Final Specific_h:", s_final, sep="\n")
print("Final General_h:", g_final, sep="\n")
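For the same enjoysport.csv, the algorithm should converge to a final specific hypothesis of ['sunny', 'warm', '?', 'strong', '?', '?'] and a final general hypothesis set of [['sunny', '?', '?', '?', '?', '?'], ['?', 'warm', '?', '?', '?', '?']]: the single negative instance licenses general hypotheses only on the sky and airtemp attributes.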
CSV file:

enjoysport.csv:-

sky,airtemp,humidity,wind,water,forcast,enjoysport
sunny,warm,normal,strong,warm,same,yes
sunny,warm,high,strong,warm,same,yes
rainy,cold,high,strong,warm,change,no
sunny,warm,high,strong,cool,change,yes
3. Write a program to demonstrate the working of the decision tree based ID3 algorithm. Use an
appropriate data set for building the decision tree and apply this knowledge to classify a new
sample.

import math
import csv

def load_csv(filename):
    lines = csv.reader(open(filename, "r"))
    dataset = list(lines)
    headers = dataset.pop(0)
    return dataset, headers

class Node:
    def __init__(self, attribute):
        self.attribute = attribute
        self.children = []
        self.answer = ""

def subtables(data, col, delete):
    # partition the rows of data by the values of attribute col
    dic = {}
    coldata = [row[col] for row in data]
    attr = list(set(coldata))

    counts = [0] * len(attr)
    r = len(data)
    c = len(data[0])
    for x in range(len(attr)):
        for y in range(r):
            if data[y][col] == attr[x]:
                counts[x] += 1

    for x in range(len(attr)):
        dic[attr[x]] = [[0 for i in range(c)] for j in range(counts[x])]
        pos = 0
        for y in range(r):
            if data[y][col] == attr[x]:
                if delete:
                    del data[y][col]
                dic[attr[x]][pos] = data[y]
                pos += 1
    return attr, dic

def entropy(S):
    # entropy of a (binary) list of class labels
    attr = list(set(S))
    if len(attr) == 1:
        return 0

    counts = [0, 0]
    for i in range(2):
        counts[i] = sum([1 for x in S if attr[i] == x]) / (len(S) * 1.0)

    sums = 0
    for cnt in counts:
        sums += -1 * cnt * math.log(cnt, 2)
    return sums

def compute_gain(data, col):
    # information gain of attribute col
    attr, dic = subtables(data, col, delete=False)

    total_size = len(data)
    entropies = [0] * len(attr)
    ratio = [0] * len(attr)

    total_entropy = entropy([row[-1] for row in data])
    for x in range(len(attr)):
        ratio[x] = len(dic[attr[x]]) / (total_size * 1.0)
        entropies[x] = entropy([row[-1] for row in dic[attr[x]]])
        total_entropy -= ratio[x] * entropies[x]
    return total_entropy

def build_tree(data, features):
    lastcol = [row[-1] for row in data]
    if len(set(lastcol)) == 1:
        # all rows share one label: make a leaf
        node = Node("")
        node.answer = lastcol[0]
        return node

    n = len(data[0]) - 1
    gains = [0] * n
    for col in range(n):
        gains[col] = compute_gain(data, col)
    split = gains.index(max(gains))
    node = Node(features[split])
    fea = features[:split] + features[split + 1:]

    attr, dic = subtables(data, split, delete=True)
    for x in range(len(attr)):
        child = build_tree(dic[attr[x]], fea)
        node.children.append((attr[x], child))
    return node

def print_tree(node, level):
    if node.answer != "":
        print(" " * level, node.answer)
        return

    print(" " * level, node.attribute)
    for value, n in node.children:
        print(" " * (level + 1), value)
        print_tree(n, level + 2)

def classify(node, x_test, features):
    if node.answer != "":
        print(node.answer)
        return
    pos = features.index(node.attribute)
    for value, n in node.children:
        if x_test[pos] == value:
            classify(n, x_test, features)

'''Main program'''
dataset, features = load_csv("id3.csv")
node1 = build_tree(dataset, features)

print("The decision tree for the dataset using ID3 algorithm is")
print_tree(node1, 0)
testdata, features = load_csv("id3_test.csv")

for xtest in testdata:
    print("The test instance:", xtest)
    print("The label for test instance:", end=" ")
    classify(node1, xtest, features)
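As a quick sanity check (a sketch, assuming the id3.csv contents listed below), the helper functions can be called directly. With 9 yes and 5 no labels, the dataset entropy is -(9/14)*log2(9/14) - (5/14)*log2(5/14) ≈ 0.940 bits, and Outlook should have the largest information gain (≈ 0.247), making it the root of the tree:

dataset, features = load_csv("id3.csv")
print(entropy([row[-1] for row in dataset]))  # expected ~0.940
print(compute_gain(dataset, 0))               # gain of Outlook, expected ~0.247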
csv files:

id3.csv:-

Outlook,Temperature,Humidity,Wind,Answer
sunny,hot,high,weak,no
sunny,hot,high,strong,no
overcast,hot,high,weak,yes
rain,mild,high,weak,yes
rain,cool,normal,weak,yes
rain,cool,normal,strong,no
overcast,cool,normal,strong,yes
sunny,mild,high,weak,no
sunny,cool,normal,weak,yes
rain,mild,normal,weak,yes
sunny,mild,normal,strong,yes
overcast,mild,high,strong,yes
overcast,hot,normal,weak,yes
rain,mild,high,strong,no

id3_test.csv:-

Outlook,Temperature,Humidity,Wind
rain,cool,normal,strong
sunny,mild,normal,strong
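With the play-tennis tree learned above (Outlook at the root, Humidity under the sunny branch, Wind under the rain branch), the two test instances should be labelled no (rain with strong wind) and yes (sunny with normal humidity) respectively.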
4. Build an Artificial Neural Network by implementing the Backpropagation algorithm and test
the same using appropriate data sets.

import numpy as np

X = np.array(([2, 9], [1, 5], [3, 6]), dtype=float)  # two inputs [sleep, study]
y = np.array(([92], [86], [89]), dtype=float)        # one output [expected % in exams]
X = X / np.amax(X, axis=0)  # normalise by the column-wise maximum of X
y = y / 100

# Sigmoid function
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

# Derivative of sigmoid (expects an already-activated value)
def derivatives_sigmoid(x):
    return x * (1 - x)

# Variable initialization
epoch = 5000             # number of training iterations
lr = 0.1                 # learning rate
inputlayer_neurons = 2   # number of features in the data set
hiddenlayer_neurons = 3  # number of neurons in the hidden layer
output_neurons = 1       # number of neurons at the output layer

# Weight and bias initialization: uniform random values of dim x*y
wh = np.random.uniform(size=(inputlayer_neurons, hiddenlayer_neurons))  # input-to-hidden weights
bh = np.random.uniform(size=(1, hiddenlayer_neurons))                   # hidden-layer bias
wout = np.random.uniform(size=(hiddenlayer_neurons, output_neurons))    # hidden-to-output weights
bout = np.random.uniform(size=(1, output_neurons))                      # output-layer bias

for i in range(epoch):
    # Forward propagation
    hinp1 = np.dot(X, wh)
    hinp = hinp1 + bh
    hlayer_act = sigmoid(hinp)
    outinp1 = np.dot(hlayer_act, wout)
    outinp = outinp1 + bout
    output = sigmoid(outinp)

    # Backpropagation
    EO = y - output
    outgrad = derivatives_sigmoid(output)
    d_output = EO * outgrad
    EH = d_output.dot(wout.T)  # how much the hidden layer contributed to the error
    hiddengrad = derivatives_sigmoid(hlayer_act)
    d_hiddenlayer = EH * hiddengrad

    # dot product of next-layer error and current-layer output
    wout += hlayer_act.T.dot(d_output) * lr
    wh += X.T.dot(d_hiddenlayer) * lr

print("Input: \n" + str(X))
print("Actual Output: \n" + str(y))
print("Predicted Output: \n", output)
5. Write a program to implement the naïve Bayesian classifier for a sample training data set stored as a .CSV file. Compute the accuracy of the classifier, considering a few test data sets.

import csv
import random
import math

def loadcsv(filename):
    lines = csv.reader(open(filename, "r"))
    dataset = list(lines)
    for i in range(len(dataset)):
        # converting strings into numbers for processing
        dataset[i] = [float(x) for x in dataset[i]]
    return dataset

def splitdataset(dataset, splitratio):
    # 67% training size
    trainsize = int(len(dataset) * splitratio)
    trainset = []
    copy = list(dataset)
    while len(trainset) < trainsize:
        # generate random indices into the dataset to pick elements for training data
        index = random.randrange(len(copy))
        trainset.append(copy.pop(index))
    return [trainset, copy]

def separatebyclass(dataset):
    # creates a dictionary of classes 1 and 0 where the values are
    # the instances belonging to each class
    separated = {}
    for i in range(len(dataset)):
        vector = dataset[i]
        if vector[-1] not in separated:
            separated[vector[-1]] = []
        separated[vector[-1]].append(vector)
    return separated

def mean(numbers):
    return sum(numbers) / float(len(numbers))

def stdev(numbers):
    avg = mean(numbers)
    variance = sum([pow(x - avg, 2) for x in numbers]) / float(len(numbers) - 1)
    return math.sqrt(variance)

def summarize(dataset):
    # (mean, stdev) pair for each attribute column
    summaries = [(mean(attribute), stdev(attribute)) for attribute in zip(*dataset)]
    del summaries[-1]  # excluding the class label column
    return summaries

def summarizebyclass(dataset):
    separated = separatebyclass(dataset)
    summaries = {}
    for classvalue, instances in separated.items():
        # summaries is a dict of (mean, std) tuples for each class value
        summaries[classvalue] = summarize(instances)
    return summaries

def calculateprobability(x, mean, stdev):
    # Gaussian (normal) probability density
    exponent = math.exp(-(math.pow(x - mean, 2) / (2 * math.pow(stdev, 2))))
    return (1 / (math.sqrt(2 * math.pi) * stdev)) * exponent

def calculateclassprobabilities(summaries, inputvector):
    probabilities = {}  # probabilities of every class for the test instance
    for classvalue, classsummaries in summaries.items():
        probabilities[classvalue] = 1
        for i in range(len(classsummaries)):
            # mean and sd of every attribute for class 0 and 1 separately
            mean, stdev = classsummaries[i]
            x = inputvector[i]  # test vector's i-th attribute
            probabilities[classvalue] *= calculateprobability(x, mean, stdev)
    return probabilities

def predict(summaries, inputvector):
    # assigns the class that has the highest probability
    probabilities = calculateclassprobabilities(summaries, inputvector)
    bestLabel, bestProb = None, -1
    for classvalue, probability in probabilities.items():
        if bestLabel is None or probability > bestProb:
            bestProb = probability
            bestLabel = classvalue
    return bestLabel

def getpredictions(summaries, testset):
    predictions = []
    for i in range(len(testset)):
        result = predict(summaries, testset[i])
        predictions.append(result)
    return predictions

def getaccuracy(testset, predictions):
    correct = 0
    for i in range(len(testset)):
        if testset[i][-1] == predictions[i]:
            correct += 1
    return (correct / float(len(testset))) * 100.0

def main():
    filename = 'naivedata.csv'
    splitratio = 0.67
    dataset = loadcsv(filename)

    trainingset, testset = splitdataset(dataset, splitratio)
    print('Split {0} rows into train={1} and test={2} rows'.format(
        len(dataset), len(trainingset), len(testset)))
    # prepare model
    summaries = summarizebyclass(trainingset)
    # test model
    predictions = getpredictions(summaries, testset)
    accuracy = getaccuracy(testset, predictions)
    print('Accuracy of the classifier is : {0}%'.format(accuracy))

main()
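Because splitdataset draws the training rows at random, the reported accuracy varies from run to run. As a small illustration of the Gaussian likelihood used above (the mean of 73 and standard deviation of 6.2 are illustrative values, not taken from the data):

print(calculateprobability(71.5, 73, 6.2))  # ~0.0625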
csv files:
naivedata.csv:-

6,148,72,35,0,33.6,0.627,50,1
1,85,66,29,0,26.6,0.351,31,0
8,183,64,0,0,23.3,0.672,32,1
1,89,66,23,94,28.1,0.167,21,0
0,137,40,35,168,43.1,2.288,33,1
5,116,74,0,0,25.6,0.201,30,0
3,78,50,32,88,31,0.248,26,1
10,115,0,0,0,35.3,0.134,29,0
2,197,70,45,543,30.5,0.158,53,1
8,125,96,0,0,0,0.232,54,1
4,110,92,0,0,37.6,0.191,30,0
10,168,74,0,0,38,0.537,34,1
10,139,80,0,0,27.1,1.441,57,0
1,189,60,23,846,30.1,0.398,59,1
5,166,72,19,175,25.8,0.587,51,1
7,100,0,0,0,30,0.484,32,1
0,118,84,47,230,45.8,0.551,31,1
7,107,74,0,0,29.6,0.254,31,1
1,103,30,38,83,43.3,0.183,33,0
1,115,70,30,96,34.6,0.529,32,1
3,126,88,41,235,39.3,0.704,27,0
8,99,84,0,0,35.4,0.388,50,0
7,196,90,0,0,39.8,0.451,41,1
9,119,80,35,0,29,0.263,29,1
11,143,94,33,146,36.6,0.254,51,1
10,125,70,26,115,31.1,0.205,41,1
7,147,76,0,0,39.4,0.257,43,1
1,97,66,15,140,23.2,0.487,22,0
13,145,82,19,110,22.2,0.245,57,0
5,117,92,0,0,34.1,0.337,38,0
5,109,75,26,0,36,0.546,60,0
3,158,76,36,245,31.6,0.851,28,1
3,88,58,11,54,24.8,0.267,22,0
6,92,92,0,0,19.9,0.188,28,0
10,122,78,31,0,27.6,0.512,45,0
4,103,60,33,192,24,0.966,33,0
11,138,76,0,0,33.2,0.42,35,0
9,102,76,37,0,32.9,0.665,46,1
2,90,68,42,0,38.2,0.503,27,1
4,111,72,47,207,37.1,1.39,56,1
3,180,64,25,70,34,0.271,26,0
7,133,84,0,0,40.2,0.696,37,0
7,106,92,18,0,22.7,0.235,48,0
9,171,110,24,240,45.4,0.721,54,1
7,159,64,0,0,27.4,0.294,40,0
0,180,66,39,0,42,1.893,25,1
1,146,56,0,0,29.7,0.564,29,0
2,71,70,27,0,28,0.586,22,0
7,103,66,32,0,39.1,0.344,31,1
