
ML Ex1


1. Implement and demonstrate the FIND-S algorithm for finding the most specific hypothesis based on a given set of training data samples. Read the training data from a .CSV file.

import csv

a = []
with open('enjoysport.csv', 'r') as csvfile:
    for row in csv.reader(csvfile):
        a.append(row)
print(a)

print("\nThe total number of training instances are :", len(a))
num_attribute = len(a[0]) - 1

print("\nThe initial hypothesis is :")
hypothesis = ['0'] * num_attribute
print(hypothesis)

# FIND-S: generalise the hypothesis over every positive training instance
for i in range(0, len(a)):
    if a[i][num_attribute] == 'yes':
        for j in range(0, num_attribute):
            if hypothesis[j] == '0' or hypothesis[j] == a[i][j]:
                hypothesis[j] = a[i][j]
            else:
                hypothesis[j] = '?'
        print("\nThe hypothesis for training instance {} is :\n".format(i + 1), hypothesis)

print("\nThe maximally specific hypothesis for the training instances is :")
print(hypothesis)
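With the enjoysport.csv contents listed below, the run should end with the maximally specific hypothesis ['sunny', 'warm', '?', 'strong', '?', '?'], since the three positive instances agree only on the sky, airtemp and wind attributes.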

csv files:

enjoysport.csv:-

sky,airtemp,humidity,wind,water,forcast,enjoysport
sunny,warm,normal,strong,warm,same,yes
sunny,warm,high,strong,warm,same,yes
rainy,cold,high,strong,warm,change,no
sunny,warm,high,strong,cool,change,yes
2. For a given set of training data examples stored in a .CSV file, implement and
demonstrate the Candidate-Elimination algorithm to output a description of the set of
all hypotheses consistent with the training examples.

import numpy as np
import pandas as pd

data = pd.read_csv('enjoysport.csv')
concepts = np.array(data.iloc[:, 0:-1])
print(concepts)
target = np.array(data.iloc[:, -1])
print(target)

def learn(concepts, target):
    specific_h = concepts[0].copy()
    print("initialization of specific_h and general_h")
    print(specific_h)
    general_h = [["?" for i in range(len(specific_h))] for i in range(len(specific_h))]
    print(general_h)

    for i, h in enumerate(concepts):
        print("For Loop Starts")
        if target[i] == "yes":
            print("If instance is Positive")
            for x in range(len(specific_h)):
                if h[x] != specific_h[x]:
                    specific_h[x] = '?'
                    general_h[x][x] = '?'

        if target[i] == "no":
            print("If instance is Negative")
            for x in range(len(specific_h)):
                if h[x] != specific_h[x]:
                    general_h[x][x] = specific_h[x]
                else:
                    general_h[x][x] = '?'

        print("Steps of Candidate Elimination Algorithm", i + 1)
        print(specific_h)
        print(general_h)
        print("\n")

    # drop the fully general rows left over in general_h
    indices = [i for i, val in enumerate(general_h) if val == ['?', '?', '?', '?', '?', '?']]
    for i in indices:
        general_h.remove(['?', '?', '?', '?', '?', '?'])
    return specific_h, general_h

s_final, g_final = learn(concepts, target)

print("Final Specific_h:", s_final, sep="\n")
print("Final General_h:", g_final, sep="\n")
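For the same enjoysport.csv, the algorithm should converge to a final specific hypothesis of ['sunny', 'warm', '?', 'strong', '?', '?'] and a final general hypothesis set of [['sunny', '?', '?', '?', '?', '?'], ['?', 'warm', '?', '?', '?', '?']]: the single negative instance licenses general hypotheses only on the sky and airtemp attributes.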
CSV file:

enjoysport.csv:-

sky,airtemp,humidity,wind,water,forcast,enjoysport
sunny,warm,normal,strong,warm,same,yes
sunny,warm,high,strong,warm,same,yes
rainy,cold,high,strong,warm,change,no
sunny,warm,high,strong,cool,change,yes
3. Write a program to demonstrate the working of the decision tree based ID3 algorithm. Use an
appropriate data set for building the decision tree and apply this knowledge to classify a new
sample.

import math
import csv

def load_csv(filename):
    lines = csv.reader(open(filename, "r"))
    dataset = list(lines)
    headers = dataset.pop(0)
    return dataset, headers

class Node:
    def __init__(self, attribute):
        self.attribute = attribute
        self.children = []
        self.answer = ""

def subtables(data, col, delete):
    # partition the rows of data by the values of attribute col
    dic = {}
    coldata = [row[col] for row in data]
    attr = list(set(coldata))

    counts = [0] * len(attr)
    r = len(data)
    c = len(data[0])
    for x in range(len(attr)):
        for y in range(r):
            if data[y][col] == attr[x]:
                counts[x] += 1

    for x in range(len(attr)):
        dic[attr[x]] = [[0 for i in range(c)] for j in range(counts[x])]
        pos = 0
        for y in range(r):
            if data[y][col] == attr[x]:
                if delete:
                    del data[y][col]
                dic[attr[x]][pos] = data[y]
                pos += 1
    return attr, dic

def entropy(S):
    # entropy of a (binary) list of class labels
    attr = list(set(S))
    if len(attr) == 1:
        return 0

    counts = [0, 0]
    for i in range(2):
        counts[i] = sum([1 for x in S if attr[i] == x]) / (len(S) * 1.0)

    sums = 0
    for cnt in counts:
        sums += -1 * cnt * math.log(cnt, 2)
    return sums

def compute_gain(data, col):
    # information gain of attribute col
    attr, dic = subtables(data, col, delete=False)

    total_size = len(data)
    entropies = [0] * len(attr)
    ratio = [0] * len(attr)

    total_entropy = entropy([row[-1] for row in data])
    for x in range(len(attr)):
        ratio[x] = len(dic[attr[x]]) / (total_size * 1.0)
        entropies[x] = entropy([row[-1] for row in dic[attr[x]]])
        total_entropy -= ratio[x] * entropies[x]
    return total_entropy

def build_tree(data, features):
    lastcol = [row[-1] for row in data]
    if len(set(lastcol)) == 1:
        # all rows share one label: make a leaf
        node = Node("")
        node.answer = lastcol[0]
        return node

    n = len(data[0]) - 1
    gains = [0] * n
    for col in range(n):
        gains[col] = compute_gain(data, col)
    split = gains.index(max(gains))
    node = Node(features[split])
    fea = features[:split] + features[split + 1:]

    attr, dic = subtables(data, split, delete=True)
    for x in range(len(attr)):
        child = build_tree(dic[attr[x]], fea)
        node.children.append((attr[x], child))
    return node

def print_tree(node, level):
    if node.answer != "":
        print(" " * level, node.answer)
        return

    print(" " * level, node.attribute)
    for value, n in node.children:
        print(" " * (level + 1), value)
        print_tree(n, level + 2)

def classify(node, x_test, features):
    if node.answer != "":
        print(node.answer)
        return
    pos = features.index(node.attribute)
    for value, n in node.children:
        if x_test[pos] == value:
            classify(n, x_test, features)

'''Main program'''
dataset, features = load_csv("id3.csv")
node1 = build_tree(dataset, features)

print("The decision tree for the dataset using ID3 algorithm is")
print_tree(node1, 0)
testdata, features = load_csv("id3_test.csv")

for xtest in testdata:
    print("The test instance:", xtest)
    print("The label for test instance:", end=" ")
    classify(node1, xtest, features)
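As a quick sanity check (a sketch, assuming the id3.csv contents listed below), the helper functions can be called directly. With 9 yes and 5 no labels, the dataset entropy is -(9/14)*log2(9/14) - (5/14)*log2(5/14) ≈ 0.940 bits, and Outlook should have the largest information gain (≈ 0.247), making it the root of the tree:

dataset, features = load_csv("id3.csv")
print(entropy([row[-1] for row in dataset]))  # expected ~0.940
print(compute_gain(dataset, 0))               # gain of Outlook, expected ~0.247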
csv files:

id3.csv:-

Outlook,Temperature,Humidity,Wind,Answer
sunny,hot,high,weak,no
sunny,hot,high,strong,no
overcast,hot,high,weak,yes
rain,mild,high,weak,yes
rain,cool,normal,weak,yes
rain,cool,normal,strong,no
overcast,cool,normal,strong,yes
sunny,mild,high,weak,no
sunny,cool,normal,weak,yes
rain,mild,normal,weak,yes
sunny,mild,normal,strong,yes
overcast,mild,high,strong,yes
overcast,hot,normal,weak,yes
rain,mild,high,strong,no

id3_test.csv:-

Outlook,Temperature,Humidity,Wind
rain,cool,normal,strong
sunny,mild,normal,strong
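With the play-tennis tree learned above (Outlook at the root, Humidity under the sunny branch, Wind under the rain branch), the two test instances should be labelled no (rain with strong wind) and yes (sunny with normal humidity) respectively.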
4. Build an Artificial Neural Network by implementing the Backpropagation algorithm and test
the same using appropriate data sets.

import numpy as np

X = np.array(([2, 9], [1, 5], [3, 6]), dtype=float)  # two inputs [sleep, study]
y = np.array(([92], [86], [89]), dtype=float)        # one output [expected % in exams]
X = X / np.amax(X, axis=0)  # normalise by the column-wise maximum of X
y = y / 100

# Sigmoid function
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

# Derivative of sigmoid (expects an already-activated value)
def derivatives_sigmoid(x):
    return x * (1 - x)

# Variable initialization
epoch = 5000             # number of training iterations
lr = 0.1                 # learning rate
inputlayer_neurons = 2   # number of features in the data set
hiddenlayer_neurons = 3  # number of neurons in the hidden layer
output_neurons = 1       # number of neurons at the output layer

# Weight and bias initialization: uniform random values of dim x*y
wh = np.random.uniform(size=(inputlayer_neurons, hiddenlayer_neurons))  # input-to-hidden weights
bh = np.random.uniform(size=(1, hiddenlayer_neurons))                   # hidden-layer bias
wout = np.random.uniform(size=(hiddenlayer_neurons, output_neurons))    # hidden-to-output weights
bout = np.random.uniform(size=(1, output_neurons))                      # output-layer bias

for i in range(epoch):
    # Forward propagation
    hinp1 = np.dot(X, wh)
    hinp = hinp1 + bh
    hlayer_act = sigmoid(hinp)
    outinp1 = np.dot(hlayer_act, wout)
    outinp = outinp1 + bout
    output = sigmoid(outinp)

    # Backpropagation
    EO = y - output
    outgrad = derivatives_sigmoid(output)
    d_output = EO * outgrad
    EH = d_output.dot(wout.T)  # how much the hidden layer contributed to the error
    hiddengrad = derivatives_sigmoid(hlayer_act)
    d_hiddenlayer = EH * hiddengrad

    # dot product of next-layer error and current-layer output
    wout += hlayer_act.T.dot(d_output) * lr
    wh += X.T.dot(d_hiddenlayer) * lr

print("Input: \n" + str(X))
print("Actual Output: \n" + str(y))
print("Predicted Output: \n", output)
5. Write a program to implement the naïve Bayesian classifier for a sample training data set stored as a .CSV file. Compute the accuracy of the classifier, considering a few test data sets.

import csv
import random
import math

def loadcsv(filename):
    lines = csv.reader(open(filename, "r"))
    dataset = list(lines)
    for i in range(len(dataset)):
        # converting strings into numbers for processing
        dataset[i] = [float(x) for x in dataset[i]]
    return dataset

def splitdataset(dataset, splitratio):
    # 67% training size
    trainsize = int(len(dataset) * splitratio)
    trainset = []
    copy = list(dataset)
    while len(trainset) < trainsize:
        # generate random indices into the dataset to pick elements for training data
        index = random.randrange(len(copy))
        trainset.append(copy.pop(index))
    return [trainset, copy]

def separatebyclass(dataset):
    # creates a dictionary of classes 1 and 0 where the values are
    # the instances belonging to each class
    separated = {}
    for i in range(len(dataset)):
        vector = dataset[i]
        if vector[-1] not in separated:
            separated[vector[-1]] = []
        separated[vector[-1]].append(vector)
    return separated

def mean(numbers):
    return sum(numbers) / float(len(numbers))

def stdev(numbers):
    avg = mean(numbers)
    variance = sum([pow(x - avg, 2) for x in numbers]) / float(len(numbers) - 1)
    return math.sqrt(variance)

def summarize(dataset):
    # (mean, stdev) pair for each attribute column
    summaries = [(mean(attribute), stdev(attribute)) for attribute in zip(*dataset)]
    del summaries[-1]  # excluding the class label column
    return summaries

def summarizebyclass(dataset):
    separated = separatebyclass(dataset)
    summaries = {}
    for classvalue, instances in separated.items():
        # summaries is a dict of (mean, std) tuples for each class value
        summaries[classvalue] = summarize(instances)
    return summaries

def calculateprobability(x, mean, stdev):
    # Gaussian (normal) probability density
    exponent = math.exp(-(math.pow(x - mean, 2) / (2 * math.pow(stdev, 2))))
    return (1 / (math.sqrt(2 * math.pi) * stdev)) * exponent

def calculateclassprobabilities(summaries, inputvector):
    probabilities = {}  # probabilities of every class for the test instance
    for classvalue, classsummaries in summaries.items():
        probabilities[classvalue] = 1
        for i in range(len(classsummaries)):
            # mean and sd of every attribute for class 0 and 1 separately
            mean, stdev = classsummaries[i]
            x = inputvector[i]  # test vector's i-th attribute
            probabilities[classvalue] *= calculateprobability(x, mean, stdev)
    return probabilities

def predict(summaries, inputvector):
    # assigns the class that has the highest probability
    probabilities = calculateclassprobabilities(summaries, inputvector)
    bestLabel, bestProb = None, -1
    for classvalue, probability in probabilities.items():
        if bestLabel is None or probability > bestProb:
            bestProb = probability
            bestLabel = classvalue
    return bestLabel

def getpredictions(summaries, testset):
    predictions = []
    for i in range(len(testset)):
        result = predict(summaries, testset[i])
        predictions.append(result)
    return predictions

def getaccuracy(testset, predictions):
    correct = 0
    for i in range(len(testset)):
        if testset[i][-1] == predictions[i]:
            correct += 1
    return (correct / float(len(testset))) * 100.0

def main():
    filename = 'naivedata.csv'
    splitratio = 0.67
    dataset = loadcsv(filename)

    trainingset, testset = splitdataset(dataset, splitratio)
    print('Split {0} rows into train={1} and test={2} rows'.format(
        len(dataset), len(trainingset), len(testset)))
    # prepare model
    summaries = summarizebyclass(trainingset)
    # test model
    predictions = getpredictions(summaries, testset)
    accuracy = getaccuracy(testset, predictions)
    print('Accuracy of the classifier is : {0}%'.format(accuracy))

main()
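Because splitdataset draws the training rows at random, the reported accuracy varies from run to run. As a small illustration of the Gaussian likelihood used above (the mean of 73 and standard deviation of 6.2 are illustrative values, not taken from the data):

print(calculateprobability(71.5, 73, 6.2))  # ~0.0625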
csv files:
naivedata.csv:-

6,148,72,35,0,33.6,0.627,50,1
1,85,66,29,0,26.6,0.351,31,0
8,183,64,0,0,23.3,0.672,32,1
1,89,66,23,94,28.1,0.167,21,0
0,137,40,35,168,43.1,2.288,33,1
5,116,74,0,0,25.6,0.201,30,0
3,78,50,32,88,31,0.248,26,1
10,115,0,0,0,35.3,0.134,29,0
2,197,70,45,543,30.5,0.158,53,1
8,125,96,0,0,0,0.232,54,1
4,110,92,0,0,37.6,0.191,30,0
10,168,74,0,0,38,0.537,34,1
10,139,80,0,0,27.1,1.441,57,0
1,189,60,23,846,30.1,0.398,59,1
5,166,72,19,175,25.8,0.587,51,1
7,100,0,0,0,30,0.484,32,1
0,118,84,47,230,45.8,0.551,31,1
7,107,74,0,0,29.6,0.254,31,1
1,103,30,38,83,43.3,0.183,33,0
1,115,70,30,96,34.6,0.529,32,1
3,126,88,41,235,39.3,0.704,27,0
8,99,84,0,0,35.4,0.388,50,0
7,196,90,0,0,39.8,0.451,41,1
9,119,80,35,0,29,0.263,29,1
11,143,94,33,146,36.6,0.254,51,1
10,125,70,26,115,31.1,0.205,41,1
7,147,76,0,0,39.4,0.257,43,1
1,97,66,15,140,23.2,0.487,22,0
13,145,82,19,110,22.2,0.245,57,0
5,117,92,0,0,34.1,0.337,38,0
5,109,75,26,0,36,0.546,60,0
3,158,76,36,245,31.6,0.851,28,1
3,88,58,11,54,24.8,0.267,22,0
6,92,92,0,0,19.9,0.188,28,0
10,122,78,31,0,27.6,0.512,45,0
4,103,60,33,192,24,0.966,33,0
11,138,76,0,0,33.2,0.42,35,0
9,102,76,37,0,32.9,0.665,46,1
2,90,68,42,0,38.2,0.503,27,1
4,111,72,47,207,37.1,1.39,56,1
3,180,64,25,70,34,0.271,26,0
7,133,84,0,0,40.2,0.696,37,0
7,106,92,18,0,22.7,0.235,48,0
9,171,110,24,240,45.4,0.721,54,1
7,159,64,0,0,27.4,0.294,40,0
0,180,66,39,0,42,1.893,25,1
1,146,56,0,0,29.7,0.564,29,0
2,71,70,27,0,28,0.586,22,0
7,103,66,32,0,39.1,0.344,31,1
