ML Ex1
1. Implement and demonstrate the FIND-S algorithm for finding the most specific
hypothesis based on a given set of training data samples. Read the training data from a
.CSV file.
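Only the csv module is needed for this exercise; a minimal FIND-S sketch is given below,
assuming enjoysport.csv is laid out as shown after the code (one header row, with the
yes/no label in the last column):

import csv
a = []
with open('enjoysport.csv', 'r') as csvfile:
    for row in csv.reader(csvfile):
        a.append(row)
a = a[1:]  # drop the header row
num_attributes = len(a[0]) - 1
hypothesis = ['0'] * num_attributes  # start from the most specific hypothesis
for row in a:
    if row[-1] == 'yes':  # FIND-S ignores negative examples
        for j in range(num_attributes):
            if hypothesis[j] == '0':
                hypothesis[j] = row[j]        # first positive example fills the slot
            elif hypothesis[j] != row[j]:
                hypothesis[j] = '?'           # conflicting values generalize to '?'
print("The maximally specific hypothesis:", hypothesis)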
csv files:
enjoysport.csv:- (the standard EnjoySport training set from Mitchell is assumed here)
sky,airtemp,humidity,wind,water,forecast,enjoysport
sunny,warm,normal,strong,warm,same,yes
sunny,warm,high,strong,warm,same,yes
rainy,cold,high,strong,warm,change,no
sunny,warm,high,strong,cool,change,yes
2. For a given set of training data examples stored in a .CSV file, implement and
demonstrate the Candidate-Elimination algorithm to output a description of the set of
all hypotheses consistent with the training examples.
import numpy as np
import pandas as pd
data = pd.read_csv('enjoysport.csv')
concepts = np.array(data.iloc[:, 0:-1])
print(concepts)
target = np.array(data.iloc[:, -1])
print(target)
def learn(concepts, target):
    specific_h = concepts[0].copy()
    print("initialization of specific_h and general_h")
    print(specific_h)
    general_h = [["?" for i in range(len(specific_h))] for i in range(len(specific_h))]
    print(general_h)
    for i, h in enumerate(concepts):
        print("For Loop Starts")
        if target[i] == "yes":
            print("If instance is Positive ")
            # generalize specific_h just enough to cover the positive example
            for x in range(len(specific_h)):
                if h[x] != specific_h[x]:
                    specific_h[x] = '?'
                    general_h[x][x] = '?'
        if target[i] == "no":
            print("If instance is Negative ")
            # specialize general_h against the negative example
            for x in range(len(specific_h)):
                if h[x] != specific_h[x]:
                    general_h[x][x] = specific_h[x]
                else:
                    general_h[x][x] = '?'
    # drop the all-'?' rows that were never specialized
    indices = [i for i, val in enumerate(general_h) if val == ['?'] * len(specific_h)]
    for i in indices:
        general_h.remove(['?'] * len(specific_h))
    return specific_h, general_h
s_final, g_final = learn(concepts, target)
print("Final Specific_h:", s_final)
print("Final General_h:", g_final)
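With the EnjoySport rows assumed above, the run should end with the textbook result:
Final Specific_h: ['sunny' 'warm' '?' 'strong' '?' '?']
Final General_h: [['sunny', '?', '?', '?', '?', '?'], ['?', 'warm', '?', '?', '?', '?']]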
3. Write a program to demonstrate the working of the decision tree based ID3 algorithm.
Use an appropriate data set for building the decision tree and apply this knowledge to
classify a new sample.
import math
import csv
def load_csv(filename):
    lines = csv.reader(open(filename, "r"))
    dataset = list(lines)
    headers = dataset.pop(0)
    return dataset, headers
class Node:
    def __init__(self, attribute):
        self.attribute = attribute
        self.children = []
        self.answer = ""
def subtables(data, col, delete):
    dic = {}
    coldata = [row[col] for row in data]
    attr = list(set(coldata))
    counts = [0] * len(attr)
    r = len(data)
    c = len(data[0])
    for x in range(len(attr)):
        for y in range(r):
            if data[y][col] == attr[x]:
                counts[x] += 1
    for x in range(len(attr)):
        dic[attr[x]] = [[0 for i in range(c)] for j in range(counts[x])]
        pos = 0
        for y in range(r):
            if data[y][col] == attr[x]:
                if delete:
                    del data[y][col]
                dic[attr[x]][pos] = data[y]
                pos += 1
    return attr, dic
def entropy(S):
    attr = list(set(S))
    if len(attr) == 1:  # pure node
        return 0
    counts = [0, 0]     # assumes a two-class target
    for i in range(2):
        counts[i] = sum([1 for x in S if attr[i] == x]) / (len(S) * 1.0)
    sums = 0
    for cnt in counts:
        sums += -1 * cnt * math.log(cnt, 2)
    return sums
def compute_gain(data, col):
    attr, dic = subtables(data, col, delete=False)
    total_size = len(data)
    entropies = [0] * len(attr)
    ratio = [0] * len(attr)
    total_entropy = entropy([row[-1] for row in data])
    for x in range(len(attr)):
        ratio[x] = len(dic[attr[x]]) / (total_size * 1.0)
        entropies[x] = entropy([row[-1] for row in dic[attr[x]]])
        total_entropy -= ratio[x] * entropies[x]
    return total_entropy
def build_tree(data, features):
    lastcol = [row[-1] for row in data]
    if (len(set(lastcol))) == 1:  # all examples share one label
        node = Node("")
        node.answer = lastcol[0]
        return node
    n = len(data[0]) - 1
    gains = [0] * n
    for col in range(n):
        gains[col] = compute_gain(data, col)
    split = gains.index(max(gains))  # split on the attribute with highest gain
    node = Node(features[split])
    fea = features[:split] + features[split + 1:]
    attr, dic = subtables(data, split, delete=True)
    for x in range(len(attr)):
        child = build_tree(dic[attr[x]], fea)
        node.children.append((attr[x], child))
    return node
def print_tree(node, level):
    if node.answer != "":
        print(" " * level, node.answer)
        return
    print(" " * level, node.attribute)
    for value, n in node.children:
        print(" " * (level + 1), value)
        print_tree(n, level + 2)
def classify(node, x_test, features):
    if node.answer != "":
        print(node.answer)
        return
    pos = features.index(node.attribute)
    for value, n in node.children:
        if x_test[pos] == value:
            classify(n, x_test, features)
'''Main program'''
dataset, features = load_csv("id3.csv")
node1 = build_tree(dataset, features)
print("The decision tree for the dataset using ID3 algorithm is")
print_tree(node1, 0)
testdata, features = load_csv("id3_test.csv")
for xtest in testdata:
    print("The test instance:", xtest)
    print("The label for test instance:", end=" ")
    classify(node1, xtest, features)
id3.csv:-
Outlook,Temperature,Humidity,Wind,Answer
sunny,hot,high,weak,no
sunny,hot,high,strong,no
overcast,hot,high,weak,yes
rain,mild,high,weak,yes
rain,cool,normal,weak,yes
rain,cool,normal,strong,no
overcast,cool,normal,strong,yes
sunny,mild,high,weak,no
sunny,cool,normal,weak,yes
rain,mild,normal,weak,yes
sunny,mild,normal,strong,yes
overcast,mild,high,strong,yes
overcast,hot,normal,weak,yes
rain,mild,high,strong,no
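As a quick check on the entropy helper above: this file has 9 'yes' and 5 'no' rows, so
the entropy of the full dataset is -(9/14)*log2(9/14) - (5/14)*log2(5/14) ≈ 0.940 bits,
which the code reproduces:

print(entropy(['yes'] * 9 + ['no'] * 5))  # prints approximately 0.940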
id3_test.csv:-
Outlook,Temperature,Humidity,Wind
rain,cool,normal,strong
sunny,mild,normal,strong
4. Build an Artificial Neural Network by implementing the Backpropagation algorithm and test
the same using appropriate data sets.
import numpy as np
X = np.array(([2, 9], [1, 5], [3, 6]), dtype=float)  # two inputs [sleep, study]
y = np.array(([92], [86], [89]), dtype=float)        # one output [Expected % in Exams]
X = X / np.amax(X, axis=0)  # scale by the column-wise maximum of X
y = y / 100

# Sigmoid function
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

# Derivative of sigmoid (expects the already-activated value)
def derivatives_sigmoid(x):
    return x * (1 - x)

# Variable initialization
epoch = 5000  # number of training iterations
lr = 0.1      # learning rate
inputlayer_neurons = 2   # number of features in the data set
hiddenlayer_neurons = 3  # number of neurons in the hidden layer
output_neurons = 1       # number of neurons at the output layer

# Weight and bias initialization (required before the forward pass)
wh = np.random.uniform(size=(inputlayer_neurons, hiddenlayer_neurons))
bh = np.random.uniform(size=(1, hiddenlayer_neurons))
wout = np.random.uniform(size=(hiddenlayer_neurons, output_neurons))
bout = np.random.uniform(size=(1, output_neurons))

for i in range(epoch):
    # Forward propagation
    hinp1 = np.dot(X, wh)
    hinp = hinp1 + bh
    hlayer_act = sigmoid(hinp)
    outinp1 = np.dot(hlayer_act, wout)
    outinp = outinp1 + bout
    output = sigmoid(outinp)

    # Backpropagation
    EO = y - output
    outgrad = derivatives_sigmoid(output)
    d_output = EO * outgrad
    EH = d_output.dot(wout.T)
    hiddengrad = derivatives_sigmoid(hlayer_act)
    d_hiddenlayer = EH * hiddengrad

    # Weight updates
    wout += hlayer_act.T.dot(d_output) * lr
    bout += np.sum(d_output, axis=0, keepdims=True) * lr
    wh += X.T.dot(d_hiddenlayer) * lr
    bh += np.sum(d_hiddenlayer, axis=0, keepdims=True) * lr

print("Input: \n" + str(X))
print("Actual Output: \n" + str(y))
print("Predicted Output: \n", output)
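To query the trained network on a fresh (sleep, study) pair, the new input must be
scaled the same way as the training data; a small sketch (the student values and the
xmax name are made up for illustration):

xmax = np.array([3.0, 9.0])            # column-wise maxima of the original X above
xnew = np.array([[4, 8]]) / xmax       # hypothetical student: 4 hours sleep, 8 hours study
hnew = sigmoid(np.dot(xnew, wh) + bh)  # forward pass through the trained weights
print("Predicted % in Exams:", sigmoid(np.dot(hnew, wout) + bout) * 100)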
5. Write a program to implement the naive Bayesian classifier for a sample training data
set stored as a .CSV file. Compute the accuracy of the classifier, considering few test
data sets.
import csv
import random
import math
def loadcsv(filename):
    lines = csv.reader(open(filename, "r"))
    dataset = list(lines)
    for i in range(len(dataset)):
        # converting strings into numbers for processing
        dataset[i] = [float(x) for x in dataset[i]]
    return dataset
def separatebyclass(dataset):
    separated = {}  # dictionary of classes 1 and 0
    # creates a dictionary of classes 1 and 0 where the values are
    # the instances belonging to each class
    for i in range(len(dataset)):
        vector = dataset[i]
        if (vector[-1] not in separated):
            separated[vector[-1]] = []
        separated[vector[-1]].append(vector)
    return separated
def mean(numbers):
    return sum(numbers) / float(len(numbers))
def stdev(numbers):
    avg = mean(numbers)
    variance = sum([pow(x - avg, 2) for x in numbers]) / float(len(numbers) - 1)
    return math.sqrt(variance)
def summarize(dataset):
    # one (mean, stdev) tuple per attribute column
    summaries = [(mean(attribute), stdev(attribute)) for attribute in zip(*dataset)]
    del summaries[-1]  # drop the statistics of the class column
    return summaries
def summarizebyclass(dataset):
    separated = separatebyclass(dataset)
    #print(separated)
    # summaries is a dic of tuples (mean, std) for each class value
    summaries = {}
    for classvalue, instances in separated.items():
        summaries[classvalue] = summarize(instances)  # summarize computes mean and std
    return summaries
def calculateprobability(x, mean, stdev):
    # Gaussian (normal dist) likelihood of x given the class statistics
    exponent = math.exp(-(math.pow(x - mean, 2) / (2 * math.pow(stdev, 2))))
    return (1 / (math.sqrt(2 * math.pi) * stdev)) * exponent
def calculateclassprobabilities(summaries, inputvector):
    probabilities = {}
    for classvalue, classsummaries in summaries.items():
        probabilities[classvalue] = 1
        for i in range(len(classsummaries)):
            mean, stdev = classsummaries[i]
            x = inputvector[i]
            probabilities[classvalue] *= calculateprobability(x, mean, stdev)
    return probabilities
def splitdataset(dataset, splitratio):
    trainsize = int(len(dataset) * splitratio)
    trainset = []
    copy = list(dataset)
    while len(trainset) < trainsize:
        index = random.randrange(len(copy))  # sample without replacement
        trainset.append(copy.pop(index))
    return [trainset, copy]
def predict(summaries, inputvector):
    probabilities = calculateclassprobabilities(summaries, inputvector)
    bestLabel, bestProb = None, -1
    for classvalue, probability in probabilities.items():
        if bestLabel is None or probability > bestProb:
            bestProb = probability
            bestLabel = classvalue
    return bestLabel
def getpredictions(summaries, testset):
    predictions = []
    for i in range(len(testset)):
        predictions.append(predict(summaries, testset[i]))
    return predictions
def getaccuracy(testset, predictions):
    correct = 0
    for i in range(len(testset)):
        if testset[i][-1] == predictions[i]:
            correct += 1
    return (correct / float(len(testset))) * 100.0
def main():
    filename = 'naivedata.csv'
    splitratio = 0.67
    dataset = loadcsv(filename)
    trainingset, testset = splitdataset(dataset, splitratio)
    print('Split {0} rows into train={1} and test={2} rows'.format(len(dataset), len(trainingset), len(testset)))
    summaries = summarizebyclass(trainingset)         # prepare the model
    predictions = getpredictions(summaries, testset)  # test the model
    accuracy = getaccuracy(testset, predictions)
    print('Accuracy of the classifier is: {0}%'.format(accuracy))
main()
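As a sanity check on calculateprobability above: an attribute value x = 71.5 under class
statistics mean = 73 and stdev = 6.2 (illustrative numbers, not taken from naivedata.csv)
has a Gaussian density of about 0.0625:

print(calculateprobability(71.5, 73, 6.2))  # prints approximately 0.0625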
csv files:
naivedata.csv:-
1,85,66,29,0,26.6,0.351,31,0
1,89,66,23,94,28.1,0.167,21,0
3,78,50,32,88,31,0.248,26,1
8,125,96,0,0,0,0.232,54,1
10,168,74,0,0,38,0.537,34,1
7,100,0,0,0,30,0.484,32,1
8,99,84,0,0,35.4,0.388,50,0
9,119,80,35,0,29,0.263,29,1
5,109,75,26,0,36,0.546,60,0
3,88,58,11,54,24.8,0.267,22,0
6,92,92,0,0,19.9,0.188,28,0
2,90,68,42,0,38.2,0.503,27,1
3,180,64,25,70,34,0.271,26,0
0,180,66,39,0,42,1.893,25,1
2,71,70,27,0,28,0.586,22,0