Pra 5 ML
Pra 5 ML
Pra 5 ML
Practical 5
AIM: Write a program to demonstrate the working of the decision-tree-based ID3
algorithm. Use an appropriate data set to build the decision tree, and apply this
knowledge to classify a new sample.
THEORY:
GCET 19
Machine Learning [102045609] 12102080501071
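The information gain of an attribute A over a set of instances S is
$$Gain(S, A) = Entropy(S) - \sum_{v \in Values(A)} \frac{|S_v|}{|S|} \, Entropy(S_v)$$
where S_v is the subset of instances of S for which attribute A takes the value v, and
|S_v| is the number of instances in that subset.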
CODE :
import numpy as np
import math
import csv
def read_data(filename):
    """Read a comma-separated file into its header row and its data rows.

    Args:
        filename: Path of the CSV file to read. The first row is treated as
            the header (column names); every following row is a data row.

    Returns:
        A ``(metadata, traindata)`` tuple: ``metadata`` is the list of header
        strings and ``traindata`` is a list of rows, each a list of strings.
        An empty file yields ``([], [])``.
    """
    # newline='' lets the csv module do its own newline handling, so quoted
    # fields containing line breaks are parsed correctly.
    with open(filename, 'r', newline='') as csvfile:
        datareader = csv.reader(csvfile, delimiter=',')
        # The default avoids a StopIteration when the file has no rows at all.
        headers = next(datareader, [])
        metadata = list(headers)
        traindata = list(datareader)
    # The parsed data must be handed back to the caller; without this return
    # the function would read the file and then discard everything.
    return (metadata, traindata)
class Node:
    """A single node of the decision tree.

    Args:
        attribute: Label stored on the node and used as its string form.
    """

    def __init__(self, attribute):
        self.attribute = attribute
        # Starts empty; the tree-building code appends (value, child) pairs.
        self.children = []
        # Starts as an empty string.
        # NOTE(review): presumably the class label of a leaf -- confirm
        # against the code that assigns it.
        self.answer = ""

    def __str__(self):
        # __str__ must return a real str; coercing keeps printing safe when
        # the attribute is not already a string (e.g. a number or bytes).
        return str(self.attribute)
GCET 20
Machine Learning [102045609] 12102080501071
for x in range(items.shape[0]):
for y in range(data.shape[0]):
if data[y, col] == items[x]:
count[x] += 1
for x in range(items.shape[0]):
dict[items[x]] = np.empty((int(count[x]), data.shape[1]), dtype="|S32")
pos = 0
for y in range(data.shape[0]):
if data[y, col] == items[x]:
dict[items[x]][pos] = data[y]
pos += 1
if delete:
dict[items[x]] = np.delete(dict[items[x]], col, 1)
if items.size == 1:
return 0
for x in range(items.shape[0]):
ratio = dict[items[x]].shape[0]/(total_size * 1.0)
entropies[x] = ratio * entropy(dict[items[x]][:, -1])
intrinsic[x] = ratio * math.log(ratio, 2)
total_entropy = entropy(data[:, -1])
iv = -1 * sum(intrinsic)
for x in range(entropies.shape[0]):
total_entropy -= entropies[x]
return total_entropy / iv
GCET 21
Machine Learning [102045609] 12102080501071
split = np.argmax(gains)
node = Node(metadata[split])
metadata = np.delete(metadata, split, 0)
for x in range(items.shape[0]):
child = create_node(dict[items[x]], metadata)
node.children.append((items[x], child))
return node
def empty(size):
    """Return a string made of ``size`` space characters.

    Args:
        size: Number of spaces to produce. Zero or a negative value gives an
            empty string.

    Returns:
        A string consisting only of spaces.
    """
    # String repetition builds the result in one step instead of growing it
    # one character at a time in a loop.
    return " " * size
GCET 22
Machine Learning [102045609] 12102080501071
OUTPUT:
GCET 23