ML Record

1. Implement and demonstrate the FIND-S algorithm.

Source code:
import csv

# Initialize the hypothesis with the most specific value for each attribute
hypo = ['%', '%', '%', '%', '%', '%']

with open('trainingdata.csv') as csv_file:
    readcsv = csv.reader(csv_file, delimiter=',')
    print(readcsv)

    data = []
    print("\nThe given training examples are:")
    for row in readcsv:
        print(row)
        # Keep only the positive examples (last column == "Yes")
        if row[len(row) - 1].upper() == "YES":
            data.append(row)

print("\nThe positive examples are:")
for x in data:
    print(x)
print("\n")

TotalExamples = len(data)
print("The steps of the Find-s algorithm are :\n", hypo)

# Initialize the hypothesis with the first positive example
d = len(data[0]) - 1
hypo = [data[0][j] for j in range(d)]

# Generalize the hypothesis against every positive example:
# any attribute that differs is replaced by the wildcard '?'
for i in range(TotalExamples):
    for k in range(d):
        if hypo[k] != data[i][k]:
            hypo[k] = '?'
    print(hypo)

print("\nThe maximally specific Find-s hypothesis for the given training examples is :")
print(hypo)

Output:
<_csv.reader object at 0x00000260B3FFB3A0>

The given training examples are:
['sky', 'airTemp', 'humidity', 'wind', 'water', 'forecast', 'enjoySport']
['Sunny', 'Warm', 'Normal', 'Strong', 'Warm', 'Same', 'Yes']
['Sunny', 'Warm', 'High', 'Strong', 'Warm', 'Same', 'Yes']
['Rainy', 'Cold', 'High', 'Strong', 'Warm', 'Change', 'No']
['Sunny', 'Warm', 'High', 'Strong', 'Cool', 'Change', 'Yes']

The positive examples are:
['Sunny', 'Warm', 'Normal', 'Strong', 'Warm', 'Same', 'Yes']
['Sunny', 'Warm', 'High', 'Strong', 'Warm', 'Same', 'Yes']
['Sunny', 'Warm', 'High', 'Strong', 'Cool', 'Change', 'Yes']

The steps of the Find-s algorithm are :
['%', '%', '%', '%', '%', '%']
['Sunny', 'Warm', 'Normal', 'Strong', 'Warm', 'Same']
['Sunny', 'Warm', '?', 'Strong', 'Warm', 'Same']
['Sunny', 'Warm', '?', 'Strong', '?', '?']

The maximally specific Find-s hypothesis for the given training examples is :
['Sunny', 'Warm', '?', 'Strong', '?', '?']
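
For reference, the program assumes a file named trainingdata.csv in the working directory. Based on the rows echoed in the output above, its contents would be (this reconstruction is an assumption):

sky,airTemp,humidity,wind,water,forecast,enjoySport
Sunny,Warm,Normal,Strong,Warm,Same,Yes
Sunny,Warm,High,Strong,Warm,Same,Yes
Rainy,Cold,High,Strong,Warm,Change,No
Sunny,Warm,High,Strong,Cool,Change,Yes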
2. Implement Linear Regression.

Source code:
import numpy as np
import matplotlib.pyplot as plt

def estimate_coef(x, y):
    # number of observations/points
    n = np.size(x)

    # mean of x and y vectors
    m_x = np.mean(x)
    m_y = np.mean(y)

    # calculating cross-deviation and deviation about x
    SS_xy = np.sum(y * x) - n * m_y * m_x
    SS_xx = np.sum(x * x) - n * m_x * m_x

    # calculating regression coefficients
    b_1 = SS_xy / SS_xx
    b_0 = m_y - b_1 * m_x

    return (b_0, b_1)

def plot_regression_line(x, y, b):
    # plotting the actual points as a scatter plot
    plt.scatter(x, y, color="m", marker="o", s=30)

    # predicted response vector
    y_pred = b[0] + b[1] * x

    # plotting the regression line
    plt.plot(x, y_pred, color="g")

    # putting labels
    plt.xlabel('x')
    plt.ylabel('y')
    plt.show()

def main():
    # observations / data
    x = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    y = np.array([1, 3, 2, 5, 7, 8, 8, 9, 10, 12])

    # estimating coefficients
    b = estimate_coef(x, y)
    print("Estimated coefficients:\nb_0 = {} \nb_1 = {}".format(b[0], b[1]))

    # plotting regression line
    plot_regression_line(x, y, b)

# calling main function
if __name__ == "__main__":
    main()

Output:

Estimated coefficients:
b_0 = 1.2363636363636363
b_1 = 1.1696969696969697
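
As a quick cross-check of these coefficients, NumPy's built-in degree-1 polynomial fit solves the same least-squares problem; the snippet below is an optional verification sketch, not part of the record program:

import numpy as np

x = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
y = np.array([1, 3, 2, 5, 7, 8, 8, 9, 10, 12])

# np.polyfit with degree 1 returns the slope (b_1) first, then the intercept (b_0)
b_1, b_0 = np.polyfit(x, y, 1)
print("b_0 =", b_0)  # ~1.2364
print("b_1 =", b_1)  # ~1.1697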
3. Implement the RANDOM FOREST algorithm.

Source code:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import warnings
warnings.filterwarnings('ignore')

# Load the Titanic dataset
url = "https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv"
titanic_data = pd.read_csv(url)

# Drop rows with missing target values
titanic_data = titanic_data.dropna(subset=['Survived'])

# Select relevant features and the target variable
# (.copy() avoids pandas' SettingWithCopyWarning when modifying X below)
X = titanic_data[['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare']].copy()
y = titanic_data['Survived']

# Convert the categorical variable 'Sex' to numerical
X['Sex'] = X['Sex'].map({'female': 0, 'male': 1})

# Handle missing values in the 'Age' column
X['Age'] = X['Age'].fillna(X['Age'].median())

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a Random Forest classifier
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the classifier
rf_classifier.fit(X_train, y_train)

# Make predictions on the test set
y_pred = rf_classifier.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
classification_rep = classification_report(y_test, y_pred)

# Print the results
print(f"Accuracy: {accuracy:.2f}")
print("\nClassification Report:\n", classification_rep)

Output:
Accuracy: 0.80

Classification Report:
              precision    recall  f1-score   support

           0       0.82      0.85      0.83       105
           1       0.77      0.73      0.75        74

    accuracy                           0.80       179
   macro avg       0.79      0.79      0.79       179
weighted avg       0.80      0.80      0.80       179
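
Note that the program above actually performs classification (Survived is a binary label) with RandomForestClassifier. If true regression is wanted, scikit-learn's RandomForestRegressor can be swapped in; the sketch below reuses the preprocessed X from the program above and, as an illustrative assumption, predicts the continuous Fare column:

from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Hypothetical regression target: predict Fare from the remaining features
X_reg = X.drop(columns=['Fare'])
y_reg = titanic_data['Fare']

Xr_train, Xr_test, yr_train, yr_test = train_test_split(X_reg, y_reg, test_size=0.2, random_state=42)

rf_regressor = RandomForestRegressor(n_estimators=100, random_state=42)
rf_regressor.fit(Xr_train, yr_train)

yr_pred = rf_regressor.predict(Xr_test)
print("MSE:", mean_squared_error(yr_test, yr_pred))
print("R^2:", r2_score(yr_test, yr_pred))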
4. Build an Artificial Neural Network by implementing the BACKPROPAGATION algorithm and test the same.

Source code:
import numpy as np

X = np.array(([2, 9], [1, 5], [3, 6]), dtype=float)  # X = (hours sleeping, hours studying)
y = np.array(([92], [86], [89]), dtype=float)  # y = score on test

# scale units
X = X / np.amax(X, axis=0)  # divide by the maximum of each X column
y = y / 100  # max test score is 100

class Neural_Network(object):
    def __init__(self):
        # Parameters
        self.inputSize = 2
        self.outputSize = 1
        self.hiddenSize = 3
        # Weights
        self.W1 = np.random.randn(self.inputSize, self.hiddenSize)  # (2x3) weight matrix from input to hidden layer
        self.W2 = np.random.randn(self.hiddenSize, self.outputSize)  # (3x1) weight matrix from hidden to output layer

    def forward(self, X):
        # forward propagation through our network
        self.z = np.dot(X, self.W1)  # dot product of X (input) and the first set of 2x3 weights
        self.z2 = self.sigmoid(self.z)  # activation function
        self.z3 = np.dot(self.z2, self.W2)  # dot product of the hidden layer (z2) and the second set of 3x1 weights
        o = self.sigmoid(self.z3)  # final activation function
        return o

    def sigmoid(self, s):
        return 1 / (1 + np.exp(-s))  # activation function

    def sigmoidPrime(self, s):
        return s * (1 - s)  # derivative of sigmoid (applied to sigmoid outputs)

    def backward(self, X, y, o):
        # backward propagate through the network
        self.o_error = y - o  # error in output
        self.o_delta = self.o_error * self.sigmoidPrime(o)  # applying derivative of sigmoid to output error
        self.z2_error = self.o_delta.dot(self.W2.T)  # how much the hidden layer weights contributed to the output error
        self.z2_delta = self.z2_error * self.sigmoidPrime(self.z2)  # applying derivative of sigmoid to z2 error
        self.W1 += X.T.dot(self.z2_delta)  # adjusting the first set (input --> hidden) of weights
        self.W2 += self.z2.T.dot(self.o_delta)  # adjusting the second set (hidden --> output) of weights

    def train(self, X, y):
        o = self.forward(X)
        self.backward(X, y, o)

# Instantiate the neural network
NN = Neural_Network()

# Print the inputs, targets, and the network's output before training
print("\nInput: \n" + str(X))
print("\nActual Output: \n" + str(y))
print("\nPredicted Output before training: \n" + str(NN.forward(X)))

# Compute the loss, then perform one training step
print("\nLoss: \n" + str(np.mean(np.square(y - NN.forward(X)))))  # mean squared loss
NN.train(X, y)

Output:
Input:
[[0.66666667 1.        ]
 [0.33333333 0.55555556]
 [1.         0.66666667]]

Actual Output:
[[0.92]
 [0.86]
 [0.89]]

Predicted Output before training:
[[0.59715299]
 [0.58580314]
 [0.58832216]]

Loss:
0.09014121131592957
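
The record program performs only a single training step, so the printed loss is essentially the untrained network's error. To actually reduce it, train() is normally called in a loop; a minimal sketch (the epoch count of 1000 is an arbitrary choice):

for i in range(1000):  # number of epochs, chosen arbitrarily
    NN.train(X, y)

print("\nPredicted Output after training: \n" + str(NN.forward(X)))
print("\nLoss after training: \n" + str(np.mean(np.square(y - NN.forward(X)))))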
5. Implement LOCALLY WEIGHTED REGRESSION.

Source code:
from math import ceil
import numpy as np
from scipy import linalg
import matplotlib.pyplot as plt

def lowess(x, y, f, iterations):
    n = len(x)
    r = int(ceil(f * n))
    # Bandwidth for each point: distance to its r-th nearest neighbour
    h = [np.sort(np.abs(x - x[i]))[r] for i in range(n)]
    # Tricube weights
    w = np.clip(np.abs((x[:, None] - x[None, :]) / h), 0.0, 1.0)
    w = (1 - w ** 3) ** 3
    yest = np.zeros(n)
    delta = np.ones(n)

    for iteration in range(iterations):
        for i in range(n):
            weights = delta * w[:, i]
            b = np.array([np.sum(weights * y), np.sum(weights * y * x)])
            A = np.array([[np.sum(weights), np.sum(weights * x)],
                          [np.sum(weights * x), np.sum(weights * x * x)]])
            beta = linalg.solve(A, b)
            yest[i] = beta[0] + beta[1] * x[i]

        # Robustifying weights: down-weight points with large residuals
        residuals = y - yest
        s = np.median(np.abs(residuals))
        delta = np.clip(residuals / (6.0 * s), -1, 1)
        delta = (1 - delta ** 2) ** 2

    return yest

n = 100
x = np.linspace(0, 2 * np.pi, n)
y = np.sin(x) + 0.3 * np.random.randn(n)
f = 0.25
iterations = 3
yest = lowess(x, y, f, iterations)

plt.plot(x, y, "r.")
plt.plot(x, yest, "b-")
plt.show()

Output: the program displays a plot of the noisy sine-wave samples (red dots) overlaid with the smoothed LOWESS curve (blue line).
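
The smoothing fraction f controls how many neighbouring points enter each local fit: small values track the data closely, large values give a smoother curve. A short comparison sketch, reusing x, y, iterations, and lowess from above (the specific f values are illustrative):

# Compare several smoothing fractions on the same noisy data
for f_val, style in [(0.1, "g-"), (0.25, "b-"), (0.5, "k-")]:
    plt.plot(x, lowess(x, y, f_val, iterations), style, label="f = %s" % f_val)
plt.plot(x, y, "r.", label="data")
plt.legend()
plt.show()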
6. For a given set of training examples stored in a .csv file, implement and demonstrate the CANDIDATE ELIMINATION algorithm.

Source code:
import numpy as np
import pandas as pd

# Loading data from a CSV file
data = pd.read_csv('trainingdata.csv')
print(data)

# Separating concept features from the target
concepts = np.array(data.iloc[:, 0:-1])
print("Concepts:\n", concepts)

# Isolating the target into a separate array
target = np.array(data.iloc[:, -1])
print("Target:\n", target)

def learn(concepts, target):
    """
    learn() implements the learning method of the Candidate Elimination algorithm.
    Arguments:
        concepts - an array with all the feature values
        target - an array with the corresponding output values
    """
    # Initialize specific_h with the first instance from concepts
    specific_h = concepts[0].copy()
    print("\nInitialization of specific_h and general_h")
    print("specific_h:", specific_h)

    general_h = [["?" for i in range(len(specific_h))] for i in range(len(specific_h))]
    print("general_h:", general_h)

    # The learning iterations
    for i, h in enumerate(concepts):
        # If the example is positive, generalize specific_h
        if target[i] == "Yes":
            for x in range(len(specific_h)):
                # Change values in S & G only if values differ
                if h[x] != specific_h[x]:
                    specific_h[x] = '?'
                    general_h[x][x] = '?'
        # If the example is negative, specialize general_h
        if target[i] == "No":
            for x in range(len(specific_h)):
                if h[x] != specific_h[x]:
                    general_h[x][x] = specific_h[x]
                else:
                    general_h[x][x] = '?'

        print("\nSteps of Candidate Elimination Algorithm", i + 1)
        print("specific_h:", specific_h)
        print("general_h:", general_h)

    # Remove fully general (all '?') rows from general_h
    indices = [i for i, val in enumerate(general_h) if val == ['?' for _ in range(len(specific_h))]]
    for i in indices:
        general_h.remove(['?' for _ in range(len(specific_h))])

    # Return the final boundary sets
    return specific_h, general_h

s_final, g_final = learn(concepts, target)
print("\nFinal Specific_h:", s_final)
print("\nFinal General_h:", g_final)

Output:
     sky airTemp humidity    wind water forecast enjoySport
0  Sunny    Warm   Normal  Strong  Warm     Same        Yes
1  Sunny    Warm     High  Strong  Warm     Same        Yes
2  Rainy    Cold     High  Strong  Warm   Change         No
3  Sunny    Warm     High  Strong  Cool   Change        Yes
Concepts:
[['Sunny' 'Warm' 'Normal' 'Strong' 'Warm' 'Same']
 ['Sunny' 'Warm' 'High' 'Strong' 'Warm' 'Same']
 ['Rainy' 'Cold' 'High' 'Strong' 'Warm' 'Change']
 ['Sunny' 'Warm' 'High' 'Strong' 'Cool' 'Change']]
Target:
['Yes' 'Yes' 'No' 'Yes']

Initialization of specific_h and general_h
specific_h: ['Sunny' 'Warm' 'Normal' 'Strong' 'Warm' 'Same']
general_h: [['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?']]

Steps of Candidate Elimination Algorithm 1
specific_h: ['Sunny' 'Warm' 'Normal' 'Strong' 'Warm' 'Same']
general_h: [['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?']]

Steps of Candidate Elimination Algorithm 2
specific_h: ['Sunny' 'Warm' '?' 'Strong' 'Warm' 'Same']
general_h: [['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?']]

Steps of Candidate Elimination Algorithm 3
specific_h: ['Sunny' 'Warm' '?' 'Strong' 'Warm' 'Same']
general_h: [['Sunny', '?', '?', '?', '?', '?'], ['?', 'Warm', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', 'Same']]

Steps of Candidate Elimination Algorithm 4
specific_h: ['Sunny' 'Warm' '?' 'Strong' '?' '?']
general_h: [['Sunny', '?', '?', '?', '?', '?'], ['?', 'Warm', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?']]

Final Specific_h: ['Sunny' 'Warm' '?' 'Strong' '?' '?']

Final General_h: [['Sunny', '?', '?', '?', '?', '?'], ['?', 'Warm', '?', '?', '?', '?']]
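
Once learned, the specific boundary can be used to classify a new instance: it is predicted positive exactly when it agrees with specific_h on every non-wildcard attribute. The helper below is an illustrative sketch (the function name matches_specific and the sample instance are assumptions, not part of the record program):

def matches_specific(instance, specific_h):
    # Positive iff the instance agrees with specific_h wherever it is not '?'
    return all(s == '?' or s == v for s, v in zip(specific_h, instance))

sample = ['Sunny', 'Warm', 'High', 'Strong', 'Cool', 'Change']
print(matches_specific(sample, s_final))  # True for this sample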
7. Write a program to demonstrate the working of the DECISION TREE based ID3 algorithm.

Source code:
import numpy as np
import math
import csv

def read_data(filename):
    with open(filename, 'r') as csvfile:
        datareader = csv.reader(csvfile, delimiter=',')
        headers = next(datareader)
        metadata = headers  # the header row serves as the attribute metadata
        traindata = []
        for row in datareader:
            traindata.append(row)
    return metadata, traindata

class Node:
    def __init__(self, attribute):
        self.attribute = attribute
        self.children = []
        self.answer = ""

    def __str__(self):
        return self.attribute

def subtables(data, col, delete):
    # Partition the data into sub-tables, one per distinct value of the column
    dict = {}
    items = np.unique(data[:, col])
    count = np.zeros((items.shape[0],), dtype=int)

    for x in range(items.shape[0]):
        for y in range(data.shape[0]):
            if data[y, col] == items[x]:
                count[x] += 1

    for x in range(items.shape[0]):
        dict[items[x]] = np.empty((count[x], data.shape[1]), dtype=data.dtype)
        pos = 0
        for y in range(data.shape[0]):
            if data[y, col] == items[x]:
                dict[items[x]][pos] = data[y]
                pos += 1
        if delete:
            dict[items[x]] = np.delete(dict[items[x]], col, 1)
    return items, dict

def entropy(S):
    items, counts = np.unique(S, return_counts=True)
    entropy_value = 0

    for i in range(len(items)):
        p_i = counts[i] / len(S)
        entropy_value -= p_i * math.log2(p_i)

    return entropy_value

def gain_ratio(data, col):
    items, dict = subtables(data, col, delete=False)
    total_size = data.shape[0]
    entropies = np.zeros((items.shape[0],))
    intrinsic = np.zeros((items.shape[0],))

    for x in range(items.shape[0]):
        ratio = dict[items[x]].shape[0] / total_size
        entropies[x] = ratio * entropy(dict[items[x]][:, -1])
        intrinsic[x] = ratio * math.log2(ratio) if ratio != 0 else 0

    total_entropy = entropy(data[:, -1])
    iv = -sum(intrinsic)
    total_gain = total_entropy - sum(entropies)

    return total_gain / iv if iv != 0 else 0

def create_node(data, metadata):
    # If all examples share one class label, return a leaf node
    if len(np.unique(data[:, -1])) == 1:
        node = Node("")
        node.answer = np.unique(data[:, -1])[0]
        return node

    gains = np.zeros((data.shape[1] - 1,))
    for col in range(data.shape[1] - 1):
        gains[col] = gain_ratio(data, col)

    # Split on the attribute with the highest gain ratio
    split = np.argmax(gains)
    node = Node(metadata[split])
    new_metadata = np.delete(metadata, split)

    items, dict = subtables(data, split, delete=True)

    for x in range(items.shape[0]):
        child = create_node(dict[items[x]], new_metadata)
        node.children.append((items[x], child))

    return node

def empty(size):
    return " " * size

def print_tree(node, level=0):
    if node.answer != "":
        print(empty(level), node.answer)
        return
    print(empty(level), node.attribute)
    for value, n in node.children:
        print(empty(level + 1), value)
        print_tree(n, level + 2)

metadata, traindata = read_data("tennisdata.csv")
data = np.array(traindata)
node = create_node(data, np.array(metadata))
print_tree(node)

Output:
 Outlook
  Overcast
   Yes
  Rainy
   Windy
    False
     Yes
    True
     No
  Sunny
   Humidity
    High
     No
    Normal
     Yes
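
To use the learned tree for prediction, one walks it from the root, following the branch whose value matches the instance and dropping the used attribute, mirroring subtables(delete=True). The classify helper below is a sketch built on the Node class above (the function name and sample instance are illustrative):

def classify(node, instance, metadata):
    # Leaf node: return the stored class label
    if node.answer != "":
        return node.answer
    # Locate this node's attribute among the remaining attribute names
    col = list(metadata).index(node.attribute)
    for value, child in node.children:
        if instance[col] == value:
            remaining = [m for m in metadata if m != node.attribute]
            rem_instance = instance[:col] + instance[col + 1:]
            return classify(child, rem_instance, remaining)
    return "Unknown"  # branch value never seen in the training data

print(classify(node, ['Sunny', 'Mild', 'High', 'False'], metadata))  # expected: No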
8. Write a program implementing the NAIVE BAYESIAN CLASSIFIER for a sample training data set stored in a .csv file.

Source code:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load data from CSV
data = pd.read_csv('tennisdata.csv')
print("The first 5 values of the data are:\n", data.head())

# Obtain the train data (features) and train output (target)
X = data.iloc[:, :-1]
print("\nThe first 5 values of the train data are:\n", X.head())

y = data.iloc[:, -1]
print("\nThe first 5 values of the train output are:\n", y.head())

# Convert categorical data to numerical data
le_outlook = LabelEncoder()
X['Outlook'] = le_outlook.fit_transform(X['Outlook'])

le_temperature = LabelEncoder()
X['Temperature'] = le_temperature.fit_transform(X['Temperature'])

le_humidity = LabelEncoder()
X['Humidity'] = le_humidity.fit_transform(X['Humidity'])

le_windy = LabelEncoder()
X['Windy'] = le_windy.fit_transform(X['Windy'])

print("\nNow the train data is:\n", X.head())

le_play_tennis = LabelEncoder()
y = le_play_tennis.fit_transform(y)
print("\nNow the train output is:\n", y)

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)

# Train the Naive Bayes classifier
classifier = GaussianNB()
classifier.fit(X_train, y_train)

# Predict and evaluate the model
y_pred = classifier.predict(X_test)
print("Accuracy is:", accuracy_score(y_test, y_pred))

Output:
The first 5 values of the data are:
    Outlook Temperature Humidity  Windy PlayTennis
0     Sunny         Hot     High  False         No
1     Sunny         Hot     High   True         No
2  Overcast         Hot     High  False        Yes
3     Rainy        Mild     High  False        Yes
4     Rainy        Cool   Normal  False        Yes

The first 5 values of the train data are:
    Outlook Temperature Humidity  Windy
0     Sunny         Hot     High  False
1     Sunny         Hot     High   True
2  Overcast         Hot     High  False
3     Rainy        Mild     High  False
4     Rainy        Cool   Normal  False

The first 5 values of the train output are:
0     No
1     No
2    Yes
3    Yes
4    Yes
Name: PlayTennis, dtype: object

Now the train data is:
   Outlook  Temperature  Humidity  Windy
0        2            1         0      0
1        2            1         0      1
2        0            1         0      0
3        1            2         0      0
4        1            0         1      0

Now the train output is:
[0 0 1 1 1 0 1 0 1 1 1 1 1 0]
Accuracy is: 1.0
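
With the fitted classifier and the label encoders still in scope, a single unseen day can be classified as follows; this is an illustrative sketch (the sample attribute values are assumptions, and depending on how pandas parsed the Windy column, the value may need to be the boolean False rather than the string 'False'):

# Encode one new instance with the same encoders used for training
sample = pd.DataFrame({
    'Outlook': le_outlook.transform(['Sunny']),
    'Temperature': le_temperature.transform(['Cool']),
    'Humidity': le_humidity.transform(['Normal']),
    'Windy': le_windy.transform([False])  # or ['False'] if parsed as strings
})

prediction = classifier.predict(sample)
print("Prediction:", le_play_tennis.inverse_transform(prediction))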
