ML Record

1. Implement and demonstrate the FIND-S algorithm.

Source code:
import csv

# Initialize the hypothesis with the most specific value for each attribute
hypo = ['%', '%', '%', '%', '%', '%']

with open('trainingdata.csv') as csv_file:
    readcsv = csv.reader(csv_file, delimiter=',')
    print(readcsv)

    data = []
    print("\nThe given training examples are:")
    for row in readcsv:
        print(row)
        # Keep only the positive examples (last column == "Yes")
        if row[len(row) - 1].upper() == "YES":
            data.append(row)

print("\nThe positive examples are:")
for x in data:
    print(x)
print("\n")

TotalExamples = len(data)
print("The steps of the Find-s algorithm are :\n", hypo)

# Initialize the hypothesis with the first positive example
d = len(data[0]) - 1
hypo = [data[0][j] for j in range(d)]

# Generalize the hypothesis against every positive example:
# any attribute that differs is replaced by the wildcard '?'
for i in range(TotalExamples):
    for k in range(d):
        if hypo[k] != data[i][k]:
            hypo[k] = '?'
    print(hypo)

print("\nThe maximally specific Find-s hypothesis for the given training examples is :")
print(hypo)

Output:
<_csv.reader object at 0x00000260B3FFB3A0>

The given training examples are:
['sky', 'airTemp', 'humidity', 'wind', 'water', 'forecast', 'enjoySport']
['Sunny', 'Warm', 'Normal', 'Strong', 'Warm', 'Same', 'Yes']
['Sunny', 'Warm', 'High', 'Strong', 'Warm', 'Same', 'Yes']
['Rainy', 'Cold', 'High', 'Strong', 'Warm', 'Change', 'No']
['Sunny', 'Warm', 'High', 'Strong', 'Cool', 'Change', 'Yes']

The positive examples are:
['Sunny', 'Warm', 'Normal', 'Strong', 'Warm', 'Same', 'Yes']
['Sunny', 'Warm', 'High', 'Strong', 'Warm', 'Same', 'Yes']
['Sunny', 'Warm', 'High', 'Strong', 'Cool', 'Change', 'Yes']

The steps of the Find-s algorithm are :
['%', '%', '%', '%', '%', '%']
['Sunny', 'Warm', 'Normal', 'Strong', 'Warm', 'Same']
['Sunny', 'Warm', '?', 'Strong', 'Warm', 'Same']
['Sunny', 'Warm', '?', 'Strong', '?', '?']

The maximally specific Find-s hypothesis for the given training examples is :
['Sunny', 'Warm', '?', 'Strong', '?', '?']
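
For reference, the program assumes a file named trainingdata.csv in the working directory. Based on the rows echoed in the output above, its contents would be (this reconstruction is an assumption):

sky,airTemp,humidity,wind,water,forecast,enjoySport
Sunny,Warm,Normal,Strong,Warm,Same,Yes
Sunny,Warm,High,Strong,Warm,Same,Yes
Rainy,Cold,High,Strong,Warm,Change,No
Sunny,Warm,High,Strong,Cool,Change,Yes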
2. Implement Linear Regression.

Source code:
import numpy as np
import matplotlib.pyplot as plt

def estimate_coef(x, y):
    # number of observations/points
    n = np.size(x)

    # mean of x and y vectors
    m_x = np.mean(x)
    m_y = np.mean(y)

    # calculating cross-deviation and deviation about x
    SS_xy = np.sum(y * x) - n * m_y * m_x
    SS_xx = np.sum(x * x) - n * m_x * m_x

    # calculating regression coefficients
    b_1 = SS_xy / SS_xx
    b_0 = m_y - b_1 * m_x

    return (b_0, b_1)

def plot_regression_line(x, y, b):
    # plotting the actual points as a scatter plot
    plt.scatter(x, y, color="m", marker="o", s=30)

    # predicted response vector
    y_pred = b[0] + b[1] * x

    # plotting the regression line
    plt.plot(x, y_pred, color="g")

    # putting labels
    plt.xlabel('x')
    plt.ylabel('y')
    plt.show()

def main():
    # observations / data
    x = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    y = np.array([1, 3, 2, 5, 7, 8, 8, 9, 10, 12])

    # estimating coefficients
    b = estimate_coef(x, y)
    print("Estimated coefficients:\nb_0 = {} \nb_1 = {}".format(b[0], b[1]))

    # plotting regression line
    plot_regression_line(x, y, b)

# calling main function
if __name__ == "__main__":
    main()

Output:

Estimated coefficients:
b_0 = 1.2363636363636363
b_1 = 1.1696969696969697
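
As a quick cross-check of these coefficients, NumPy's built-in degree-1 polynomial fit solves the same least-squares problem; the snippet below is an optional verification sketch, not part of the record program:

import numpy as np

x = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
y = np.array([1, 3, 2, 5, 7, 8, 8, 9, 10, 12])

# np.polyfit with degree 1 returns the slope (b_1) first, then the intercept (b_0)
b_1, b_0 = np.polyfit(x, y, 1)
print("b_0 =", b_0)  # ~1.2364
print("b_1 =", b_1)  # ~1.1697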
3. Implement the RANDOM FOREST algorithm.

Source code:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import warnings
warnings.filterwarnings('ignore')

# Load the Titanic dataset
url = "https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv"
titanic_data = pd.read_csv(url)

# Drop rows with missing target values
titanic_data = titanic_data.dropna(subset=['Survived'])

# Select relevant features and the target variable
# (.copy() avoids pandas' SettingWithCopyWarning when modifying X below)
X = titanic_data[['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare']].copy()
y = titanic_data['Survived']

# Convert the categorical variable 'Sex' to numerical
X['Sex'] = X['Sex'].map({'female': 0, 'male': 1})

# Handle missing values in the 'Age' column
X['Age'] = X['Age'].fillna(X['Age'].median())

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a Random Forest classifier
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the classifier
rf_classifier.fit(X_train, y_train)

# Make predictions on the test set
y_pred = rf_classifier.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
classification_rep = classification_report(y_test, y_pred)

# Print the results
print(f"Accuracy: {accuracy:.2f}")
print("\nClassification Report:\n", classification_rep)

Output:
Accuracy: 0.80

Classification Report:
              precision    recall  f1-score   support

           0       0.82      0.85      0.83       105
           1       0.77      0.73      0.75        74

    accuracy                           0.80       179
   macro avg       0.79      0.79      0.79       179
weighted avg       0.80      0.80      0.80       179
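
Note that the program above actually performs classification (Survived is a binary label) with RandomForestClassifier. If true regression is wanted, scikit-learn's RandomForestRegressor can be swapped in; the sketch below reuses the preprocessed X from the program above and, as an illustrative assumption, predicts the continuous Fare column:

from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Hypothetical regression target: predict Fare from the remaining features
X_reg = X.drop(columns=['Fare'])
y_reg = titanic_data['Fare']

Xr_train, Xr_test, yr_train, yr_test = train_test_split(X_reg, y_reg, test_size=0.2, random_state=42)

rf_regressor = RandomForestRegressor(n_estimators=100, random_state=42)
rf_regressor.fit(Xr_train, yr_train)

yr_pred = rf_regressor.predict(Xr_test)
print("MSE:", mean_squared_error(yr_test, yr_pred))
print("R^2:", r2_score(yr_test, yr_pred))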
4. Build an Artificial Neural Network by implementing the BACKPROPAGATION algorithm and test the same.

Source code:
import numpy as np

X = np.array(([2, 9], [1, 5], [3, 6]), dtype=float)  # X = (hours sleeping, hours studying)
y = np.array(([92], [86], [89]), dtype=float)  # y = score on test

# scale units
X = X / np.amax(X, axis=0)  # divide by the maximum of each X column
y = y / 100  # max test score is 100

class Neural_Network(object):
    def __init__(self):
        # Parameters
        self.inputSize = 2
        self.outputSize = 1
        self.hiddenSize = 3
        # Weights
        self.W1 = np.random.randn(self.inputSize, self.hiddenSize)  # (2x3) weight matrix from input to hidden layer
        self.W2 = np.random.randn(self.hiddenSize, self.outputSize)  # (3x1) weight matrix from hidden to output layer

    def forward(self, X):
        # forward propagation through our network
        self.z = np.dot(X, self.W1)  # dot product of X (input) and the first set of 2x3 weights
        self.z2 = self.sigmoid(self.z)  # activation function
        self.z3 = np.dot(self.z2, self.W2)  # dot product of the hidden layer (z2) and the second set of 3x1 weights
        o = self.sigmoid(self.z3)  # final activation function
        return o

    def sigmoid(self, s):
        return 1 / (1 + np.exp(-s))  # activation function

    def sigmoidPrime(self, s):
        return s * (1 - s)  # derivative of sigmoid (applied to sigmoid outputs)

    def backward(self, X, y, o):
        # backward propagate through the network
        self.o_error = y - o  # error in output
        self.o_delta = self.o_error * self.sigmoidPrime(o)  # applying derivative of sigmoid to output error
        self.z2_error = self.o_delta.dot(self.W2.T)  # how much the hidden layer weights contributed to the output error
        self.z2_delta = self.z2_error * self.sigmoidPrime(self.z2)  # applying derivative of sigmoid to z2 error
        self.W1 += X.T.dot(self.z2_delta)  # adjusting the first set (input --> hidden) of weights
        self.W2 += self.z2.T.dot(self.o_delta)  # adjusting the second set (hidden --> output) of weights

    def train(self, X, y):
        o = self.forward(X)
        self.backward(X, y, o)

# Instantiate the neural network
NN = Neural_Network()

# Print the inputs, targets, and the network's output before training
print("\nInput: \n" + str(X))
print("\nActual Output: \n" + str(y))
print("\nPredicted Output before training: \n" + str(NN.forward(X)))

# Compute the loss, then perform one training step
print("\nLoss: \n" + str(np.mean(np.square(y - NN.forward(X)))))  # mean squared loss
NN.train(X, y)

Output:
Input:
[[0.66666667 1.        ]
 [0.33333333 0.55555556]
 [1.         0.66666667]]

Actual Output:
[[0.92]
 [0.86]
 [0.89]]

Predicted Output before training:
[[0.59715299]
 [0.58580314]
 [0.58832216]]

Loss:
0.09014121131592957
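
The record program performs only a single training step, so the printed loss is essentially the untrained network's error. To actually reduce it, train() is normally called in a loop; a minimal sketch (the epoch count of 1000 is an arbitrary choice):

for i in range(1000):  # number of epochs, chosen arbitrarily
    NN.train(X, y)

print("\nPredicted Output after training: \n" + str(NN.forward(X)))
print("\nLoss after training: \n" + str(np.mean(np.square(y - NN.forward(X)))))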
5. Implement LOCALLY WEIGHTED REGRESSION.

Source code:
from math import ceil
import numpy as np
from scipy import linalg
import matplotlib.pyplot as plt

def lowess(x, y, f, iterations):
    n = len(x)
    r = int(ceil(f * n))
    # Bandwidth for each point: distance to its r-th nearest neighbour
    h = [np.sort(np.abs(x - x[i]))[r] for i in range(n)]
    # Tricube weights
    w = np.clip(np.abs((x[:, None] - x[None, :]) / h), 0.0, 1.0)
    w = (1 - w ** 3) ** 3
    yest = np.zeros(n)
    delta = np.ones(n)

    for iteration in range(iterations):
        for i in range(n):
            weights = delta * w[:, i]
            b = np.array([np.sum(weights * y), np.sum(weights * y * x)])
            A = np.array([[np.sum(weights), np.sum(weights * x)],
                          [np.sum(weights * x), np.sum(weights * x * x)]])
            beta = linalg.solve(A, b)
            yest[i] = beta[0] + beta[1] * x[i]

        # Robustifying weights: down-weight points with large residuals
        residuals = y - yest
        s = np.median(np.abs(residuals))
        delta = np.clip(residuals / (6.0 * s), -1, 1)
        delta = (1 - delta ** 2) ** 2

    return yest

n = 100
x = np.linspace(0, 2 * np.pi, n)
y = np.sin(x) + 0.3 * np.random.randn(n)
f = 0.25
iterations = 3
yest = lowess(x, y, f, iterations)

plt.plot(x, y, "r.")
plt.plot(x, yest, "b-")
plt.show()

Output: the program displays a plot of the noisy sine-wave samples (red dots) overlaid with the smoothed LOWESS curve (blue line).
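
The smoothing fraction f controls how many neighbouring points enter each local fit: small values track the data closely, large values give a smoother curve. A short comparison sketch, reusing x, y, iterations, and lowess from above (the specific f values are illustrative):

# Compare several smoothing fractions on the same noisy data
for f_val, style in [(0.1, "g-"), (0.25, "b-"), (0.5, "k-")]:
    plt.plot(x, lowess(x, y, f_val, iterations), style, label="f = %s" % f_val)
plt.plot(x, y, "r.", label="data")
plt.legend()
plt.show()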
6. For a given set of training examples stored in a .csv file, implement and demonstrate the CANDIDATE ELIMINATION algorithm.

Source code:
import numpy as np
import pandas as pd

# Loading data from a CSV file
data = pd.read_csv('trainingdata.csv')
print(data)

# Separating concept features from the target
concepts = np.array(data.iloc[:, 0:-1])
print("Concepts:\n", concepts)

# Isolating the target into a separate array
target = np.array(data.iloc[:, -1])
print("Target:\n", target)

def learn(concepts, target):
    """
    learn() implements the learning method of the Candidate Elimination algorithm.
    Arguments:
        concepts - an array with all the feature values
        target - an array with the corresponding output values
    """
    # Initialize specific_h with the first instance from concepts
    specific_h = concepts[0].copy()
    print("\nInitialization of specific_h and general_h")
    print("specific_h:", specific_h)

    general_h = [["?" for i in range(len(specific_h))] for i in range(len(specific_h))]
    print("general_h:", general_h)

    # The learning iterations
    for i, h in enumerate(concepts):
        # If the example is positive, generalize specific_h
        if target[i] == "Yes":
            for x in range(len(specific_h)):
                # Change values in S & G only if values differ
                if h[x] != specific_h[x]:
                    specific_h[x] = '?'
                    general_h[x][x] = '?'
        # If the example is negative, specialize general_h
        if target[i] == "No":
            for x in range(len(specific_h)):
                if h[x] != specific_h[x]:
                    general_h[x][x] = specific_h[x]
                else:
                    general_h[x][x] = '?'

        print("\nSteps of Candidate Elimination Algorithm", i + 1)
        print("specific_h:", specific_h)
        print("general_h:", general_h)

    # Remove fully general (all '?') rows from general_h
    indices = [i for i, val in enumerate(general_h) if val == ['?' for _ in range(len(specific_h))]]
    for i in indices:
        general_h.remove(['?' for _ in range(len(specific_h))])

    # Return the final boundary sets
    return specific_h, general_h

s_final, g_final = learn(concepts, target)
print("\nFinal Specific_h:", s_final)
print("\nFinal General_h:", g_final)

Output:
     sky airTemp humidity    wind water forecast enjoySport
0  Sunny    Warm   Normal  Strong  Warm     Same        Yes
1  Sunny    Warm     High  Strong  Warm     Same        Yes
2  Rainy    Cold     High  Strong  Warm   Change         No
3  Sunny    Warm     High  Strong  Cool   Change        Yes
Concepts:
[['Sunny' 'Warm' 'Normal' 'Strong' 'Warm' 'Same']
 ['Sunny' 'Warm' 'High' 'Strong' 'Warm' 'Same']
 ['Rainy' 'Cold' 'High' 'Strong' 'Warm' 'Change']
 ['Sunny' 'Warm' 'High' 'Strong' 'Cool' 'Change']]
Target:
['Yes' 'Yes' 'No' 'Yes']

Initialization of specific_h and general_h
specific_h: ['Sunny' 'Warm' 'Normal' 'Strong' 'Warm' 'Same']
general_h: [['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?']]

Steps of Candidate Elimination Algorithm 1
specific_h: ['Sunny' 'Warm' 'Normal' 'Strong' 'Warm' 'Same']
general_h: [['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?']]

Steps of Candidate Elimination Algorithm 2
specific_h: ['Sunny' 'Warm' '?' 'Strong' 'Warm' 'Same']
general_h: [['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?']]

Steps of Candidate Elimination Algorithm 3
specific_h: ['Sunny' 'Warm' '?' 'Strong' 'Warm' 'Same']
general_h: [['Sunny', '?', '?', '?', '?', '?'], ['?', 'Warm', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', 'Same']]

Steps of Candidate Elimination Algorithm 4
specific_h: ['Sunny' 'Warm' '?' 'Strong' '?' '?']
general_h: [['Sunny', '?', '?', '?', '?', '?'], ['?', 'Warm', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?']]

Final Specific_h: ['Sunny' 'Warm' '?' 'Strong' '?' '?']

Final General_h: [['Sunny', '?', '?', '?', '?', '?'], ['?', 'Warm', '?', '?', '?', '?']]
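
Once learned, the specific boundary can be used to classify a new instance: it is predicted positive exactly when it agrees with specific_h on every non-wildcard attribute. The helper below is an illustrative sketch (the function name matches_specific and the sample instance are assumptions, not part of the record program):

def matches_specific(instance, specific_h):
    # Positive iff the instance agrees with specific_h wherever it is not '?'
    return all(s == '?' or s == v for s, v in zip(specific_h, instance))

sample = ['Sunny', 'Warm', 'High', 'Strong', 'Cool', 'Change']
print(matches_specific(sample, s_final))  # True for this sample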
7. Write a program to demonstrate the working of the DECISION TREE based ID3 algorithm.

Source code:
import numpy as np
import math
import csv

def read_data(filename):
    with open(filename, 'r') as csvfile:
        datareader = csv.reader(csvfile, delimiter=',')
        headers = next(datareader)
        metadata = headers  # the header row serves as the attribute metadata
        traindata = []
        for row in datareader:
            traindata.append(row)
    return metadata, traindata

class Node:
    def __init__(self, attribute):
        self.attribute = attribute
        self.children = []
        self.answer = ""

    def __str__(self):
        return self.attribute

def subtables(data, col, delete):
    # Partition the data into sub-tables, one per distinct value of the column
    dict = {}
    items = np.unique(data[:, col])
    count = np.zeros((items.shape[0],), dtype=int)

    for x in range(items.shape[0]):
        for y in range(data.shape[0]):
            if data[y, col] == items[x]:
                count[x] += 1

    for x in range(items.shape[0]):
        dict[items[x]] = np.empty((count[x], data.shape[1]), dtype=data.dtype)
        pos = 0
        for y in range(data.shape[0]):
            if data[y, col] == items[x]:
                dict[items[x]][pos] = data[y]
                pos += 1
        if delete:
            dict[items[x]] = np.delete(dict[items[x]], col, 1)
    return items, dict

def entropy(S):
    items, counts = np.unique(S, return_counts=True)
    entropy_value = 0

    for i in range(len(items)):
        p_i = counts[i] / len(S)
        entropy_value -= p_i * math.log2(p_i)

    return entropy_value

def gain_ratio(data, col):
    items, dict = subtables(data, col, delete=False)
    total_size = data.shape[0]
    entropies = np.zeros((items.shape[0],))
    intrinsic = np.zeros((items.shape[0],))

    for x in range(items.shape[0]):
        ratio = dict[items[x]].shape[0] / total_size
        entropies[x] = ratio * entropy(dict[items[x]][:, -1])
        intrinsic[x] = ratio * math.log2(ratio) if ratio != 0 else 0

    total_entropy = entropy(data[:, -1])
    iv = -sum(intrinsic)
    total_gain = total_entropy - sum(entropies)

    return total_gain / iv if iv != 0 else 0

def create_node(data, metadata):
    # If all examples share one class label, return a leaf node
    if len(np.unique(data[:, -1])) == 1:
        node = Node("")
        node.answer = np.unique(data[:, -1])[0]
        return node

    gains = np.zeros((data.shape[1] - 1,))
    for col in range(data.shape[1] - 1):
        gains[col] = gain_ratio(data, col)

    # Split on the attribute with the highest gain ratio
    split = np.argmax(gains)
    node = Node(metadata[split])
    new_metadata = np.delete(metadata, split)

    items, dict = subtables(data, split, delete=True)

    for x in range(items.shape[0]):
        child = create_node(dict[items[x]], new_metadata)
        node.children.append((items[x], child))

    return node

def empty(size):
    return " " * size

def print_tree(node, level=0):
    if node.answer != "":
        print(empty(level), node.answer)
        return
    print(empty(level), node.attribute)
    for value, n in node.children:
        print(empty(level + 1), value)
        print_tree(n, level + 2)

metadata, traindata = read_data("tennisdata.csv")
data = np.array(traindata)
node = create_node(data, np.array(metadata))
print_tree(node)

Output:
 Outlook
  Overcast
   Yes
  Rainy
   Windy
    False
     Yes
    True
     No
  Sunny
   Humidity
    High
     No
    Normal
     Yes
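
To use the learned tree for prediction, one walks it from the root, following the branch whose value matches the instance and dropping the used attribute, mirroring subtables(delete=True). The classify helper below is a sketch built on the Node class above (the function name and sample instance are illustrative):

def classify(node, instance, metadata):
    # Leaf node: return the stored class label
    if node.answer != "":
        return node.answer
    # Locate this node's attribute among the remaining attribute names
    col = list(metadata).index(node.attribute)
    for value, child in node.children:
        if instance[col] == value:
            remaining = [m for m in metadata if m != node.attribute]
            rem_instance = instance[:col] + instance[col + 1:]
            return classify(child, rem_instance, remaining)
    return "Unknown"  # branch value never seen in the training data

print(classify(node, ['Sunny', 'Mild', 'High', 'False'], metadata))  # expected: No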
8. Write a program implementing the NAIVE BAYESIAN CLASSIFIER for a sample training data set stored in a .csv file.

Source code:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load data from CSV
data = pd.read_csv('tennisdata.csv')
print("The first 5 values of the data are:\n", data.head())

# Obtain the train data (features) and train output (target)
X = data.iloc[:, :-1]
print("\nThe first 5 values of the train data are:\n", X.head())

y = data.iloc[:, -1]
print("\nThe first 5 values of the train output are:\n", y.head())

# Convert categorical data to numerical data
le_outlook = LabelEncoder()
X['Outlook'] = le_outlook.fit_transform(X['Outlook'])

le_temperature = LabelEncoder()
X['Temperature'] = le_temperature.fit_transform(X['Temperature'])

le_humidity = LabelEncoder()
X['Humidity'] = le_humidity.fit_transform(X['Humidity'])

le_windy = LabelEncoder()
X['Windy'] = le_windy.fit_transform(X['Windy'])

print("\nNow the train data is:\n", X.head())

le_play_tennis = LabelEncoder()
y = le_play_tennis.fit_transform(y)
print("\nNow the train output is:\n", y)

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)

# Train the Naive Bayes classifier
classifier = GaussianNB()
classifier.fit(X_train, y_train)

# Predict and evaluate the model
y_pred = classifier.predict(X_test)
print("Accuracy is:", accuracy_score(y_test, y_pred))

Output:
The first 5 values of the data are:
    Outlook Temperature Humidity  Windy PlayTennis
0     Sunny         Hot     High  False         No
1     Sunny         Hot     High   True         No
2  Overcast         Hot     High  False        Yes
3     Rainy        Mild     High  False        Yes
4     Rainy        Cool   Normal  False        Yes

The first 5 values of the train data are:
    Outlook Temperature Humidity  Windy
0     Sunny         Hot     High  False
1     Sunny         Hot     High   True
2  Overcast         Hot     High  False
3     Rainy        Mild     High  False
4     Rainy        Cool   Normal  False

The first 5 values of the train output are:
0     No
1     No
2    Yes
3    Yes
4    Yes
Name: PlayTennis, dtype: object

Now the train data is:
   Outlook  Temperature  Humidity  Windy
0        2            1         0      0
1        2            1         0      1
2        0            1         0      0
3        1            2         0      0
4        1            0         1      0

Now the train output is:
[0 0 1 1 1 0 1 0 1 1 1 1 1 0]
Accuracy is: 1.0
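
With the fitted classifier and the label encoders still in scope, a single unseen day can be classified as follows; this is an illustrative sketch (the sample attribute values are assumptions, and depending on how pandas parsed the Windy column, the value may need to be the boolean False rather than the string 'False'):

# Encode one new instance with the same encoders used for training
sample = pd.DataFrame({
    'Outlook': le_outlook.transform(['Sunny']),
    'Temperature': le_temperature.transform(['Cool']),
    'Humidity': le_humidity.transform(['Normal']),
    'Windy': le_windy.transform([False])  # or ['False'] if parsed as strings
})

prediction = classifier.predict(sample)
print("Prediction:", le_play_tennis.inverse_transform(prediction))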
