Machine Learning

Download as docx, pdf, or txt
Download as docx, pdf, or txt
You are on page 1 of 8

Candidate elimination

import numpy as np

import pandas as pd

# Load the training examples; every column except the last is treated as an
# attribute, and the last column is treated as the class label.
data = pd.read_csv('2.csv')

# Attribute values, one row per training example.
concepts = np.array(data.iloc[:, :-1])

# Label of each training example, taken from the final column.
target = np.array(data.iloc[:, -1])

def learn(concepts, target):
    """Run a simplified candidate-elimination pass over the training examples.

    The specific hypothesis starts as a copy of the first example and is
    generalised (attribute replaced by '?') whenever a positive example
    disagrees with it.  The general boundary is kept as a square list of
    hypotheses, one per attribute, where only the diagonal entry of each row
    is ever set to something other than '?'.

    Progress is printed after every example.

    Args:
        concepts: 2-D indexable (e.g. a NumPy array) of attribute values, one
            row per training example.  Must contain at least one row.
        target: 1-D indexable of labels aligned with ``concepts``.  Rows
            labelled "yes" are positive and rows labelled "no" are negative;
            any other label leaves both hypotheses unchanged.

    Returns:
        A ``(specific_h, general_h)`` tuple: the final specific hypothesis and
        the list of general hypotheses with all-wildcard rows removed.

    Raises:
        IndexError: if ``concepts`` has no rows.
    """
    # Copy so that generalising the hypothesis never mutates the caller's data.
    specific_h = concepts[0].copy()
    print("initialization of specific_h \n", specific_h)

    n_attributes = len(specific_h)
    general_h = [["?"] * n_attributes for _ in range(n_attributes)]
    print("initialization of general_h \n", general_h)

    for i, h in enumerate(concepts):
        if target[i] == "yes":
            print("If instance is Positive ")
            for x in range(n_attributes):
                if h[x] != specific_h[x]:
                    # The positive example disagrees: drop the constraint from
                    # both the specific hypothesis and the general boundary.
                    specific_h[x] = '?'
                    general_h[x][x] = '?'
        elif target[i] == "no":
            print("If instance is Negative ")
            for x in range(n_attributes):
                if h[x] != specific_h[x]:
                    # This attribute separates the negative example from the
                    # specific hypothesis, so constrain the general boundary.
                    general_h[x][x] = specific_h[x]
                else:
                    general_h[x][x] = '?'

        print(" step {}".format(i + 1))
        print(specific_h)
        print(general_h)
        print("\n")
        print("\n")

    # Drop rows that carry no constraint.  The wildcard row is sized from the
    # data rather than hard-coded, so any number of attributes is handled.
    wildcard_row = ['?'] * n_attributes
    general_h = [row for row in general_h if row != wildcard_row]

    return specific_h, general_h

# Train on the loaded examples and report both resulting hypotheses.
s_final, g_final = learn(concepts, target)

# Each heading is printed on its own line, followed by the value.
print("Final Specific_h:")
print(s_final)

print("Final General_h:")
print(g_final)

output

initialization of specific_h

['sunny' 'warm' 'normal' 'strong' 'warm' 'same']

initialization of general_h

[['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'],

['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?']]

If instance is Positive

step 1

['sunny' 'warm' 'normal' 'strong' 'warm' 'same']

[['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'],

['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?']]

If instance is Positive

step 2

['sunny' 'warm' '?' 'strong' 'warm' 'same']

[['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'],
['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?']]

If instance is Negative

step 3

['sunny' 'warm' '?' 'strong' 'warm' 'same']

[['sunny', '?', '?', '?', '?', '?'], ['?', 'warm', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'],

['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', 'same']]

If instance is Positive

step 4

['sunny' 'warm' '?' 'strong' '?' '?']

[['sunny', '?', '?', '?', '?', '?'], ['?', 'warm', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'],

['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?']]

Final Specific_h:

['sunny' 'warm' '?' 'strong' '?' '?']

Final General_h:

[['sunny', '?', '?', '?', '?', '?'], ['?', 'warm', '?', '?', '?', '?']]
Write a program to implement the naïve Bayesian classifier for a sample training data set stored as
a .CSV file. Compute the accuracy of the classifier, considering a few test data sets.
Assuming a set of documents that need to be classified, use the naïve Bayesian Classifier model to
perform this task. Built-in Java classes/API can be used to write the program. Calculate the accuracy,
precision, and recall for your dataset.
Apply the EM algorithm to cluster a set of data stored in a .CSV file. Use the same data set for clustering
using the k-Means algorithm. Compare the results of these two algorithms and comment on the quality
of clustering. You can add Java/Python ML library classes/API in the program.

from sklearn.cluster import KMeans

from sklearn import preprocessing

from sklearn.mixture import GaussianMixture

from sklearn.datasets import load_iris

import sklearn.metrics as sm

import pandas as pd

import numpy as np

import matplotlib.pyplot as plt

# Load the Iris data set that ships with scikit-learn.
dataset = load_iris()

# print(dataset)

# Feature matrix with readable column names, so columns can be addressed by name.
X = pd.DataFrame(
    dataset.data,
    columns=['Sepal_Length', 'Sepal_Width', 'Petal_Length', 'Petal_Width'],
)

# Class labels in a single-column frame.
y = pd.DataFrame(dataset.target, columns=['Targets'])

# print(X)

# One wide figure that holds the three side-by-side scatter plots.
plt.figure(figsize=(14, 7))

# Colour lookup table indexed by integer class / cluster id (0, 1, 2).
colormap = np.array(['red', 'lime', 'black'])

# REAL PLOT
# Left panel: petal length vs. petal width, coloured by the labels in y.
plt.subplot(1, 3, 1)
plt.scatter(
    X['Petal_Length'],
    X['Petal_Width'],
    c=colormap[y['Targets']],
    s=40,
)
plt.title('Real')

# K-PLOT
# Middle panel: the same two petal features, coloured by k-means cluster id.
plt.subplot(1, 3, 2)

# Fit three clusters on the unscaled feature frame.
model = KMeans(n_clusters=3).fit(X)

# np.choose with choices [0, 1, 2] picks choice k for label k, so labels are
# passed through unchanged and then cast to int64 for indexing colormap.
predY = np.choose(model.labels_, [0, 1, 2]).astype(np.int64)

plt.scatter(
    X['Petal_Length'],
    X['Petal_Width'],
    c=colormap[predY],
    s=40,
)
plt.title('KMeans')

# GMM PLOT
# Standardise the features before fitting the mixture model.
scaler = preprocessing.StandardScaler().fit(X)
xsa = scaler.transform(X)

# Re-wrap the scaled array so it keeps the original column names.
xs = pd.DataFrame(xsa, columns=X.columns)

# Fit a three-component Gaussian mixture and assign each sample to a component.
gmm = GaussianMixture(n_components=3).fit(xs)
y_cluster_gmm = gmm.predict(xs)

# Right panel: plotted in the original (unscaled) petal coordinates,
# coloured by the mixture component assigned to each sample.
plt.subplot(1, 3, 3)
plt.scatter(
    X['Petal_Length'],
    X['Petal_Width'],
    c=colormap[y_cluster_gmm],
    s=40,
)
plt.title('GMM Classification')

You might also like