KNN Model Find Optimanl K
KNN Model Find Optimanl K
KNN Model Find Optimanl K
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import datasets, neighbors
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score
from collections import Counter
from matplotlib.colors import ListedColormap
In [2]:
df = pd.read_csv("/home/shri/Documents/iris.csv")
df.head(4)
Out[2]:
sepal.length sepal.width petal.length petal.width variety
0 5.1 3.5 1.4 0.2 Setosa
1 4.9 3.0 1.4 0.2 Setosa
2 4.7 3.2 1.3 0.2 Setosa
3 4.6 3.1 1.5 0.2 Setosa
In [3]:
# Import label encoder
from sklearn import preprocessing
df['label_variety'].unique()
Out[3]:
array([0, 1, 2])
In [4]:
# creating Matrix 'x' and target vector 'y'
CV Accuracy for k = 3 is 96 %
CV Accuracy for k = 5 is 93 %
CV Accuracy for k = 7 is 96 %
CV Accuracy for k = 11 is 96 %
CV Accuracy for k = 13 is 96 %
CV Accuracy for k = 15 is 90 %
CV Accuracy for k = 17 is 90 %
CV Accuracy for k = 19 is 90 %
CV Accuracy for k = 21 is 90 %
CV Accuracy for k = 23 is 87 %
CV Accuracy for k = 25 is 90 %
CV Accuracy for k = 27 is 87 %
CV Accuracy for k = 29 is 84 %
In [7]:
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(x_tr,y_tr)
pred = knn.predict(x_test)
li = list(range(0,50))
neighbors = list(filter(lambda x: x%2 != 0,li))
# determining best k
optimal_k = neighbors[MSE.index(min(MSE))]
print('\nThe optimal number of neighbors is %d.' % optimal_k)
the misclassification error for each k value is : [0.057 0.057 0.048 0.039 0.039
0.039 0.039 0.039 0.039 0.048 0.048 0.057
0.066 0.056 0.056 0.046 0.056 0.056 0.056 0.056 0.066 0.065 0.065 0.075
0.095]
*********************************************************************
optimal k value is: 7
In [9]:
# ============================== KNN with k = optimal_k
===============================================
# instantiate learning model k = optimal_k
knn_optimal = KNeighborsClassifier(n_neighbors=7)
# evaluate accuracy
acc = accuracy_score(y_test, pred) * 100
print('\nThe accuracy of the knn classifier for k = %d is %f%%' % (optimal_k, acc))
The accuracy of the knn classifier for k = 7 is 97.777778%
In [ ]: