1 KNN - Jupyter Notebook

[1]: import numpy as np

import matplotlib.pyplot as plt
import pandas as pd

In [2]: wbcd = pd.read_csv("D:\\Course\\Python\\Datasets\\wbcd.csv")



In [3]: wbcd

del wbcd['id']

In [4]: wbcd


In [5]: wbcd['diagnosis'].value_counts()

Out[5]: B 357

M 212

Name: diagnosis, dtype: int64

In [6]: freq = pd.crosstab(index=wbcd['diagnosis'], # Make a crosstab


In [7]: freq/freq.sum()


In [8]: wbcd


In [9]: # Consider the inpur varabile as X and Output variable as Y

X = wbcd.iloc[:, 1:].values
y = wbcd.iloc[:, 0].values


In [10]: # Normalization
#X = (X-X.min())/(X.max() -X.min())


Splitting the dataset into the Training set and Test set
In [10]: from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.80)

Feature Scaling (Very Important)

In [11]: ​
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

Training the Model

In [12]: # import alogrithm method name from required libararies

from sklearn.neighbors import KNeighborsClassifier

# create an alogirthm ( same like as function)
classifier = KNeighborsClassifier(n_neighbors=3)

In [13]: # apply the model on training dataset using fit

classifier.fit(X_train, y_train)


In [14]: # Predicting the Model : input variable of testing datset - xtest

y_pred = classifier.predict(X_test)

Out[14]: array(['M', 'B', 'M', 'B', 'B', 'B', 'M', 'B', 'B', 'B', 'M', 'M', 'M',

'B', 'M', 'M', 'B', 'B', 'M', 'B', 'B', 'B', 'B', 'B', 'M', 'B',

'B', 'B', 'B', 'B', 'B', 'M', 'B', 'B', 'M', 'B', 'B', 'M', 'B',

'B', 'B', 'B', 'M', 'B', 'B', 'B', 'B', 'M', 'B', 'B', 'B', 'B',

'B', 'B', 'B', 'M', 'B', 'B', 'M', 'M', 'B', 'B', 'B', 'B', 'M',

'B', 'M', 'B', 'M', 'M', 'M', 'B', 'B', 'M', 'M', 'M', 'M', 'B',

'B', 'B', 'B', 'B', 'M', 'M', 'B', 'M', 'B', 'B', 'B', 'B', 'B',

'M', 'M', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'M', 'B', 'B',

'B', 'B', 'B', 'B', 'B', 'M', 'M', 'B', 'M', 'M'], dtype=object)

In [15]: ##Evaluating the Algorithm

from sklearn.metrics import confusion_matrix

# creating confustion matrix table for TP and TN scenarios

print(confusion_matrix(y_test, y_pred))

[[75 0]

[ 2 37]]

In [16]: from sklearn.metrics import accuracy_score

# calculate the accuracy for the model by validating y pred and Y test

Accuracy_Score = accuracy_score(y_test, y_pred)

In [17]: Accuracy_Score

Out[17]: 0.9824561403508771

