1 KNN - Jupyter Notebook

Download as pdf or txt
Download as pdf or txt
You are on page 1 of 3

In [1]: import numpy as np


import matplotlib.pyplot as plt
import pandas as pd

In [2]: wbcd = pd.read_csv("D:\\Course\\Python\\Datasets\\wbcd.csv")


wbcd

...

In [3]: wbcd

# Drop the identifier column by rebinding instead of deleting in place; errors='ignore'
# keeps the cell re-runnable (a second `del wbcd['id']` raises KeyError once the column is gone).
wbcd = wbcd.drop(columns=['id'], errors='ignore')

In [4]: wbcd

...

In [5]: wbcd['diagnosis'].value_counts()

Out[5]: B 357

M 212

Name: diagnosis, dtype: int64

In [6]: freq = pd.crosstab(index=wbcd['diagnosis'], # Make a crosstab


columns="count")
freq
...

In [7]: freq/freq.sum()

...

In [8]: wbcd

...

In [9]: # Consider the input variables as X and the output variable as y


# features: every column from position 1 onward, taken as a raw array via .values
X = wbcd.iloc[:, 1:].values
# target: the column at position 0 (presumably 'diagnosis' once 'id' is removed -- confirm column order)
y = wbcd.iloc[:, 0].values
# bare name as the last expression so the cell displays the feature array
X

...

In [10]: # Normalization
#X = (X-X.min())/(X.max() -X.min())
#X

...

Splitting the dataset into the Training set and Test set
In [10]: from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for testing. random_state pins the shuffle so the split, and every
# metric computed from it, is identical on each Restart & Run All; outputs recorded from an
# earlier unseeded run will differ until the notebook is re-executed.
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.80, random_state=42)

Feature Scaling (Very Important)

In [11]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
# fit the scaler on the training features only, and scale them in the same call
X_train = sc.fit_transform(X_train)
# scale the test features with the already-fitted scaler (no re-fit), so the test set
# does not influence the scaling parameters
X_test = sc.transform(X_test)

Training the Model

In [12]: # import the algorithm class from the required library


from sklearn.neighbors import KNeighborsClassifier

# create the classifier object (instantiated like a function call), configured with n_neighbors=3
classifier = KNeighborsClassifier(n_neighbors=3)

In [13]: # apply the model on training dataset using fit


# fit the classifier on the training features and their labels
classifier.fit(X_train, y_train)

...

In [14]: # Predicting with the model: the input is the test-set features, X_test


# predict a label for every row of the test features
y_pred = classifier.predict(X_test)
# bare name as the last expression so the cell displays the predicted labels
y_pred

Out[14]: array(['M', 'B', 'M', 'B', 'B', 'B', 'M', 'B', 'B', 'B', 'M', 'M', 'M',

'B', 'M', 'M', 'B', 'B', 'M', 'B', 'B', 'B', 'B', 'B', 'M', 'B',

'B', 'B', 'B', 'B', 'B', 'M', 'B', 'B', 'M', 'B', 'B', 'M', 'B',

'B', 'B', 'B', 'M', 'B', 'B', 'B', 'B', 'M', 'B', 'B', 'B', 'B',

'B', 'B', 'B', 'M', 'B', 'B', 'M', 'M', 'B', 'B', 'B', 'B', 'M',

'B', 'M', 'B', 'M', 'M', 'M', 'B', 'B', 'M', 'M', 'M', 'M', 'B',

'B', 'B', 'B', 'B', 'M', 'M', 'B', 'M', 'B', 'B', 'B', 'B', 'B',

'M', 'M', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'M', 'B', 'B',

'B', 'B', 'B', 'B', 'B', 'M', 'M', 'B', 'M', 'M'], dtype=object)

In [15]: ##Evaluating the Algorithm


from sklearn.metrics import confusion_matrix

# build the confusion matrix comparing the true test labels (y_test) with the predictions (y_pred)

print(confusion_matrix(y_test, y_pred))

[[75 0]

[ 2 37]]

In [16]: from sklearn.metrics import accuracy_score



# calculate the model's accuracy by comparing the predictions (y_pred) against the true labels (y_test)

Accuracy_Score = accuracy_score(y_test, y_pred)

In [17]: Accuracy_Score

Out[17]: 0.9824561403508771

You might also like