1 Kmeans-Practical-No-1

Download as pdf or txt
Download as pdf or txt
You are on page 1 of 8

kmeans-practical-no-1

March 14, 2024

[5]: from sklearn.cluster import KMeans


import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from matplotlib import pyplot as plt
%matplotlib inline

[7]: df = pd.read_csv("income.csv")  # read income.csv (path relative to the working directory) into a DataFrame
# Preview the first rows to confirm the Name / Age / Income($) columns loaded.
df.head()

[7]: Name Age Income($)


0 Rob 27 70000
1 Michael 29 90000
2 Mohan 29 61000
3 Ismail 28 60000
4 Kory 42 150000

[8]: plt.scatter(df.Age,df['Income($)'])
plt.xlabel('Age')
plt.ylabel('Income($)')

[8]: Text(0, 0.5, 'Income($)')

1
[9]: km = KMeans(n_clusters=3)
y_predicted = km.fit_predict(df[['Age','Income($)']])
y_predicted

/usr/local/lib/python3.10/dist-packages/sklearn/cluster/_kmeans.py:870:
FutureWarning: The default value of `n_init` will change from 10 to 'auto' in
1.4. Set the value of `n_init` explicitly to suppress the warning
warnings.warn(

[9]: array([0, 0, 2, 2, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 2],


dtype=int32)

[10]: df['cluster']=y_predicted  # store each row's K-Means label in a new 'cluster' column
# Preview the frame with the label column attached.
df.head()

[10]: Name Age Income($) cluster


0 Rob 27 70000 0
1 Michael 29 90000 0
2 Mohan 29 61000 2
3 Ismail 28 60000 2
4 Kory 42 150000 1

2
[11]: km.cluster_centers_  # fitted centroids, one row per cluster; columns follow the fit order [Age, Income($)]

[11]: array([[3.40000000e+01, 8.05000000e+04],


[3.82857143e+01, 1.50000000e+05],
[3.29090909e+01, 5.61363636e+04]])

[12]: df1 = df[df.cluster==0]


df2 = df[df.cluster==1]
df3 = df[df.cluster==2]
plt.scatter(df1.Age,df1['Income($)'],color='green')
plt.scatter(df2.Age,df2['Income($)'],color='red')
plt.scatter(df3.Age,df3['Income($)'],color='black')
plt.scatter(km.cluster_centers_[:,0],km.cluster_centers_[:
↪,1],color='purple',marker='*',label='centroid')

plt.xlabel('Age')
plt.ylabel('Income ($)')
plt.legend()

[12]: <matplotlib.legend.Legend at 0x7902c6233f70>

3
[13]: scaler = MinMaxScaler()

scaler.fit(df[['Income($)']])
df['Income($)'] = scaler.transform(df[['Income($)']])

scaler.fit(df[['Age']])
df['Age'] = scaler.transform(df[['Age']])

[14]: df.head()  # preview the frame after scaling; Age and Income($) were overwritten in place

[14]: Name Age Income($) cluster


0 Rob 0.058824 0.213675 0
1 Michael 0.176471 0.384615 0
2 Mohan 0.176471 0.136752 2
3 Ismail 0.117647 0.128205 2
4 Kory 0.941176 0.897436 1

[15]: plt.scatter(df.Age,df['Income($)'])

[15]: <matplotlib.collections.PathCollection at 0x7902c62da230>

4
[16]: km = KMeans(n_clusters=3)
y_predicted = km.fit_predict(df[['Age','Income($)']])
y_predicted

/usr/local/lib/python3.10/dist-packages/sklearn/cluster/_kmeans.py:870:
FutureWarning: The default value of `n_init` will change from 10 to 'auto' in
1.4. Set the value of `n_init` explicitly to suppress the warning
warnings.warn(

[16]: array([0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2],


dtype=int32)

[17]: df['cluster']=y_predicted  # overwrite the 'cluster' column with the labels from the latest fit
# Preview the frame with the updated labels.
df.head()

[17]: Name Age Income($) cluster


0 Rob 0.058824 0.213675 0
1 Michael 0.176471 0.384615 0
2 Mohan 0.176471 0.136752 0
3 Ismail 0.117647 0.128205 0
4 Kory 0.941176 0.897436 1

[18]: km.cluster_centers_  # centroids of the latest fit, one row per cluster; columns follow the fit order [Age, Income($)]

[18]: array([[0.1372549 , 0.11633428],


[0.72268908, 0.8974359 ],
[0.85294118, 0.2022792 ]])

[19]: df1 = df[df.cluster==0]


df2 = df[df.cluster==1]
df3 = df[df.cluster==2]
plt.scatter(df1.Age,df1['Income($)'],color='green')
plt.scatter(df2.Age,df2['Income($)'],color='red')
plt.scatter(df3.Age,df3['Income($)'],color='black')
plt.scatter(km.cluster_centers_[:,0],km.cluster_centers_[:
↪,1],color='purple',marker='*',label='centroid')

plt.legend()

[19]: <matplotlib.legend.Legend at 0x7902c3947be0>

5
[20]: sse = []
k_rng = range(1,10)
for k in k_rng:
km = KMeans(n_clusters=k)
km.fit(df[['Age','Income($)']])
sse.append(km.inertia_)

/usr/local/lib/python3.10/dist-packages/sklearn/cluster/_kmeans.py:870:
FutureWarning: The default value of `n_init` will change from 10 to 'auto' in
1.4. Set the value of `n_init` explicitly to suppress the warning
warnings.warn(
/usr/local/lib/python3.10/dist-packages/sklearn/cluster/_kmeans.py:870:
FutureWarning: The default value of `n_init` will change from 10 to 'auto' in
1.4. Set the value of `n_init` explicitly to suppress the warning
warnings.warn(
/usr/local/lib/python3.10/dist-packages/sklearn/cluster/_kmeans.py:870:
FutureWarning: The default value of `n_init` will change from 10 to 'auto' in
1.4. Set the value of `n_init` explicitly to suppress the warning
warnings.warn(
/usr/local/lib/python3.10/dist-packages/sklearn/cluster/_kmeans.py:870:
FutureWarning: The default value of `n_init` will change from 10 to 'auto' in
1.4. Set the value of `n_init` explicitly to suppress the warning

6
warnings.warn(
/usr/local/lib/python3.10/dist-packages/sklearn/cluster/_kmeans.py:870:
FutureWarning: The default value of `n_init` will change from 10 to 'auto' in
1.4. Set the value of `n_init` explicitly to suppress the warning
warnings.warn(
/usr/local/lib/python3.10/dist-packages/sklearn/cluster/_kmeans.py:870:
FutureWarning: The default value of `n_init` will change from 10 to 'auto' in
1.4. Set the value of `n_init` explicitly to suppress the warning
warnings.warn(
/usr/local/lib/python3.10/dist-packages/sklearn/cluster/_kmeans.py:870:
FutureWarning: The default value of `n_init` will change from 10 to 'auto' in
1.4. Set the value of `n_init` explicitly to suppress the warning
warnings.warn(
/usr/local/lib/python3.10/dist-packages/sklearn/cluster/_kmeans.py:870:
FutureWarning: The default value of `n_init` will change from 10 to 'auto' in
1.4. Set the value of `n_init` explicitly to suppress the warning
warnings.warn(
/usr/local/lib/python3.10/dist-packages/sklearn/cluster/_kmeans.py:870:
FutureWarning: The default value of `n_init` will change from 10 to 'auto' in
1.4. Set the value of `n_init` explicitly to suppress the warning
warnings.warn(

[21]: plt.xlabel('K')
plt.ylabel('Sum of squared error')
plt.plot(k_rng,sse)

[21]: [<matplotlib.lines.Line2D at 0x7902c39dd9c0>]

7
[ ]:

You might also like