Compute2

Download as docx, pdf, or txt
Download as docx, pdf, or txt
You are on page 1 of 10

1st task: perform data cleaning, if any, in the dataset.

The data is already clean, so no further data cleaning is required.

import numpy as np
import pandas as pd
import os

# Load the raw dataset from the current working directory.
x = pd.read_csv('data.csv')

# DataFrame.dropna() returns a new frame instead of modifying `x` in place,
# so the result must be assigned back for rows with missing values to
# actually be removed before printing.
x = x.dropna()
print(x)

2nd task: perform K-means clustering for K = 3, 5, 7 and also Fuzzy C-means. Capture the clusters
generated with both K-means and Fuzzy C-means.

import pandas as pd
from sklearn.cluster import KMeans

from sklearn.preprocessing import StandardScaler


from sklearn.decomposition import PCA
from fcmeans import FCM

import matplotlib.pyplot as plt

# Load the dataset; positional columns 1..58 form the feature matrix
# (column 0 is skipped).
z = pd.read_csv('data.csv')
X = z.iloc[:, 1:59].values

# Standardise the features so that no single column dominates the
# distance computations used by PCA and the clustering algorithms.
scale = StandardScaler()
X = scale.fit_transform(X)

# Reduce to two principal components: the clusters are both fitted and
# plotted in this 2-D space.
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X)

# Cluster counts requested by the task for each algorithm.
k_values = [3, 5, 7]
fuzzy_cmeans_c = [3, 5, 7]

# K-means: one fit and one scatter plot per requested K.
for k in k_values:
    kmeans = KMeans(n_clusters=k, random_state=42)
    y_kmeans = kmeans.fit_predict(X_pca)

    plt.figure(figsize=(6, 4))
    plt.scatter(X_pca[:, 0], X_pca[:, 1], c=y_kmeans, cmap='viridis')
    plt.title('K-means clustering (K = ' + str(k) + ')')
    plt.xlabel('Principal Component 1')
    plt.ylabel('Principal Component 2')
    plt.show()

# Fuzzy C-means: one fit and one scatter plot per requested cluster count.
for c in fuzzy_cmeans_c:
    # `c` is the number of clusters. It must be passed as n_clusters; the
    # fuzzifier exponent `m` is left at the library default. Passing `c` as
    # `m` with n_clusters fixed at 10 would always yield 10 clusters and
    # contradict the plot title below.
    fcm = FCM(n_clusters=c)
    fcm.fit(X_pca)
    # predict() returns the hard label (highest-membership cluster) per point.
    y_fcm = fcm.predict(X_pca)

    plt.figure(figsize=(6, 4))
    plt.scatter(X_pca[:, 0], X_pca[:, 1], c=y_fcm, cmap='viridis')
    plt.title('Fuzzy C means clustering (c = ' + str(c) + ')')
    plt.xlabel('Principal Component 1')
    plt.ylabel('Principal Component 2')
    plt.show()

OUTPUT
3rd task: perform Bottom-up Clustering (Agglomerative clustering). Capture the Clusters generated
at a different level, and also prepare dendrograms.

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import AgglomerativeClustering
from scipy.cluster.hierarchy import dendrogram, linkage

# Load the dataset; positional columns 1..58 form the feature matrix.
Data = pd.read_csv('data.csv')
mfs = np.array(Data.iloc[:, 1:59])

# n_clusters=None together with distance_threshold=0 asks scikit-learn to
# build the complete merge tree instead of stopping at a fixed cluster count.
# NOTE(review): the labels returned here are not used further down.
agg_clustering = AgglomerativeClustering(
    n_clusters=None,
    linkage='ward',
    distance_threshold=0,
)
cluster = agg_clustering.fit_predict(mfs)

# SciPy linkage matrix (Ward criterion) used only for drawing the dendrogram.
linked = linkage(mfs, method='ward')

# truncate_mode='lastp' with p=30 condenses the tree to its last 30 merged
# clusters so the plot stays readable.
dendrogram(linked, truncate_mode='lastp', p=30, orientation='top')
plt.show()

# Cut the hierarchy at several levels (2 to 11 clusters) and print the
# resulting label assignment for each level.
for i in range(2, 12):
    clustering = AgglomerativeClustering(n_clusters=i, linkage='ward')
    clustering.fit(mfs)
    print(f'Clusters at level {i}: {clustering.labels_}')

# Observations:
# The dendrogram shows the hierarchy of clusters formed by the agglomerative
# clustering algorithm.
# The clusters start merging from the bottom level and go up to the top level.
# At the top level, we can see that all the data points belong to a single
# cluster.
# By looking at the dendrogram, we can choose the appropriate level to get the
# desired number of clusters.
# We can also see that at each level, the clustering algorithm forms a
# different set of clusters based on the distance threshold and linkage
# criterion used.
4th task: perform density-based (DBSCAN) clustering.
5th task: prepare a brief comparative summary of the clusters generated using the above clustering
techniques.

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import DBSCAN
from sklearn.decomposition import PCA

# Read the dataset and keep every column except the first and the last as
# the feature matrix.
data = pd.read_csv('data.csv')
mfs = np.array(data.iloc[:, 1:-1])

# Density-based clustering on the feature matrix.
dbscn_clustering = DBSCAN(eps=20, min_samples=5)
clusters = dbscn_clustering.fit_predict(mfs)

# Project the features onto two principal components purely for plotting.
pc = PCA(n_components=2)
reduced_features = pc.fit_transform(mfs)

# Scatter plot of the 2-D projection, coloured by DBSCAN label.
first_component = reduced_features[:, 0]
second_component = reduced_features[:, 1]
plt.scatter(first_component, second_component, c=clusters, cmap='viridis')
plt.show()

# Observations:
# The DBSCAN clustering algorithm forms clusters based on the density of the
# data points.
# The resulting clusters are not well

You might also like