C23 DWM Pracs
C23 DWM Pracs
C23 DWM Pracs
Batch: C23
Roll No: 1902112
Question Number: 10
AIM
We have several objects (5 types of medicines), and each object has
two attributes or features, Weight Index and pH, as shown in the table
below.
Cluster the objects using hierarchical clustering.
Medicine WeightIndex pH
A 1 3
B 2 5
C 5 4
D 1 2
E 5 6
Solution
Data Cleaning:
Since an entire row is missing, we will have to ignore the tuple
completely in the data cleaning process.
maximum_dist = -1
for point1 in cluster1:
for point2 in cluster2:
dist = 0
for i in range(len(point1)-1):
dist += math.pow((point1[i] - point2[i]), 2)
dist = math.sqrt(dist)
return maximum_dist
clusters = list(pd.unique(dataset['Cluster']))
new_clusters = list(range(num))
mapping = {}
for key, val in zip(clusters, new_clusters):
mapping[key] = val
dataset["Cluster"] = dataset["Cluster"].map(lambda x:
mapping[x])
return dataset
• hierarchical_clustering function between 2 clusters
dataset['Cluster'] = dataset.index
while (True):
clusters = []
for i in range(max(pd.unique(dataset['Cluster']))
+ 1):
cl =
dataset[dataset['Cluster']==i].values.tolist()
if len(cl) > 0:
clusters.append(cl)
if len(clusters) == num:
dataset = renaming_clusters(dataset,num)
return dataset
minimum_dist = math.inf
c1 = -1
c2 = -1
cluster1 = clusters[i]
cluster2 = clusters[j]
dist = calculate_dist(cluster1, cluster2)
plt.figure(figsize=(10, 7))
plt.title("Dendrograms")
dend = shc.dendrogram(shc.linkage(data, method='ward'))
plt.title('{} Clusters'.format(num))
plt.xlabel('Weighted Index')
plt.ylabel('pH')
plt.legend()
plt.show()
FINAL OUTPUT:
10/30/21, 10:58 AM DWM_Pracs - Jupyter Notebook
Batch: C23
In [2]: data = {
'Medicine' : ['A', 'B', 'C', 'D', 'E'],
'WeightIndex' : [1, 2, 5, 1, 5],
'pH' : [3, 5, 4, 2, 6]
}
In [4]: data
Out[4]:
Medicine WeightIndex pH
0 A 1 3
1 B 2 5
2 C 5 4
3 D 1 2
4 E 5 6
In [9]: data
Out[9]:
WeightIndex pH
0 1 3
1 2 5
2 5 4
3 1 2
4 5 6
maximum_dist = -1
for point1 in cluster1:
for point2 in cluster2:
dist = 0
for i in range(len(point1)-1):
dist += math.pow((point1[i] - point2[i]), 2)
dist = math.sqrt(dist)
return maximum_dist
clusters = list(pd.unique(dataset['Cluster']))
new_clusters = list(range(num))
mapping = {}
for key, val in zip(clusters, new_clusters):
mapping[key] = val
dataset['Cluster'] = dataset.index
while (True):
clusters = []
if len(clusters) == num:
dataset = renaming_clusters(dataset,num)
return dataset
minimum_dist = math.inf
c1 = -1
c2 = -1
for i in range(len(clusters) - 1):
for j in range(i + 1, len(clusters)):
cluster1 = clusters[i]
cluster2 = clusters[j]
dist = calculate_dist(cluster1, cluster2)
In [40]: #The above dendrogram shows that the ideal number of clusters should be 2
num = int(input('Enter number of clusters : '))
result = hierarchical_clustering(data, num)
result
Out[40]:
WeightIndex pH Cluster
0 1 3 0
1 2 5 1
2 5 4 2
3 1 2 0
4 5 6 2
In [ ]: