A2 Vishal Borra

Download as txt, pdf, or txt
Download as txt, pdf, or txt
You are on page 1 of 2

import pandas as pd

from sklearn.cluster import MiniBatchKMeans, SpectralClustering, OPTICS


from sklearn.metrics import davies_bouldin_score
import matplotlib.pyplot as plt

# Data preprocessing function


def preprocess_data(file_path):
    """Load a header-less CSV file and z-score normalize every column.

    Args:
        file_path: Anything accepted by ``pandas.read_csv`` (a path or a
            file-like object). The first row is treated as data, not as a
            header, so the columns are labelled 0, 1, 2, ...

    Returns:
        A ``pandas.DataFrame`` of the same shape as the input in which each
        column has had its mean subtracted and been divided by its sample
        standard deviation. Constant columns come back as all zeros.
    """
    # Reading the data from CSV
    data = pd.read_csv(file_path, header=None)

    # Normalizing the data
    std = data.std()
    # A constant column has a standard deviation of 0; dividing by it would
    # produce NaN values that the clustering estimators cannot handle.
    # Dividing by 1 instead leaves such a column at exactly 0 after centering.
    safe_std = std.where(std != 0, 1.0)
    data_normalized = (data - data.mean()) / safe_std
    return data_normalized

# Function to apply clustering and calculate the Davies-Bouldin Index


def apply_clustering_and_evaluate(data, algorithm):
    """Cluster ``data`` with ``algorithm`` and score the result.

    Args:
        data: The samples to cluster; passed unchanged to
            ``algorithm.fit_predict`` and to ``davies_bouldin_score``.
        algorithm: Any object exposing ``fit_predict(data)`` that returns one
            label per sample.

    Returns:
        A ``(labels, db_index)`` tuple. ``db_index`` is the Davies-Bouldin
        index (lower is better). When the labelling is degenerate (fewer than
        two distinct labels, or as many labels as samples) the index is
        undefined and ``float('inf')`` is returned, so the result always
        ranks last in a "lowest score wins" comparison.
    """
    # Applying the clustering algorithm to the data
    labels = algorithm.fit_predict(data)

    # davies_bouldin_score raises ValueError unless the number of distinct
    # labels lies between 2 and n_samples - 1; report the worst possible
    # score instead of aborting the whole comparison.
    # NOTE(review): every distinct label is scored as a cluster, including a
    # noise label such as -1 if the estimator emits one.
    n_labels = len(set(labels))
    if not 1 < n_labels < len(labels):
        return labels, float('inf')

    # Calculating Davies-Bouldin Index
    db_index = davies_bouldin_score(data, labels)
    return labels, db_index

# Defining the clustering algorithms


# Registry of candidate estimators, keyed by the name shown in the plot title
# and the printed summary. Entries are added in the order they are evaluated.
algorithms = {}
algorithms['MiniBatchKMeans'] = MiniBatchKMeans(n_clusters=3, batch_size=100)
algorithms['SpectralClustering'] = SpectralClustering(
    n_clusters=3,
    affinity='nearest_neighbors',
)
algorithms['OPTICS'] = OPTICS(min_samples=5, xi=0.05, min_cluster_size=0.1)

# Main execution loop


# For each dataset: normalize it, run every configured algorithm, keep the one
# with the lowest Davies-Bouldin index, then plot and report that winner.
for path in [
    r"C:\Users\Vishal\Desktop\DM2\D01.csv",
    r"C:\Users\Vishal\Desktop\DM2\D02.csv",
    r"C:\Users\Vishal\Desktop\DM2\D03.csv",
]:
    data = preprocess_data(path)

    best_davies_bouldin_score = float('inf')
    best_algorithm_name = None
    best_labels = None

    for name, alg in algorithms.items():
        try:
            labels, db_index = apply_clustering_and_evaluate(data, alg)
        except ValueError as exc:
            # scikit-learn signals invalid input and degenerate labelings
            # with ValueError; one failing algorithm should not abort the
            # comparison for the remaining ones.
            print(f"Skipping {name} for {path}: {exc}")
            continue
        # Lower Davies-Bouldin index means better-separated clusters.
        if db_index < best_davies_bouldin_score:
            best_davies_bouldin_score = db_index
            best_algorithm_name = name
            best_labels = labels

    if best_labels is None:
        # No algorithm produced a finite score, so there is nothing to plot.
        print(f"No algorithm produced a valid clustering for {path}")
        continue

    # Counting number of clusters. The label -1 is what OPTICS assigns to
    # noise samples, so it is not counted as a cluster.
    num_clusters = len(set(best_labels) - {-1})

    # Plotting the results. Only the first two columns are drawn
    # (assumes the CSV has at least two feature columns).
    plt.scatter(data[0], data[1], c=best_labels, cmap='viridis', marker='o')
    plt.title(
        f'{best_algorithm_name} - Davies-Bouldin Index: '
        f'{best_davies_bouldin_score:.2f}, Number of Clusters: {num_clusters}'
    )
    plt.xlabel('Feature 1')
    plt.ylabel('Feature 2')
    plt.show()
    print(
        f"The best algorithm for {path} is {best_algorithm_name} with a "
        f"Davies-Bouldin Index of {best_davies_bouldin_score:.2f} and "
        f"{num_clusters} clusters"
    )

You might also like