ML Assignment 1 - Nageswar


Name: Nageswar

Course: MSc DS

Subject: ML

Assignment 1:

1. Illustrate K-means clustering with a suitably identified choice of k for the mall customers data, and draw inferences related to the resulting clusters.

Answer:

import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans

# Load the dataset
data = pd.read_csv('Mall_Customers.csv')

# Select the features for clustering
X = data[['Annual Income (k$)', 'Spending Score (1-100)']]

# Apply K-means with k = 5
kmeans = KMeans(n_clusters=5, random_state=42)
data['Cluster'] = kmeans.fit_predict(X)

# Plot the clusters
plt.figure(figsize=(10, 6))
for cluster in range(5):
    plt.scatter(X[data['Cluster'] == cluster]['Annual Income (k$)'],
                X[data['Cluster'] == cluster]['Spending Score (1-100)'],
                label=f'Cluster {cluster}')
plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1],
            s=300, c='red', label='Centroids')
plt.xlabel('Annual Income (k$)')
plt.ylabel('Spending Score (1-100)')
plt.legend()
plt.title('Customer Segments')
plt.show()
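
The choice of k = 5 above can be justified with the elbow method. The following is a minimal sketch (assuming the same Mall_Customers.csv file and the same two features as above) that plots the within-cluster sum of squares (inertia) for k = 1 to 10; the bend, or "elbow", in the curve suggests a suitable k, and for this dataset the curve typically flattens around k = 5.

import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans

# Assumed dataset and features, as in the answer above
data = pd.read_csv('Mall_Customers.csv')
X = data[['Annual Income (k$)', 'Spending Score (1-100)']]

# Compute inertia (within-cluster sum of squares) for a range of k values
inertias = []
for k in range(1, 11):
    km = KMeans(n_clusters=k, random_state=42, n_init=10)
    km.fit(X)
    inertias.append(km.inertia_)

# The "elbow" in this curve indicates a reasonable choice of k
plt.plot(range(1, 11), inertias, marker='o')
plt.xlabel('Number of clusters (k)')
plt.ylabel('Inertia')
plt.title('Elbow Method')
plt.show()
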
2. Illustrate and create a decision tree for any dataset and compare its performance.

Answer:

import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report

# Load the Iris dataset
iris = load_iris()
X, y = iris.data, iris.target

# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create and train a decision tree classifier
clf = DecisionTreeClassifier(random_state=42)
clf.fit(X_train, y_train)

# Make predictions
y_pred = clf.predict(X_test)

# Evaluate performance
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")
print("Classification Report:\n", report)


3. Use SVM on any dataset, perform hyper-parameter tuning, and compare the results.

Answer:

import pandas as pd
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix

# Load the breast cancer dataset
cancer = load_breast_cancer()

# Extract features and target
df_feat = pd.DataFrame(cancer['data'], columns=cancer['feature_names'])
df_target = pd.DataFrame(cancer['target'], columns=['Cancer'])

# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(df_feat, np.ravel(df_target),
                                                    test_size=0.30, random_state=101)

# Create an SVM classifier
svc = SVC()

# Define hyperparameter grid
param_grid = {'C': [0.1, 1, 10], 'gamma': [0.1, 0.01, 0.001]}

# Perform grid search
grid = GridSearchCV(svc, param_grid, verbose=2)
grid.fit(X_train, y_train)

# Get best hyperparameters
best_C = grid.best_params_['C']
best_gamma = grid.best_params_['gamma']

# Train SVM with best hyperparameters
best_svc = SVC(C=best_C, gamma=best_gamma)
best_svc.fit(X_train, y_train)

# Evaluate performance
y_pred = best_svc.predict(X_test)
print("Classification Report:\n", classification_report(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
