Week 1 To Week 9
Week 1 To Week 9
Week 1 To Week 9
def submit():
    """Read the current text of the three form entry widgets.

    The values come from ``name_entry``, ``reg_no_entry`` and
    ``pathway_entry``, which must already exist at module level.

    NOTE(review): the visible code only reads the values and never uses
    them. Presumably the step that stores them was lost when this listing
    was copied -- confirm against the original program.
    """
    # The body has to be indented under the ``def``; without the indent
    # Python raises an IndentationError before the program can start.
    name = name_entry.get()
    reg_no = reg_no_entry.get()
    pathway = pathway_entry.get()
def clear_fields():
    """Empty the name, registration-number and pathway entry widgets.

    Relies on the module-level widgets ``name_entry``, ``reg_no_entry`` and
    ``pathway_entry``.
    """
    # The body has to be indented under the ``def``; the same call is made
    # on every widget, so loop instead of repeating it.
    for entry in (name_entry, reg_no_entry, pathway_entry):
        # Delete from index 0 up to tk.END, i.e. the whole content.
        entry.delete(0, tk.END)
# Hand control to the event loop of ``root``
# (presumably the tk.Tk main window created elsewhere -- confirm).
root.mainloop()
Mysql:
Database changed
Output:
mysql> select * from students;
+------------------+--------+------------+
+------------------+--------+------------+
+------------------+--------+------------+
import pandas as pd
import matplotlib.pyplot as plt
# Exploratory plots for the EV data set.
# Assumes 'FastCharge_KmH' and 'TopSpeed_KmH' are numeric columns of the CSV
# -- TODO confirm against the file.
df = pd.read_csv("/content/data.csv")
# A bare ``df.head()`` is only displayed inside a notebook cell; print it so
# the preview also appears when this runs as a script.
print(df.head())

# Show each histogram on its own figure. Without a show() in between, both
# histograms are drawn on the same axes and overlap.
plt.hist(df['FastCharge_KmH'], bins=10)
plt.xlabel('FastCharge_KmH')
plt.ylabel('Frequency')
plt.show()

plt.hist(df['TopSpeed_KmH'], bins=10)
plt.xlabel('TopSpeed_KmH')
plt.ylabel('Frequency')
plt.show()

# Relationship between top speed and fast-charge rate.
plt.scatter(x='TopSpeed_KmH', y='FastCharge_KmH', data=df)
plt.xlabel('TopSpeed_KmH')
plt.ylabel('FastCharge_KmH')
plt.show()

# Spread of the fast-charge rate.
plt.boxplot(df['FastCharge_KmH'])
plt.show()

# NOTE(review): ``width`` is not a binning option of plt.hist; it is passed
# on to the drawn bars. If ten bins were intended, use ``bins=10`` -- confirm.
plt.hist(df['TopSpeed_KmH'], width=10)
plt.show()
Week4
Use relevant Python packages to compute:
- Central tendency for the parameters
- Dispersion for the parameters
import pandas as pd
from scipy import stats
import matplotlib.pyplot as plt
import numpy as np
# Load the EV sales data and report how many values are missing per column.
data = pd.read_csv("/content/Copy of ev_sales_year1.csv")
data.head()  # preview of the first rows (only displayed in a notebook cell)

# isna() is the same check as isnull(); summing the boolean mask gives the
# number of missing entries in each column.
missing_values = data.isna().sum()
print("MissingValues:\n", missing_values)
MissingValues:
Date 0
HEAVY GOODS VEHICLE 0
HEAVY MOTOR VEHICLE 0
HEAVY PASSENGER VEHICLE 0
LIGHT GOODS VEHICLE 0
LIGHT MOTOR VEHICLE 0
LIGHT PASSENGER VEHICLE 0
MEDIUM MOTOR VEHICLE 0
OTHER THAN MENTIONED ABOVE 0
THREE WHEELER(NT) 0
THREE WHEELER(T) 0
TWO WHEELER(NT) 0
TWO WHEELER(T) 0
dtype: int64
y train data : [1 2 1 0 1 2 0 0 1 1 0 2 0 0 1 1 2 1 2 2 1 0 0 2 2 0 0 0 1 2 0 2 2 0 1 1 2
1202121110110122012202012212112201201
2]
y test data : [1 0 2 1 1 0 1 2 1 1 2 0 0 0 0 1 2 1 1 2 0 2 0 2 2 2 2 2 0 0 0 0 1 0 0 2 1
0002110012212121021000120001012012022
1]
WEEK - 7
Using the Iris dataset from scikit-learn, perform data exploration, preprocessing and
splitting.
Data Exploration :
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Data exploration of the Iris CSV: summary statistics, a sample of rows,
# missing-value counts and two plots.
df = pd.read_csv('/content/Iris.csv')
summary = df.describe()
sample_data = df.head()
missing_values = df.isnull().sum()
print("\nsummary of the dataset:", summary)
print("\nsample data of the dataset:", sample_data)
print("\nchecking missing values in dataset:", missing_values)

# Distribution of sepal length in 10 bins.
sns.histplot(df['SepalLengthCm'], bins=10)
plt.title('Histplot of SepalLengthCm column')
plt.show()

# The title announces a count plot of SepalWidthCm, so draw exactly that.
# The previous call was a barplot of SepalLengthCm, which matched neither the
# plot type nor the column named in the title.
# Assumes the CSV has a 'SepalWidthCm' column -- TODO confirm.
sns.countplot(data=df, x='SepalWidthCm')
plt.title('count plot of SepalWidthCm column')
plt.show()
Data Splitting :
import pandas as pd
from sklearn.datasets import load_iris
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
# Split the scikit-learn Iris data into equal train and test halves and
# print every part, followed by the full feature table.
iris = load_iris()
x = iris.data
y = iris.target

# test_size=0.5 puts half of the samples in the test set; the fixed
# random_state makes the shuffle reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.5,
    random_state=42,
)

for caption, part in (
    ("x train data:", x_train),
    ("x test data:", x_test),
    ("y train data:", y_train),
    ("y test data:", y_test),
):
    print(caption, part)

# Feature matrix as a DataFrame with the feature names as column headers.
df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
print(df)
x train data: [[5.4 3. 4.5 1.5]
[6.2 3.4 5.4 2.3]
[5.5 2.3 4. 1.3]
[5.4 3.9 1.7 0.4]
[5. 2.3 3.3 1. ]
[6.4 2.7 5.3 1.9]
[5. 3.3 1.4 0.2]
[5. 3.2 1.2 0.2]
[5.5 2.4 3.8 1.1]
[6.7 3. 5. 1.7]
[4.9 3.1 1.5 0.2]
[5.8 2.8 5.1 2.4]
[5. 3.4 1.5 0.2]
[5. 3.5 1.6 0.6]
[5.9 3.2 4.8 1.8]
[5.1 2.5 3. 1.1]
[6.9 3.2 5.7 2.3]
[6. 2.7 5.1 1.6]
[6.1 2.6 5.6 1.4]
[7.7 3. 6.1 2.3]
[5.5 2.5 4. 1.3]
[4.4 2.9 1.4 0.2]
[4.3 3. 1.1 0.1]
[6. 2.2 5. 1.5]
[7.2 3.2 6. 1.8]
[4.6 3.1 1.5 0.2]
[5.1 3.5 1.4 0.3]
[4.4 3. 1.3 0.2]
[6.3 2.5 4.9 1.5]
[6.3 3.4 5.6 2.4]
[4.6 3.4 1.4 0.3]
[6.8 3. 5.5 2.1]
[6.3 3.3 6. 2.5]
[4.7 3.2 1.3 0.2]
[6.1 2.9 4.7 1.4]
[6.5 2.8 4.6 1.5]
[6.2 2.8 4.8 1.8]
[7. 3.2 4.7 1.4]
[6.4 3.2 5.3 2.3]
[5.1 3.8 1.6 0.2]
[6.9 3.1 5.4 2.1]
[5.9 3. 4.2 1.5]
[6.5 3. 5.2 2. ]
[5.7 2.6 3.5 1. ]
[5.2 2.7 3.9 1.4]
[6.1 3. 4.6 1.4]
[4.5 2.3 1.3 0.3]
[6.6 2.9 4.6 1.3]
[5.5 2.6 4.4 1.2]
[5.3 3.7 1.5 0.2]
[5.6 3. 4.1 1.3]
[7.3 2.9 6.3 1.8]
[6.7 3.3 5.7 2.1]
[5.1 3.7 1.5 0.4]
[4.9 2.4 3.3 1. ]
[6.7 3.3 5.7 2.5]
[7.2 3. 5.8 1.6]
[7.1 3. 5.9 2.1]]
x test data: [[6.1 2.8 4.7 1.2]
[5.7 3.8 1.7 0.3]
[7.7 2.6 6.9 2.3]
[6. 2.9 4.5 1.5]
[6.8 2.8 4.8 1.4]
[5.4 3.4 1.5 0.4]
[5.6 2.9 3.6 1.3]
[6.9 3.1 5.1 2.3]
[6.2 2.2 4.5 1.5]
[5.8 2.7 3.9 1.2]
[6.5 3.2 5.1 2. ]
[4.8 3. 1.4 0.1]
[5.5 3.5 1.3 0.2]
[4.9 3.1 1.5 0.1]
[5.1 3.8 1.5 0.3]
[6.3 3.3 4.7 1.6]
[6.5 3. 5.8 2.2]
[5.6 2.5 3.9 1.1]
[5.7 2.8 4.5 1.3]
[6.4 2.8 5.6 2.2]
[4.7 3.2 1.6 0.2]
[6.1 3. 4.9 1.8]
[5. 3.4 1.6 0.4]
[6.4 2.8 5.6 2.1]
[7.9 3.8 6.4 2. ]
[6.7 3. 5.2 2.3]
[6.7 2.5 5.8 1.8]
[6.8 3.2 5.9 2.3]
[4.8 3. 1.4 0.3]
[4.8 3.1 1.6 0.2]
[4.6 3.6 1. 0.2]
[5.7 4.4 1.5 0.4]
[6.7 3.1 4.4 1.4]
[4.8 3.4 1.6 0.2]
[4.4 3.2 1.3 0.2]
[6.3 2.5 5. 1.9]
[6.4 3.2 4.5 1.5]
[5.2 3.5 1.5 0.2]
[5. 3.6 1.4 0.2]
[5.2 4.1 1.5 0.1]
[5.8 2.7 5.1 1.9]
[6. 3.4 4.5 1.6]
[6.7 3.1 4.7 1.5]
[5.4 3.9 1.3 0.4]
[5.4 3.7 1.5 0.2]
[5.5 2.4 3.7 1. ]
[6.3 2.8 5.1 1.5]
[6.4 3.1 5.5 1.8]
y train data: [1 2 1 0 1 2 0 0 1 1 0 2 0 0 1 1 2 1 2 2 1 0 0 2 2 0 0 0 1 2 0 2 2 0 1 1 2
1202121110110122012202012212112201201
2]
y test data: [1 0 2 1 1 0 1 2 1 1 2 0 0 0 0 1 2 1 1 2 0 2 0 2 2 2 2 2 0 0 0 0 1 0 0 2 1
0002110012212121021000120001012012022
1]
sepal length (cm) sepal width (cm) petal length (cm) petal width (cm)
0 5.1 3.5 1.4 0.2
1 4.9 3.0 1.4 0.2
2 4.7 3.2 1.3 0.2
3 4.6 3.1 1.5 0.2
4 5.0 3.6 1.4 0.2
.. ... ... ... ...
145 6.7 3.0 5.2 2.3
146 6.3 2.5 5.0 1.9
147 6.5 3.0 5.2 2.0
148 6.2 3.4 5.4 2.3
149 5.9 3.0 5.1 1.8
Data preprocessing :
# Preprocessing of the Iris DataFrame ``df`` built in the splitting step:
# drop rows with missing values, inspect correlations, drop one feature.
missing = df.dropna()  # copy of df without rows that contain NaN
print("Removed the missing values:", missing)

# Pairwise correlation of the columns; the same matrix feeds the heatmap.
correlation = df.corr()
print("checking the correlation of dataset:", correlation)
sns.heatmap(correlation)
plt.title('Heatmap of correlation of dataset')
plt.show()

# Feature table without the sepal-length column.
x = df.drop(columns=['sepal length (cm)'])
print("Removed the SepalLengthCm column:", x)
OUTPUT :
Removed the missing values: sepal length (cm) sepal width (cm) petal length (cm) petal
width (cm)
0 5.1 3.5 1.4 0.2
1 4.9 3.0 1.4 0.2
2 4.7 3.2 1.3 0.2
3 4.6 3.1 1.5 0.2
4 5.0 3.6 1.4 0.2
.. ... ... ... ...
145 6.7 3.0 5.2 2.3
146 6.3 2.5 5.0 1.9
147 6.5 3.0 5.2 2.0
148 6.2 3.4 5.4 2.3
149 5.9 3.0 5.1 1.8
OUTPUT :
accuracy 1.00 30
macro avg 1.00 1.00 1.00 30
weighted avg 1.00 1.00 1.00 30
import pandas as pd
from sklearn.model_selection import train_test_split
# Load the fish data, pick the feature columns and the target, and split
# them 70/30 into train and test sets.
df = pd.read_csv('/content/fish.csv')
# This assignment has to be on its own line; fused onto the read_csv line it
# is a SyntaxError.
x = df[['LIVE_BAIT', 'CAMPER', 'PERSONS', 'CHILDREN']]
y = df['FISH_COUNT']
print("Shape of the dataset :", df.shape)
print("Five columns of dataset :\n", df.head())
# The fixed random_state keeps the split reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=109
)
Shape of the dataset : (250, 5)
Five columns of dataset :
LIVE_BAIT CAMPER PERSONS CHILDREN FISH_COUNT
0 0 0 1 0 0
1 1 1 1 0 0
2 1 0 1 0 0
3 1 1 2 1 0
4 1 0 1 0 1
from sklearn import svm
from sklearn.metrics import accuracy_score, classification_report

# Train a linear-kernel support vector classifier on the training split.
clf = svm.SVC(kernel='linear')
clf.fit(x_train, y_train)

# Despite its name, ``xtest`` holds the labels predicted for x_test.
xtest = clf.predict(x_test)

# Score the predictions against the held-out labels.
accuracy = accuracy_score(y_test, xtest)
report = classification_report(y_test, xtest)
print("Accuracy of the model is :", accuracy)
print("classification reort of the model is :", report)
Accuracy of the model is : 0.5733333333333334
classification reort of the model is : precision recall f1-score support
accuracy 0.57 75
macro avg 0.04 0.08 0.06 75
weighted avg 0.33 0.57 0.42 75
WEEK - 09
1. Python implementation of the K-Means clustering algorithm.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
# Load the dataset
data = pd.read_csv('/content/fish.csv')
# Cluster on the two columns at positions 3 and 4. Keep their names so the
# final plot is labelled with what is actually plotted.
feature_names = data.columns[[3, 4]]
x = data.iloc[:, [3, 4]].values

# Determine the optimal number of clusters using the elbow method:
# fit K-Means for k = 1..10 and record the within-cluster sum of squares.
wc = []
for i in range(1, 11):
    # The loop body must be indented; otherwise Python raises an
    # IndentationError.
    kmeans = KMeans(n_clusters=i, init='k-means++', random_state=42)
    kmeans.fit(x)
    wc.append(kmeans.inertia_)

# Plot the elbow method graph
plt.plot(range(1, 11), wc)
plt.title("The Elbow Method Graph")
plt.xlabel("Number of Clusters")
plt.ylabel("WCSS (Within-Cluster Sum of Squares)")
plt.show()

# Perform K-means clustering with 5 clusters
num_clusters = 5
kmeans = KMeans(n_clusters=num_clusters, init='k-means++',
                random_state=42)
y_predict = kmeans.fit_predict(x)

# Visualize the clusters: one scatter series per cluster label.
for cluster_num in range(num_clusters):
    in_cluster = y_predict == cluster_num
    plt.scatter(x[in_cluster, 0], x[in_cluster, 1],
                s=100, label=f'Cluster {cluster_num + 1}')
# Cluster centres on top of the points.
plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1], s=300,
            c='yellow', label='Centroid')
# Title and axis labels come from the data. The previous "customers" /
# "Annual Income" / "Spending Score" texts described a different dataset.
plt.title(f'K-Means clusters ({num_clusters} clusters)')
plt.xlabel(feature_names[0])
plt.ylabel(feature_names[1])
plt.legend()
plt.show()