3rd Code

Download as txt, pdf, or txt
Download as txt, pdf, or txt
You are on page 1 of 3

11:

### Question: Create a binary classifier for the digit 5 using stochastic
### gradient descent, limited to exactly 5 iterations (epochs),
### then obtain the cross-validation accuracy using 3 folds.

from sklearn.linear_model import SGDClassifier


from sklearn.model_selection import cross_val_score

# Boolean targets for the binary "is this digit a 5?" task.
y_train_5 = (y_train == 5)
y_test_5 = (y_test == 5)

# tol=None switches off the tolerance-based stopping criterion, so training
# always runs the full max_iter=5 epochs. The earlier spelling, tol=-np.infty,
# depended on an alias that NumPy 2.0 removed, and recent scikit-learn
# releases reject a negative tolerance during parameter validation.
sgd_clf = SGDClassifier(max_iter=5, tol=None, random_state=42)

# Fit on the full training set so sgd_clf itself is usable afterwards;
# cross_val_score works on clones and does not rely on this fit.
sgd_clf.fit(X_train, y_train_5)

# Print the per-fold accuracies (consistent with the multiclass section)
# instead of leaving them as a bare expression whose value is discarded
# when the file is run as a script.
print(cross_val_score(sgd_clf, X_train, y_train_5, cv=3, scoring="accuracy"))

12:

### Question: obtain predictions using cross-validation with 3 folds, with
### X_train as X and y_train_5 as y,
### then obtain the confusion matrix for these predictions,
### then obtain the confusion matrix for a perfect prediction
### (i.e. predictions identical to the true labels),
### then obtain the precision, recall and F1 score.

from sklearn.model_selection import cross_val_predict


from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score, recall_score
from sklearn.metrics import f1_score

# Predictions for every training sample, produced with 3-fold
# cross-validation on the binary "5 vs. not 5" targets.
y_train_pred = cross_val_predict(
    sgd_clf,
    X_train,
    y_train_5,
    cv=3,
)

# Confusion matrix of the true labels against the cross-validated predictions.
print('confusion_matrix')
print(confusion_matrix(y_train_5, y_train_pred))

# Reference case: predictions identical to the true labels.
y_train_perfect_predictions = y_train_5
print('confusion_matrix_perfect')
print(confusion_matrix(y_train_5, y_train_perfect_predictions))

# Report each summary metric for the cross-validated predictions, in the
# order precision, recall, F1.
for label, metric in (
    ('precision:', precision_score),
    ('recall:', recall_score),
    ('f1 score:', f1_score),
):
    print(label, metric(y_train_5, y_train_pred))

14:

### Question: obtain prediction scores using cross validation with 3 folds
### for X_train as X and y_train_5 as Y
### then obtain the precision recall curve then plot it

from sklearn.metrics import precision_recall_curve

# Ask for decision-function scores rather than class predictions, so the
# precision/recall trade-off can be evaluated over a range of thresholds.
y_scores = cross_val_predict(
    sgd_clf, X_train, y_train_5, cv=3, method="decision_function"
)

pr_curve = precision_recall_curve(y_train_5, y_scores)
precisions, recalls, thresholds = pr_curve

# NOTE(review): plot_precision_recall_vs_threshold and plt are not defined in
# this section; they are assumed to be provided earlier in the file.
plot_precision_recall_vs_threshold(precisions, recalls, thresholds)
plt.show()

16:

### Question: obtain prediction scores using cross validation with 3 folds
### for X_train as X and y_train_5 as Y
### then obtain the roc curve then plot it
### then obtain the roc area under the curve score

from sklearn.metrics import roc_curve


from sklearn.metrics import roc_auc_score

# Cross-validated decision-function scores for the binary "5" detector.
y_scores = cross_val_predict(
    sgd_clf, X_train, y_train_5, cv=3, method="decision_function"
)

# False-positive rate and true-positive rate at each score threshold.
fpr, tpr, thresholds = roc_curve(y_train_5, y_scores)

# NOTE(review): plot_roc_curve and plt are not defined in this section;
# they are assumed to be provided earlier in the file.
plot_roc_curve(fpr, tpr)
plt.show()

# Area under the ROC curve as a single summary number.
roc_auc_value = roc_auc_score(y_train_5, y_scores)
print('roc auc score:', roc_auc_value)

17:

### Question: Create a multiclass classifier using stochastic gradient descent,
### limited to exactly 5 iterations (epochs), with X_train as X and y_train as y,
### then print the classes known to the classifier,
### then obtain the cross-validation accuracy using 3 folds,
### then obtain a standardized (scaled) version of X_train and obtain the
### cross-validation accuracy using 3 folds for the scaled version.

from sklearn.preprocessing import StandardScaler

# tol=None switches off the tolerance-based stopping criterion, so training
# always runs the full max_iter=5 epochs. The earlier spelling, tol=-np.infty,
# depended on an alias that NumPy 2.0 removed, and recent scikit-learn
# releases reject a negative tolerance during parameter validation.
sgd_clf = SGDClassifier(max_iter=5, tol=None, random_state=42)

# Fit on the full multiclass labels and list the classes the model learned.
sgd_clf.fit(X_train, y_train)
print(sgd_clf.classes_)

# Baseline: 3-fold accuracy on the unscaled features.
print(cross_val_score(sgd_clf, X_train, y_train, cv=3, scoring="accuracy"))

# Standardize the features (cast to float64 first) and repeat the same
# evaluation to compare against the unscaled baseline.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train.astype(np.float64))
print(cross_val_score(sgd_clf, X_train_scaled, y_train, cv=3, scoring="accuracy"))

18:

### Question: obtain the cross validation predictions for the stochastic gradient
### descent multiclass classifier using 3 folds such that X_train_scaled
### as X and y_train as Y
### then obtain the confusion matrix and print it

# Cross-validated (3-fold) multiclass predictions on the scaled features.
y_train_pred = cross_val_predict(
    sgd_clf,
    X_train_scaled,
    y_train,
    cv=3,
)

# Confusion matrix of the true digit labels against those predictions.
conf_mx = confusion_matrix(y_true=y_train, y_pred=y_train_pred)
print(conf_mx)

19:

### Question: create random integer noise (values in [0, 100)) for images
### of size 28*28 (784 pixels), then add it to X_train and X_test,
### then create a KNN classifier with the noisy X_train as X and
### the clean (noise-free) X_train as y,
### then predict the clean image for the noisy X_test sample at index 5500
### and plot it.

from sklearn.neighbors import KNeighborsClassifier

# Add uniform integer noise from [0, 100) to every pixel (randint's upper
# bound is exclusive). The noise array is sized from the data's own shape
# instead of a hard-coded 784 columns, so the element-wise addition stays
# valid for any feature width; for 784-column data the result is identical.
noise = np.random.randint(0, 100, X_train.shape)
X_train_mod = X_train + noise
noise = np.random.randint(0, 100, X_test.shape)
X_test_mod = X_test + noise

# Denoising setup: the targets are the original, noise-free images, so the
# classifier is trained with one output per pixel.
y_train_mod = X_train
y_test_mod = X_test

knn_clf = KNeighborsClassifier()
knn_clf.fit(X_train_mod, y_train_mod)

# predict expects a 2-D batch, hence the single noisy test image is wrapped
# in a list.
clean_digit = knn_clf.predict([X_test_mod[5500]])

# NOTE(review): plot_digit is not defined in this section; it is assumed to
# be provided earlier in the file and to accept the (1, n_pixels) prediction.
plot_digit(clean_digit)

You might also like