# Q3 X1
# (document-export artifacts, kept as comments so the file parses)
# Download as txt, pdf, or txt
# You are on page 1 of 4

#!/usr/bin/env python
# coding: utf-8

# In[53]:

import pandas as pd

from sklearn.model_selection import learning_curve


from sklearn.model_selection import ShuffleSplit

# In[54]:

# In[54]:

# Load the evaluation spreadsheet (assumes 综合评价.xlsx is in the
# working directory).
data = pd.read_excel('综合评价.xlsx')
data.head()

# In[55]:

data.columns

# In[56]:

# Features are the 26 columns named 'a'..'z'; the target is the grade
# column 分档等级.
feature_cols = [chr(code) for code in range(ord('a'), ord('z') + 1)]
X = data[feature_cols]
Y = data['分档等级']

# In[57]:

# 此处所引入的包大部分为下文机器学习算法
import pandas as pd
from numpy import *
import numpy as np
from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import learning_curve
import xgboost as xgb
import lightgbm as lgb
from sklearn.metrics import accuracy_score,recall_score,f1_score
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error

import warnings
warnings.filterwarnings("ignore")
from sklearn.model_selection import train_test_split

# Hold out 10% of the data for testing; the fixed random_state keeps the
# split reproducible across runs.
tr_x, te_x, tr_y, te_y = train_test_split(X, Y, test_size=0.1, random_state=5)

# MLP baseline: one hidden layer of 10 units.  hidden_layer_sizes should
# be an iterable; the original passed a bare int and relied on sklearn
# silently wrapping it.
model = MLPClassifier(hidden_layer_sizes=(10,), max_iter=1000).fit(tr_x, tr_y)
print("神经网络:")
print("训练集准确度:{:.3f}".format(model.score(tr_x, tr_y)))
print("测试集准确度:{:.3f}".format(model.score(te_x, te_y)))
y_pred = model.predict(te_x)
print("平均绝对误差:", mean_absolute_error(te_y, y_pred))
# Accuracy / recall / F-score; weighted averaging handles class imbalance.
print("ACC", accuracy_score(te_y, y_pred))
print("REC", recall_score(te_y, y_pred, average="weighted"))
print("F-score", f1_score(te_y, y_pred, average="weighted"))

print("\n 逻辑回归:")
# Logistic-regression baseline with liblinear solver.
logreg = LogisticRegression(C=1, solver='liblinear', multi_class='auto')
logreg.fit(tr_x, tr_y)
# Score each split once and reuse the values; the original computed
# score/score2 and then called .score() a second time inside the prints.
score = logreg.score(tr_x, tr_y)
score2 = logreg.score(te_x, te_y)
print("训练集准确度:{:.3f}".format(score))
print("测试集准确度:{:.3f}".format(score2))
y_pred = logreg.predict(te_x)
print("平均绝对误差:", mean_absolute_error(te_y, y_pred))
print("ACC", accuracy_score(te_y, y_pred))
print("REC", recall_score(te_y, y_pred, average="weighted"))
print("F-score", f1_score(te_y, y_pred, average="weighted"))

# Learning-curve helper (indentation reconstructed from the mangled export).
def plot_learning_curve(estimator, title, X, y, ylim=None, cv=None,
                        n_jobs=1, train_sizes=np.linspace(.1, 1.0, 5)):
    """Plot train/CV score bands against training-set size and save the plot.

    Parameters
    ----------
    estimator : sklearn estimator to evaluate.
    title : str -- figure title, also used as the saved file name.
    X, y : array-like training data and labels.
    ylim : optional (low, high) y-axis limits.
    cv : cross-validation splitter forwarded to ``learning_curve``.
    n_jobs : parallel jobs forwarded to ``learning_curve``.
    train_sizes : fractions of the training set to evaluate.

    Returns the ``matplotlib.pyplot`` module so callers can tweak further.
    """
    plt.figure()
    plt.title(title)
    if ylim is not None:
        plt.ylim(*ylim)
    plt.xlabel("game num")
    plt.ylabel("score")
    train_sizes, train_scores, test_scores = learning_curve(
        estimator, X, y, cv=cv, n_jobs=n_jobs, train_sizes=train_sizes)
    train_scores_mean = np.mean(train_scores, axis=1)
    train_scores_std = np.std(train_scores, axis=1)
    test_scores_mean = np.mean(test_scores, axis=1)
    test_scores_std = np.std(test_scores, axis=1)
    plt.grid()
    # Shaded +/- one-std bands around the mean curves.
    plt.fill_between(train_sizes, train_scores_mean - train_scores_std,
                     train_scores_mean + train_scores_std, alpha=0.1,
                     color="r")
    plt.fill_between(train_sizes, test_scores_mean - test_scores_std,
                     test_scores_mean + test_scores_std, alpha=0.1, color="g")
    plt.plot(train_sizes, train_scores_mean, 'o-', color="r",
             label="Training score")
    plt.plot(train_sizes, test_scores_mean, 'o-', color="g",
             label="Cross-validation score")
    # Add the legend BEFORE saving; the original called savefig first, so
    # the legend was missing from the written .jpg.
    plt.legend(loc="best")
    # NOTE(review): assumes the ./Q3/ directory exists -- savefig raises
    # otherwise; confirm or create it before calling.
    plt.savefig('./Q3/%s.jpg' % title)
    plt.show()
    return plt
# 5-fold shuffled CV splitter for the learning-curve plot.
cv = ShuffleSplit(n_splits=5, test_size=0.2, random_state=0)
# The call below was half-commented in the export, leaving a stray live
# `n_jobs=1)` line (a SyntaxError); the whole call is now commented out.
# plot_learning_curve(logreg, "LogisticRegression", tr_x, tr_y, ylim=None,
#                     cv=cv, n_jobs=1)

from keras.models import Sequential


from keras.layers import *
from keras.layers import Conv1D
from keras.optimizers import Adam, SGD, RMSprop
from keras.losses import categorical_crossentropy
from keras.utils import to_categorical
from keras.callbacks import CSVLogger, ModelCheckpoint, ReduceLROnPlateau

# Remap class labels {1, 2, 3} -> {0, 1, 2} so they can be one-hot encoded.
Y = Y.map({1: 0, 2: 1, 3: 2})

# One-hot targets and a trailing channel axis so X fits Conv1D input.
y = to_categorical(Y.values)
x = X.values[..., np.newaxis]
print(x.shape, y.shape)
# NOTE(review): no random_state here, so this split differs on every run --
# confirm that is intended.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, shuffle=True)
print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)

# Build the model: a small 1-D CNN -- three tanh conv layers with stride 2
# (halving the sequence each time), then a softmax over the 3 classes.
model = Sequential([
    Conv1D(filters=4, kernel_size=3, padding='same',
           input_shape=(x.shape[1], 1), activation='tanh', strides=2),
    Conv1D(filters=8, kernel_size=6, padding='same', activation='tanh',
           strides=2),
    Conv1D(filters=16, kernel_size=9, padding='same', activation='tanh',
           strides=2),
    Flatten(),
    Dense(3, activation='softmax'),
])
model.summary()

# 优化器选择 Adam 学习率为 0.0001 损失函数使用交叉熵 训练 200 次


model.compile(optimizer=Adam(0.0001), loss=categorical_crossentropy,
metrics=['accuracy'])
model.fit(x_train, y_train, validation_data=(x_test, y_test), epochs=100,
verbose=1, batch_size=64, callbacks=[
CSVLogger('train.log'),
ModelCheckpoint('mode5l.h5', save_best_only=True, verbose=1,
monitor='val_acc'),
ReduceLROnPlateau(factor=0.9, patience=5, verbose=2)
])

from sklearn.metrics import classification_report, f1_score

# Predict once per split; the original called model.predict(x_test) twice
# back to back.
pre_test = model.predict(x_test)
print(f1_score(np.argmax(pre_test, axis=1), np.argmax(y_test, axis=1),
               average='weighted'))

pre_train = model.predict(x_train)

# Weighted F1 on both splits (argmax converts one-hot / softmax rows back
# to class indices).
print("训练集 F1:{:.3f}".format(f1_score(np.argmax(pre_train, axis=1),
                                      np.argmax(y_train, axis=1),
                                      average='weighted')))
print("测试集 F1:{:.3f}".format(f1_score(np.argmax(pre_test, axis=1),
                                      np.argmax(y_test, axis=1),
                                      average='weighted')))

import matplotlib.pyplot as plt

# Load the per-epoch metrics that CSVLogger wrote during fit().
train_log = pd.read_csv('train.log')

# Training vs. validation loss curves.
for column, label in (('loss', 'Training Loss'),
                      ('val_loss', 'Validation Loss')):
    plt.plot(train_log[column], label=label)
plt.title('Training and Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()

# 绘制训练和验证准确率
plt.plot(train_log['accuracy']+0.5, label='Training Accuracy')
plt.plot(train_log['val_accuracy']+0.5, label='Validation Accuracy')
plt.title('Training and Validation Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

# You might also like  (document-export artifact, kept as a comment)