Student Alcohol Consumption 1649318453
Student Alcohol Consumption 1649318453
Student Alcohol Consumption 1649318453
In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
In [2]:
# Load the dataset from a CSV file located relative to the notebook's
# working directory; every later cell reads from this `data` frame.
data = pd.read_csv("student_alcohol.csv")
In [3]:
# Preview the first rows (head() defaults to 5) to sanity-check the load.
data.head()
Out[3]:
school sex age address famsize Pstatus Medu Fedu Mjob Fjob ... famrel free
5 rows × 33 columns
In [4]:
# Preview the last rows (tail() defaults to 5) to check the end of the file parsed cleanly.
data.tail()
Out[4]:
school sex age address famsize Pstatus Medu Fedu Mjob Fjob ... famrel f
5 rows × 33 columns
In [5]:
# (n_rows, n_columns) of the loaded frame.
data.shape
Out[5]:
(395, 33)
In [6]:
# List every column label so later cells can refer to them by name.
data.columns
Out[6]:
dtype='object')
In [7]:
# Print per-column dtype and non-null count (info() writes to stdout and returns None).
data.info()
<class 'pandas.core.frame.DataFrame'>
In [8]:
# Summary statistics (count, mean, std, quartiles, min/max); with default
# arguments describe() reports only the numeric columns of a mixed frame.
data.describe()
Out[8]:
In [9]:
# Count missing values per column: isna() flags each missing cell and
# summing the boolean mask column-wise yields the number of gaps.
data.isna().sum()
Out[9]:
school 0
sex 0
age 0
address 0
famsize 0
Pstatus 0
Medu 0
Fedu 0
Mjob 0
Fjob 0
reason 0
guardian 0
traveltime 0
studytime 0
failures 0
schoolsup 0
famsup 0
paid 0
activities 0
nursery 0
higher 0
internet 0
romantic 0
famrel 0
freetime 0
goout 0
Dalc 0
Walc 0
health 0
absences 0
G1 0
G2 0
G3 0
dtype: int64
In [15]:
import warnings
warnings.filterwarnings('ignore')
In [16]:
In [17]:
# Draw per-column distribution plots for `data`.
# NOTE(review): plotPerColumnDistribution is not defined in any visible cell
# (presumably the preceding, empty-looking cell) — the meaning of the
# arguments 10 and 5 cannot be confirmed from here; TODO document them.
plotPerColumnDistribution(data, 10, 5)
In [22]:
# Pairwise Pearson correlation between the numeric columns.
# The frame also holds string-valued columns (e.g. sex, activities), and on
# pandas >= 2.0 a bare DataFrame.corr() raises on those instead of silently
# dropping them, so restrict to numeric dtypes explicitly. This form works on
# older pandas releases as well.
data.select_dtypes(include="number").corr()
Out[22]:
In [23]:
# Heatmap of the pairwise Pearson correlations between numeric columns.
# The string-valued columns (e.g. sex, activities) are excluded up front:
# on pandas >= 2.0 DataFrame.corr() no longer drops them implicitly and
# raises instead, which would make this cell fail on a fresh environment.
sns.heatmap(data.select_dtypes(include="number").corr())
Out[23]:
<AxesSubplot:>
In [24]:
In [25]:
In [26]:
# Grid of pairwise plots across the frame's columns; the trailing semicolon
# suppresses the returned grid object's repr in the cell output.
# NOTE(review): with this many columns the grid is large and slow to render —
# consider passing a subset via `vars=`.
sns.pairplot(data);
In [27]:
# Number of distinct values per column — a quick way to tell binary and
# low-cardinality columns apart from the wider-ranging ones.
data.nunique()
Out[27]:
school 2
sex 2
age 8
address 2
famsize 2
Pstatus 2
Medu 5
Fedu 5
Mjob 5
Fjob 5
reason 4
guardian 3
traveltime 4
studytime 4
failures 4
schoolsup 2
famsup 2
paid 2
activities 2
nursery 2
higher 2
internet 2
romantic 2
famrel 5
freetime 5
goout 5
Dalc 5
Walc 5
health 5
absences 34
G1 17
G2 17
G3 18
dtype: int64
In [32]:
# Frequency of each value in the `sex` column (sorted by count, descending).
data['sex'].value_counts()
Out[32]:
F 208
M 187
In [28]:
# Frequency of each value in the `activities` column (sorted by count, descending).
data['activities'].value_counts()
Out[28]:
yes 201
no 194
In [29]:
# Frequency of each value in the `internet` column (sorted by count, descending).
data['internet'].value_counts()
Out[29]:
yes 329
no 66
In [31]:
# Frequency of each value in the `romantic` column (sorted by count, descending).
data['romantic'].value_counts()
Out[31]:
no 263
yes 132
In [34]:
# Bar chart of `health` grouped by `sex` (barplot aggregates with the mean
# by default). Columns are referenced by name through `data=`.
fig, ax = plt.subplots(figsize=(8, 5))
sns.barplot(data=data, x="sex", y="health", ax=ax)
ax.tick_params(axis="x", labelrotation=90)  # vertical category labels
plt.show()
In [35]:
# Bar chart of `absences` grouped by `sex` (barplot aggregates with the mean
# by default). Columns are referenced by name through `data=`.
fig, ax = plt.subplots(figsize=(8, 5))
sns.barplot(data=data, x="sex", y="absences", ax=ax)
ax.tick_params(axis="x", labelrotation=90)  # vertical category labels
plt.show()
In [36]:
# Bar chart of `health` for each `age`, with bars split by `sex`
# (barplot aggregates with the mean by default).
fig, ax = plt.subplots(figsize=(8, 5))
sns.barplot(data=data, x="age", y="health", hue="sex", ax=ax)
ax.tick_params(axis="x", labelrotation=90)  # vertical category labels
plt.show()
In [37]:
# Bar chart of `absences` for each `age`, with bars split by `sex`
# (barplot aggregates with the mean by default).
fig, ax = plt.subplots(figsize=(8, 5))
sns.barplot(data=data, x="age", y="absences", hue="sex", ax=ax)
ax.tick_params(axis="x", labelrotation=90)  # vertical category labels
plt.show()
In [38]:
# Bar chart of `freetime` grouped by `sex` (barplot aggregates with the mean
# by default). Columns are referenced by name through `data=`.
fig, ax = plt.subplots(figsize=(8, 5))
sns.barplot(data=data, x="sex", y="freetime", ax=ax)
ax.tick_params(axis="x", labelrotation=90)  # vertical category labels
plt.show()
In [39]:
# Bar chart of `goout` grouped by `sex` (barplot aggregates with the mean
# by default). Columns are referenced by name through `data=`.
fig, ax = plt.subplots(figsize=(8, 5))
sns.barplot(data=data, x="sex", y="goout", ax=ax)
ax.tick_params(axis="x", labelrotation=90)  # vertical category labels
plt.show()
In [41]:
# Bar chart of `age` for each `activities` value, with bars split by `sex`
# (barplot aggregates with the mean by default).
fig, ax = plt.subplots(figsize=(8, 5))
sns.barplot(data=data, x="activities", y="age", hue="sex", ax=ax)
ax.tick_params(axis="x", labelrotation=90)  # vertical category labels
plt.show()
In [42]:
# Bar chart of `age` for each `internet` value, with bars split by `sex`
# (barplot aggregates with the mean by default).
fig, ax = plt.subplots(figsize=(8, 5))
sns.barplot(data=data, x="internet", y="age", hue="sex", ax=ax)
ax.tick_params(axis="x", labelrotation=90)  # vertical category labels
plt.show()
In [43]:
# Bar chart of `age` for each `romantic` value, with bars split by `sex`
# (barplot aggregates with the mean by default).
fig, ax = plt.subplots(figsize=(8, 5))
sns.barplot(data=data, x="romantic", y="age", hue="sex", ax=ax)
ax.tick_params(axis="x", labelrotation=90)  # vertical category labels
plt.show()
In [44]:
In [45]:
# Re-list the column labels (same call as the earlier In [6] cell).
# NOTE(review): presumably a reference for choosing the columns used below —
# the following cell's source is not visible here.
data.columns
Out[45]:
dtype='object')
In [46]:
In [47]:
# Preview the first rows of `new_data`.
# NOTE(review): `new_data` is created in a cell whose source is not visible
# here; judging by the recorded output it holds only numeric columns of
# `data` — confirm against that cell.
new_data.head()
Out[47]:
age traveltime studytime failures famrel freetime goout Dalc Walc health absences G
0 18 2 2 0 4 3 4 1 1 3 6
1 17 1 2 0 5 3 3 1 1 3 4
2 15 1 2 3 4 3 2 2 3 3 10
3 15 1 3 0 3 2 2 1 1 5 2 1
4 16 1 2 0 4 3 2 1 2 5 4
In [72]:
In [73]:
In [74]:
In [75]:
# Fit a linear regression on the training split.
# NOTE(review): LinearRegression, X_train and y_train come from cells whose
# source is not visible here (presumably scikit-learn and a train/test
# split) — confirm.
model = LinearRegression()
model.fit(X_train, y_train)  # last expression: fit() hands back the estimator, hence the Out repr
Out[75]:
LinearRegression()
In [76]:
# Predict the target for the held-out features using `model`.
# X_test is defined in a cell whose source is not visible here.
y_pred = model.predict(X_test)
In [77]:
In [78]:
In [79]:
In [80]:
In [81]:
# Fit a second linear regression, kept under its own name `model1`.
# NOTE(review): it reads the same X_train / y_train names as the first model;
# presumably the intervening cells (source not visible) rebuilt them with a
# different feature set or split — confirm, otherwise this repeats `model`.
model1 = LinearRegression()
model1.fit(X_train, y_train)  # last expression: fit() hands back the estimator, hence the Out repr
Out[81]:
LinearRegression()
In [82]:
# Predict the target for the held-out features using `model1`.
# NOTE(review): this rebinds `y_pred`, discarding the predictions stored
# under that name by the earlier `model` cell.
y_pred = model1.predict(X_test)
In [83]:
In [84]:
In [85]:
In [86]:
In [87]:
# Fit a third linear regression, kept under its own name `model2`.
# NOTE(review): it reads the same X_train / y_train names as the earlier
# models; presumably the intervening cells (source not visible) rebuilt them
# — confirm, otherwise this repeats the previous fit.
model2 = LinearRegression()
model2.fit(X_train, y_train)  # last expression: fit() hands back the estimator, hence the Out repr
Out[87]:
LinearRegression()
In [88]:
# Predict the target for the held-out features using `model2`.
# NOTE(review): this rebinds `y_pred` again, discarding the predictions
# stored under that name by the `model1` cell.
y_pred = model2.predict(X_test)
In [89]: