Assignment 1 Data Mining
Assignment 1 Data Mining
Assignment 1 Data Mining
[1]:
#Import Module
import pandas as pd
In [2]:
# Load the assignment dataset straight from its GitHub-hosted CSV file.
DATASET_URL = 'https://raw.githubusercontent.com/asrulabdullah99/data_mining/master/dataset_decision/Dataset_Tugas.csv'
dataset = pd.read_csv(filepath_or_buffer=DATASET_URL)
# Bare last expression so the notebook renders the loaded frame.
dataset
1 No Married 100000 No
2 No Single 70000 No
5 No Married 60000 No
8 No Married 75000 No
In [3]:
# Convert the categorical text columns to numerical codes.
# Each column is mapped with its own label -> code table; the Yes/No table is
# shared by the two boolean-like columns.
YES_NO = {'Yes': 1, 'No': 0}
ENCODINGS = {
    'Home_Owner': YES_NO,
    'Marital_Status': {'Single': 0, 'Married': 1, 'Divorced': 2},
    'Defaulted_Borrower': YES_NO,
}

for column, mapping in ENCODINGS.items():
    # Series.map turns every value that is missing from the mapping into NaN,
    # so re-running this cell on already-encoded data would wipe the column.
    # Skip columns whose values are all valid codes already.
    if dataset[column].isin(list(mapping.values())).all():
        continue

    encoded = dataset[column].map(mapping)

    # A label that is not in the mapping (typo, new category) would silently
    # become NaN; fail loudly instead. Values that were missing in the raw
    # data are left as NaN, as before.
    unmapped = encoded.isna() & dataset[column].notna()
    if unmapped.any():
        unknown = sorted(set(dataset.loc[unmapped, column].astype(str)))
        raise ValueError(f"Unknown value(s) in column '{column}': {unknown}")

    dataset[column] = encoded

# Bare last expression so the notebook renders the encoded frame.
dataset
0 1 0 125000 0
1 0 1 100000 0
2 0 0 70000 0
3 1 1 120000 0
4 0 2 95000 1
5 0 1 60000 0
6 1 2 220000 0
7 0 0 85000 1
8 0 1 75000 0
9 0 0 90000 1
In [4]:
# Separate the model inputs (x) from the target column (y).
# NOTE(review): 'Defaulted_Borrower' is listed in `features` and is also the
# target `y`, so the model receives the answer as one of its inputs (target
# leakage). Dropping it changes how many values every later `predict` call
# must pass, so it is kept as-is here -- TODO fix together with those cells.
features = ['Home_Owner','Marital_Status','Annual_Income','Defaulted_Borrower']
target_column = 'Defaulted_Borrower'
x = dataset.loc[:, features]
y = dataset[target_column]
# Print the inputs first, then the target.
for part in (x, y):
    print(part)
0 1 0 125000 0
1 0 1 100000 0
2 0 0 70000 0
3 1 1 120000 0
4 0 2 95000 1
5 0 1 60000 0
6 1 2 220000 0
7 0 0 85000 1
8 0 1 75000 0
9 0 0 90000 1
0 0
1 0
2 0
3 0
4 1
5 0
6 0
7 1
8 0
9 1
In [5]:
# Train a decision tree on x / y, export it to 'decisiontree.png' and show it.
# The imports below bring in every name this cell uses; previously
# DecisionTreeClassifier, pltimg and plt were used without being imported.
import matplotlib.image as pltimg
import matplotlib.pyplot as plt
import pydotplus
from sklearn import tree
from sklearn.tree import DecisionTreeClassifier

# Fixed seed: scikit-learn permutes the features at each split, so when several
# splits are equally good the fitted tree can otherwise differ between runs.
dtree = DecisionTreeClassifier(random_state=0)
dtree = dtree.fit(x,y)

# Export the fitted tree as Graphviz DOT text. `data` was previously passed to
# pydotplus without ever being assigned.
data = tree.export_graphviz(dtree, out_file=None, feature_names=features)
graph = pydotplus.graph_from_dot_data(data)
graph.write_png('decisiontree.png')

# Read the rendered PNG back and display it inline.
img = pltimg.imread('decisiontree.png')
imgplot = plt.imshow(img)
plt.show()
In [6]:
# Predict for a hypothetical applicant: not a home owner (0), single (0),
# annual income 80000. The values follow the order of `features`; the last one
# fills the 'Defaulted_Borrower' column, which is part of `features` as well.
# The previous input [39, 15, 3, 0] and its comment described a different
# dataset and were outside the value ranges of these columns.
# The sample is wrapped in a DataFrame with the training column names because
# the model was fitted on a DataFrame; a bare list carries no feature names.
sample = pd.DataFrame([[0, 0, 80000, 0]], columns=features)
print(dtree.predict(sample))
[0]
In [8]:
# Predict for the values of the first dataset row: home owner (1), single (0),
# annual income 125000, Defaulted_Borrower 0 -- in the order of `features`.
# The input is a DataFrame labelled with the training column names because the
# model was fitted on a DataFrame; a bare list carries no feature names.
first_row = pd.DataFrame([[1, 0, 125000, 0]], columns=features)
print(dtree.predict(first_row))
[0]
In [ ]: