Assignment 1 Data Mining

Download as pdf or txt
Download as pdf or txt
You are on page 1 of 1

In [1]:
#Import Module

import pandas as pd

from sklearn import tree

from sklearn.tree import DecisionTreeClassifier

In [2]:
#Read the loan-default dataset (CSV hosted on GitHub) into a DataFrame

csv_url = 'https://raw.githubusercontent.com/asrulabdullah99/data_mining/master/dataset_decision/Dataset_Tugas.csv'

dataset = pd.read_csv(csv_url)

# Last expression of the cell: show the raw table
dataset

Out[2]: Home_Owner Marital_Status Annual_Income Defaulted_Borrower

0 Yes Single 125000 No

1 No Married 100000 No

2 No Single 70000 No

3 Yes Married 120000 No

4 No Divorced 95000 Yes

5 No Married 60000 No

6 Yes Divorced 220000 No

7 No Single 85000 Yes

8 No Married 75000 No

9 No Single 90000 Yes

In [3]:
#Convert all categorical columns to numerical values

# One explicit code table per categorical column (same codes as before).
ENCODINGS = {
    'Home_Owner': {'Yes': 1, 'No': 0},
    'Marital_Status': {'Single': 0, 'Married': 1, 'Divorced': 2},
    'Defaulted_Borrower': {'Yes': 1, 'No': 0},
}

for column, codes in ENCODINGS.items():
    # Skip columns that are already numeric, so re-running this cell does not
    # push the 0/1/2 codes through the string tables and turn them into NaN.
    if pd.api.types.is_numeric_dtype(dataset[column]):
        continue

    encoded = dataset[column].map(codes)

    # .map() silently yields NaN for any label missing from the table;
    # fail loudly instead of feeding NaN into the classifier later.
    if encoded.isna().any():
        unknown = sorted(dataset.loc[encoded.isna(), column].astype(str).unique())
        raise ValueError(f"Unmapped values in column {column!r}: {unknown}")

    dataset[column] = encoded

# Last expression of the cell: show the encoded table
dataset

Out[3]: Home_Owner Marital_Status Annual_Income Defaulted_Borrower

0 1 0 125000 0

1 0 1 100000 0

2 0 0 70000 0

3 1 1 120000 0

4 0 2 95000 1

5 0 1 60000 0

6 1 2 220000 0

7 0 0 85000 1

8 0 1 75000 0

9 0 0 90000 1

In [4]:
#Saparate columns

#x adalah feature columns, y = target columns

features = ['Home_Owner','Marital_Status','Annual_Income','Defaulted_Borrower']

x = dataset[features]

y = dataset['Defaulted_Borrower']

print(x)

print(y)

Home_Owner Marital_Status Annual_Income Defaulted_Borrower

0 1 0 125000 0

1 0 1 100000 0

2 0 0 70000 0

3 1 1 120000 0

4 0 2 95000 1

5 0 1 60000 0

6 1 2 220000 0

7 0 0 85000 1

8 0 1 75000 0

9 0 0 90000 1

0 0

1 0

2 0

3 0

4 1

5 0

6 0

7 1

8 0

9 1

Name: Defaulted_Borrower, dtype: int64

In [5]:
import pydotplus

import matplotlib.pyplot as plt

import matplotlib.image as pltimg

dtree = DecisionTreeClassifier()

dtree = dtree.fit(x,y)

data = tree.export_graphviz(dtree, out_file= None, feature_names=features)

graph = pydotplus.graph_from_dot_data(data)

graph.write_png('decisiontree.png')

img = pltimg.imread('decisiontree.png')

imgplot = plt.imshow(img)

plt.show()

In [6]:
#Prediksi dari umur 39, 10 tahun experience, comedy ranking 7 dan USA

print(dtree.predict([[39,15,3,0]]))

[0]

In [8]:
print(dtree.predict([[1, 0, 125000, 0]]))

[0]

In [ ]:

You might also like