Tugas-Bank-Campaign (1) .Ipynb - Colaboratory
Tugas-Bank-Campaign (1) .Ipynb - Colaboratory
Tugas-Bank-Campaign (1) .Ipynb - Colaboratory
ipynb - Colaboratory
IMPORT LIBRARY
def configure_plotly_browser_state():
    """Emit the RequireJS configuration used to render Plotly figures.

    Displays an HTML snippet in the current cell output that loads
    ``require.js`` and maps the ``plotly`` module path to the Plotly CDN
    bundle. Returns ``None``; the only effect is the displayed HTML.
    """
    # Imported explicitly so the function does not depend on the ``display``
    # global that IPython injects into notebook namespaces.
    from IPython.display import HTML, display

    display(HTML('''
<script src="/static/components/requirejs/require.js"></script>
<script>
requirejs.config({
paths: {
base: '/static/base',
plotly: 'https://cdn.plot.ly/plotly-latest.min.js?noext',
},
});
</script>
'''))
!pip install chart_studio
!pip install openpyxl
import warnings
warnings.filterwarnings('ignore')
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
%matplotlib inline
import matplotlib.pyplot as plt # Matlab-style plotting
import seaborn as sns
color = sns.color_palette()
sns.set_style('darkgrid')
from plotly.tools import make_subplots
from plotly import tools
import chart_studio.plotly as py
from plotly.offline import init_notebook_mode, iplot
init_notebook_mode(connected=True)
import plotly.graph_objs as go
import plotly.figure_factory as ff
from IPython.display import HTML, Image
from scipy import stats
from scipy.stats import norm, skew #for some statistics
https://colab.research.google.com/drive/1mu7DZCLMXk6k8zsDdbtCwM_AfyFlDvV4#scrollTo=e5NgJM_5RGLk&printMode=true 1/29
10/19/21, 12:31 AM tugas-bank-campaign (1).ipynb - Colaboratory
data = pd.read_csv("bank-full.csv",sep = ";")
data.head()
age job marital education default balance housing loan contact day month duration campaign pdays previo
data.describe()
data.describe().transpose()
https://colab.research.google.com/drive/1mu7DZCLMXk6k8zsDdbtCwM_AfyFlDvV4#scrollTo=e5NgJM_5RGLk&printMode=true 2/29
10/19/21, 12:31 AM tugas-bank-campaign (1).ipynb - Colaboratory
# Split columns by cardinality: more than 12 distinct values is treated as
# continuous, 12 or fewer as categorical. The distinct counts are computed
# once instead of once per column per list.
n_unique = data.nunique()
cont_features = [col for col in data.columns if n_unique[col] > 12]
cat_features = [col for col in data.columns if n_unique[col] <= 12]
cont_features
cat_features
['job',
'marital',
'education',
'default',
'housing',
'loan',
'contact',
'month',
'poutcome',
'y']
data.info() # cek info
<class 'pandas.core.frame.DataFrame'>
data = data.dropna() #Menghapus missing data jika ada
https://colab.research.google.com/drive/1mu7DZCLMXk6k8zsDdbtCwM_AfyFlDvV4#scrollTo=e5NgJM_5RGLk&printMode=true 3/29
10/19/21, 12:31 AM tugas-bank-campaign (1).ipynb - Colaboratory
configure_plotly_browser_state()
fig = go.Figure()
fig.add_trace(go.Box(y=data['age'], name='Age',
marker_color = 'rgb(0, 0, 100)'))
fig.show()
90
80
70
60
50
40
30
20
Age
configure_plotly_browser_state()
fig = ff.create_distplot([data['age']],['Age'],bin_size=5,colors=['rgb(0, 0, 100)'])
iplot(fig, filename='Basic Distplot')
https://colab.research.google.com/drive/1mu7DZCLMXk6k8zsDdbtCwM_AfyFlDvV4#scrollTo=e5NgJM_5RGLk&printMode=true 4/29
10/19/21, 12:31 AM tugas-bank-campaign (1).ipynb - Colaboratory
Age
0.04
0.03
0.02
0.01
fig = plt.figure()
res = stats.probplot(data['age'], plot=plt)
plt.show()
20 40 60 80 100
from scipy.stats import shapiro
import scipy.stats as stats
# Pass the Series directly (not wrapped in a list), matching the other
# Shapiro-Wilk calls in this notebook.
# NOTE(review): SciPy documents that the Shapiro-Wilk p-value may be
# inaccurate for N > 5000, and warnings are silenced earlier in this
# notebook - treat the reported p-value with care if the data is that large.
shapiro(data['age'])
(0.9605178833007812, 0.0)
Karena nilai p < 0,05 (tingkat signifikansi), hipotesis nol ditolak dan dapat disimpulkan bahwa fitur tersebut tidak berdistribusi normal.
2. Balance
https://colab.research.google.com/drive/1mu7DZCLMXk6k8zsDdbtCwM_AfyFlDvV4#scrollTo=e5NgJM_5RGLk&printMode=true 5/29
10/19/21, 12:31 AM tugas-bank-campaign (1).ipynb - Colaboratory
configure_plotly_browser_state()
fig = go.Figure()
fig.add_trace(go.Box(y=data['balance'], name='Balance',
marker_color = 'rgb(0, 0, 100)'))
fig.show()
# Keep only rows whose balance lies within 1.5 * IQR of the first and third
# quartiles; the result is stored in df1.
Q1 = data['balance'].quantile(0.25)
Q3 = data['balance'].quantile(0.75)
IQR = Q3 - Q1  # interquartile range
# Named ``balance_in_range`` rather than ``filter`` so the builtin is not shadowed.
balance_in_range = (data['balance'] >= Q1 - 1.5 * IQR) & (data['balance'] <= Q3 + 1.5 * IQR)
df1 = data.loc[balance_in_range]
100k
80k
60k
40k
20k
Balance
configure_plotly_browser_state()
fig = ff.create_distplot([df1['balance']],['balance'],bin_size=5,colors=['rgb(0, 0, 100)'])
iplot(fig, filename='Basic Distplot')
https://colab.research.google.com/drive/1mu7DZCLMXk6k8zsDdbtCwM_AfyFlDvV4#scrollTo=e5NgJM_5RGLk&printMode=true 6/29
10/19/21, 12:31 AM tugas-bank-campaign (1).ipynb - Colaboratory
balance
0.015
0.01
0.005
configure_plotly_browser_state()
fig = plt.figure()
from scipy.stats import shapiro
import scipy.stats as stats
ntA = shapiro(df1['balance'])
ntA
(0.866013765335083, 0.0)
Karena nilai p < 0,05 (tingkat signifikansi), hipotesis nol ditolak dan dapat disimpulkan bahwa fitur tersebut tidak berdistribusi normal.
from scipy.stats import anderson
https://colab.research.google.com/drive/1mu7DZCLMXk6k8zsDdbtCwM_AfyFlDvV4#scrollTo=e5NgJM_5RGLk&printMode=true 7/29
10/19/21, 12:31 AM tugas-bank-campaign (1).ipynb - Colaboratory
# Anderson-Darling normality test on the balance column of df1.
result = anderson(df1['balance'])
print('Statistic: %.3f' % result.statistic)
# Compare the statistic with the critical value at each significance level:
# a statistic below the critical value means H0 (normality) is not rejected.
for sl, cv in zip(result.significance_level, result.critical_values):
    if result.statistic < cv:
        print('%.3f: %.3f, data normal (tidak menolak H0)' % (sl, cv))
    else:
        print('%.3f: %.3f, data tidak normal (menolak H0)' % (sl, cv))
Statistic: 2010.508
3. Day
configure_plotly_browser_state()
fig = go.Figure()
fig.add_trace(go.Box(y=data['day'], name='Day',
marker_color = 'rgb(0, 0, 100)'))
fig.show()
https://colab.research.google.com/drive/1mu7DZCLMXk6k8zsDdbtCwM_AfyFlDvV4#scrollTo=e5NgJM_5RGLk&printMode=true 8/29
10/19/21, 12:31 AM tugas-bank-campaign (1).ipynb - Colaboratory
configure_plotly_browser_state()
fig = ff.create_distplot([data['day']],['day'],bin_size=5,colors=['rgb(0, 0, 100)'])
iplot(fig, filename='Basic Distplot')
30
25
0.05
day
20
0.04
15
0.03
0.02
10
0.01
5
0
0
Day
0 10 20 30
fig = plt.figure()
res = stats.probplot(data['day'], plot=plt)
plt.show()
https://colab.research.google.com/drive/1mu7DZCLMXk6k8zsDdbtCwM_AfyFlDvV4#scrollTo=e5NgJM_5RGLk&printMode=true 9/29
10/19/21, 12:31 AM tugas-bank-campaign (1).ipynb - Colaboratory
from scipy.stats import shapiro
import scipy.stats as stats
ntA = shapiro(data['day'])
ntA
(0.9595543146133423, 0.0)
Karena nilai p < 0,05 (tingkat signifikansi), hipotesis nol ditolak dan dapat disimpulkan bahwa fitur tersebut tidak berdistribusi normal.
4. Duration
configure_plotly_browser_state()
fig = go.Figure()
fig.add_trace(go.Box(y=data['duration'], name='Duration',
marker_color = 'rgb(0, 0, 100)'))
fig.show()
5000
4000
3000
2000
1000
Duration
configure_plotly_browser_state()
fig = ff.create_distplot([data['duration']],['duration'],bin_size=5,colors=['rgb(0, 0, 100)'])
iplot(fig, filename='Basic Distplot')
https://colab.research.google.com/drive/1mu7DZCLMXk6k8zsDdbtCwM_AfyFlDvV4#scrollTo=e5NgJM_5RGLk&printMode=true 10/29
10/19/21, 12:31 AM tugas-bank-campaign (1).ipynb - Colaboratory
duration
0.003
0.002
0.001
configure_plotly_browser_state()
fig = plt.figure()
res = stats.probplot(data['duration'], plot=plt)
plt.show()
from scipy.stats import shapiro
import scipy.stats as stats
ntA = shapiro(data['duration'])
ntA
https://colab.research.google.com/drive/1mu7DZCLMXk6k8zsDdbtCwM_AfyFlDvV4#scrollTo=e5NgJM_5RGLk&printMode=true 11/29
10/19/21, 12:31 AM tugas-bank-campaign (1).ipynb - Colaboratory
(0.7269970774650574, 0.0)
Karena nilai p < 0,05 (tingkat signifikansi), hipotesis nol ditolak dan dapat disimpulkan bahwa fitur tersebut tidak berdistribusi normal.
5. Campaign
configure_plotly_browser_state()
fig = go.Figure()
fig.add_trace(go.Box(y=data['campaign'], name='Campaign',
marker_color = 'rgb(0, 0, 100)'))
fig.show()
60
50
40
30
20
10
0
Campaign
configure_plotly_browser_state()
fig = ff.create_distplot([data['campaign']],['campaign'],bin_size=5,colors=['rgb(0, 0, 100)'])
iplot(fig, filename='Basic Distplot')
https://colab.research.google.com/drive/1mu7DZCLMXk6k8zsDdbtCwM_AfyFlDvV4#scrollTo=e5NgJM_5RGLk&printMode=true 12/29
10/19/21, 12:31 AM tugas-bank-campaign (1).ipynb - Colaboratory
campaign
0.4
0.3
0.2
0.1
fig = plt.figure()
res = stats.probplot(data['campaign'], plot=plt)
plt.show()
0 20 40 60
from scipy.stats import shapiro
import scipy.stats as stats
ntA = shapiro(data['campaign'])
ntA
(0.5507382750511169, 0.0)
Karena nilai p < 0,05 (tingkat signifikansi), hipotesis nol ditolak dan dapat disimpulkan bahwa fitur tersebut tidak berdistribusi normal.
6. Pdays
https://colab.research.google.com/drive/1mu7DZCLMXk6k8zsDdbtCwM_AfyFlDvV4#scrollTo=e5NgJM_5RGLk&printMode=true 13/29
10/19/21, 12:31 AM tugas-bank-campaign (1).ipynb - Colaboratory
configure_plotly_browser_state()
fig = go.Figure()
fig.add_trace(go.Box(y=data['pdays'], name='Pdays',
marker_color = 'rgb(0, 0, 100)'))
fig.show()
800
600
400
200
Pdays
configure_plotly_browser_state()
fig = ff.create_distplot([data['pdays']],['pdays'],bin_size=5,colors=['rgb(0, 0, 100)'])
iplot(fig, filename='Basic Distplot')
https://colab.research.google.com/drive/1mu7DZCLMXk6k8zsDdbtCwM_AfyFlDvV4#scrollTo=e5NgJM_5RGLk&printMode=true 14/29
10/19/21, 12:31 AM tugas-bank-campaign (1).ipynb - Colaboratory
pdays
0.15
0.1
0.05
fig = plt.figure()
res = stats.probplot(data['pdays'], plot=plt)
0
plt.show()
from scipy.stats import shapiro
import scipy.stats as stats
ntA = shapiro(data['pdays'])
ntA
(0.47478705644607544, 0.0)
Karena nilai p < 0,05 (tingkat signifikansi), hipotesis nol ditolak dan dapat disimpulkan bahwa fitur tersebut tidak berdistribusi normal.
7. Previous
configure_plotly_browser_state()
fig = go.Figure()
fig.add_trace(go.Box(y=data['previous'], name='Previous',
marker_color = 'rgb(0, 0, 100)'))
fig.show()
https://colab.research.google.com/drive/1mu7DZCLMXk6k8zsDdbtCwM_AfyFlDvV4#scrollTo=e5NgJM_5RGLk&printMode=true 15/29
10/19/21, 12:31 AM tugas-bank-campaign (1).ipynb - Colaboratory
250
200
150
100
50
Previous
configure_plotly_browser_state()
fig = ff.create_distplot([data['previous']],['previous'],bin_size=5,colors=['rgb(0, 0, 100)'])
iplot(fig, filename='Basic Distplot')
https://colab.research.google.com/drive/1mu7DZCLMXk6k8zsDdbtCwM_AfyFlDvV4#scrollTo=e5NgJM_5RGLk&printMode=true 16/29
10/19/21, 12:31 AM tugas-bank-campaign (1).ipynb - Colaboratory
previous
1
configure_plotly_browser_state()
fig = plt.figure()
res = stats.probplot(data['previous'], plot=plt)
plt.show()
0.5
from scipy.stats import shapiro
import scipy.stats as stats
ntA = shapiro(data['previous'])
ntA
(0.23559075593948364, 0.0)
Karena nilai p < 0,05 (tingkat signifikansi), hipotesis nol ditolak dan dapat disimpulkan bahwa fitur tersebut tidak berdistribusi normal.
def calculateCrosstabulation(catVariable, targetCatVariable=None):
    """Plot absolute and relative frequencies of the target per category.

    Builds two cross tabulations of ``catVariable`` against
    ``targetCatVariable`` (raw counts, and row-normalised percentages rounded
    to one decimal) and shows them as two stacked, grouped bar charts.

    Parameters
    ----------
    catVariable : pandas.Series
        Categorical feature; its ``name`` is used in the subplot titles and
        the x-axis label.
    targetCatVariable : pandas.Series, optional
        Categorical target. Defaults to ``data.y``, looked up at call time so
        it reflects the current ``data`` frame rather than the one that
        existed when this function was defined.

    Returns
    -------
    None
        Whatever ``fig.show()`` returns; the figure is rendered as a side
        effect.
    """
    # plotly.tools.make_subplots is deprecated in favour of
    # plotly.subplots.make_subplots, which takes the same arguments used here.
    from plotly.subplots import make_subplots

    if targetCatVariable is None:
        targetCatVariable = data.y

    # Cross tabulation in absolute and relative (row-percentage) terms.
    # Columns labelled 0/1 are renamed; any other labels pass through unchanged.
    label_map = {0: "no", 1: "yes"}
    absCount = pd.crosstab(index=catVariable, columns=targetCatVariable).rename(columns=label_map)
    relCount = pd.crosstab(
        index=catVariable, columns=targetCatVariable, normalize="index"
    ).rename(columns=label_map) * 100
    relCount = relCount.round(1)

    # Two stacked subplots: absolute counts on top, percentages below.
    fig = make_subplots(
        rows=2,
        cols=1,
        vertical_spacing=0.3,
        subplot_titles=(
            f"Jumlah Absolut dari kategori fitur Y: yes dan no berdasarkan {catVariable.name}",
            f"Jumlah Persentase dari kategori fitur Y: yes dan no berdasarkan {catVariable.name}",
        ),
        print_grid=False,
    )

    # One bar trace per target class in each table; absolute-frequency traces
    # are added first (row 1), then relative-frequency traces (row 2).
    for subplot_row, table in enumerate((absCount, relCount), start=1):
        for col in table.columns:
            fig.add_trace(
                go.Bar(
                    x=table.index,
                    y=table[col],
                    text=table[col],
                    hoverinfo="x+y",
                    textposition="auto",
                    name=f"{col}",
                    textfont=dict(family="sans serif", size=14),
                ),
                row=subplot_row,
                col=1,
            )

    # Figure size, grouped bars and background colours.
    fig.layout.update(
        height=600,
        width=1000,
        hovermode="closest",
        barmode="group",
        paper_bgcolor="rgb(243, 243, 243)",
        plot_bgcolor="rgb(243, 243, 243)",
    )

    # Bold axis titles.
    fig.layout.yaxis1.update(title="<b>Abs Frequency</b>")
    fig.layout.yaxis2.update(title="<b>Rel Frequency(%)</b>")
    fig.layout.xaxis2.update(title=f"<b>{catVariable.name}</b>")

    return fig.show()
def calculateChiSquare(catVariable, targetCatVariable=None):
    """Print the chi-square test of independence between two categoricals.

    Cross-tabulates ``catVariable`` against ``targetCatVariable``, runs
    ``scipy.stats.chi2_contingency`` on the table, and prints a header line
    followed by the test result.

    Parameters
    ----------
    catVariable : pandas.Series
        Categorical feature; its ``name`` appears in the printed header.
    targetCatVariable : pandas.Series, optional
        Categorical target. Defaults to ``data.y``, looked up at call time so
        it reflects the current ``data`` frame rather than the one that
        existed when this function was defined.

    Returns
    -------
    None
        The result is printed, not returned.
    """
    if targetCatVariable is None:
        targetCatVariable = data.y

    contingency = pd.crosstab(index=catVariable, columns=targetCatVariable)
    testResult = stats.chi2_contingency(contingency)
    print(f"Chi Square Test Result between {targetCatVariable.name} & {catVariable.name}:")
    print(testResult)
https://colab.research.google.com/drive/1mu7DZCLMXk6k8zsDdbtCwM_AfyFlDvV4#scrollTo=e5NgJM_5RGLk&printMode=true 18/29
10/19/21, 12:31 AM tugas-bank-campaign (1).ipynb - Colaboratory
Chi-square Test: Uji independensi Chi-square menguji apakah ada hubungan yang signifikan antara dua variabel
kategori. Data biasanya ditampilkan dalam format tabulasi silang dengan setiap baris mewakili kategori untuk satu
variabel dan setiap kolom mewakili kategori untuk variabel lain. Uji independensi chi-kuadrat merupakan uji
omnibus, yaitu menguji data secara keseluruhan. Ini berarti bahwa seseorang tidak akan dapat membedakan level
(kategori) variabel mana yang bertanggung jawab atas hubungan tersebut jika tabel Chi-kuadrat lebih besar dari
2×2. Jika tabel lebih besar dari 2×2, diperlukan pengujian post hoc.
Jika nilai p signifikan (kurang dari 0,05), kita dapat menolak hipotesis nol dan mengklaim bahwa temuan
mendukung hipotesis alternatif. Saat memeriksa hasil uji chi2, kita juga perlu memeriksa apakah
frekuensi sel yang diharapkan lebih besar dari atau sama dengan 5. Jika sebuah sel memiliki frekuensi yang
diharapkan kurang dari 5, maka uji Fisher's Exact harus digunakan untuk mengatasi hal ini.
1. Job Vs Y
configure_plotly_browser_state()
calculateCrosstabulation(data.job)
https://colab.research.google.com/drive/1mu7DZCLMXk6k8zsDdbtCwM_AfyFlDvV4#scrollTo=e5NgJM_5RGLk&printMode=true 19/29
10/19/21, 12:31 AM tugas-bank-campaign (1).ipynb - Colaboratory
/usr/local/lib/python3.7/dist-packages/plotly/tools.py:465: DeprecationWarning:
9024
Abs Frequency
8000
8157
yes
6757
6000 no
yes
4000
calculateChiSquare(data.job)
4540
3785
2000 1131
Chi Square Test 631
Result between
708 y & job:
840 1101
1364 123 109 1301
1748 516 1392 187 369 669 269 202 254 34
(836.1054877471965,
0 3.337121944935502e-172, 11, array([[4566.0715755 , 604.9284245 ],
ad
[8593.5038818 blu
, 1138.4961182 en ],
ho ma re se se stu tec un un
mi e- tre us na tir lf- rv de hn em kn
n. co pr em g ed em ice n i c p ow
[1313.04359559, 173.95640441],
lla en a e m p l s t i a l oy n
r eu id en oy n e
ed d
[1094.93884232, 145.06115768],
r t
[8351.55771825, 1106.44228175],
[1999.14640242, 264.85359758],
[1394.28099356, 184.71900644],
Rel Frequency(%)
[ 828.2682533 , 109.7317467 ],
92.7
91.7
91.2
91.1
88.9
88.2
88.2
87.8
80
[6708.26643958, 888.73356042],
86.2
84.5
77.2
[1150.56879963, 152.43120037],
71.3
[ 60
254.30837628, 33.69162372]]))
40
28.7
20
Nilai pertama (836.105) adalah nilai Chi-kuadrat, diikuti oleh nilai-p (3.337e-172), kemudian muncul derajat
kebebasan (11), dan terakhir mengeluarkan frekuensi yang diharapkan sebagai array. Karena semua frekuensi yang
diharapkan lebih besar dari 5, hasil uji chi2 dapat dipercaya. Kita dapat menolak hipotesis nol karena nilai p
kurang dari 0,05 (sebenarnya nilai p hampir 0). Dengan demikian, hasilnya menunjukkan bahwa ada hubungan
yang signifikan secara statistik
12.2 8.3 8.8 11.8 8.9 11.1 11.8 22.8 7.3 15.5 13.8
0
admin. blue-collar entrepreneur housemaid management retired self-employed services student technician unemployed unknown
job
2. Marital Vs Y
configure_plotly_browser_state()
calculateCrosstabulation(data.marital)
https://colab.research.google.com/drive/1mu7DZCLMXk6k8zsDdbtCwM_AfyFlDvV4#scrollTo=e5NgJM_5RGLk&printMode=true 20/29
10/19/21, 12:31 AM tugas-bank-campaign (1).ipynb - Colaboratory
/usr/local/lib/python3.7/dist-packages/plotly/tools.py:465: DeprecationWarning:
20k yes
no
yes
10k
10878
2755 1912
4585 622
0
divorced married single
80 88.1 89.9
85.1
60
calculateChiSquare(data.marital)
40
marital
Nilai pertama (196.49) adalah nilai Chi-kuadrat, diikuti oleh nilai-p (2.1450e-43), kemudian muncul derajat
kebebasan (2), dan terakhir mengeluarkan frekuensi yang diharapkan sebagai array. Karena semua frekuensi yang
diharapkan lebih besar dari 5, hasil uji chi2 dapat dipercaya. Kita dapat menolak hipotesis nol karena nilai p kurang
dari 0,05 (sebenarnya nilai p hampir 0). Dengan demikian, hasilnya menunjukkan bahwa ada hubungan yang
signifikan secara statistik
3. Education Vs Y
configure_plotly_browser_state()
calculateCrosstabulation(data.education)
https://colab.research.google.com/drive/1mu7DZCLMXk6k8zsDdbtCwM_AfyFlDvV4#scrollTo=e5NgJM_5RGLk&printMode=true 21/29
10/19/21, 12:31 AM tugas-bank-campaign (1).ipynb - Colaboratory
/usr/local/lib/python3.7/dist-packages/plotly/tools.py:465: DeprecationWarning:
yes
15k
no
yes
10k 11305
80
91.4 89.4 85 86.4
60
40
20 10.6 13.6
8.6
15
0
primary secondary tertiary unknown
education
calculateChiSquare(data.education)
[20487.71856407, 2714.28143593],
[11744.98511424, 1556.01488576],
[ 1639.75921789, 217.24078211]]))
Karena semua frekuensi yang diharapkan lebih besar dari 5, hasil uji chi2 dapat dipercaya. Kita dapat menolak
hipotesis nol karena nilai p kurang dari 0,05 (sebenarnya nilai p hampir 0). Dengan demikian, hasilnya menunjukkan
bahwa ada hubungan yang signifikan secara statistik
4. Default Vs Y
configure_plotly_browser_state()
calculateCrosstabulation(data.default)
https://colab.research.google.com/drive/1mu7DZCLMXk6k8zsDdbtCwM_AfyFlDvV4#scrollTo=e5NgJM_5RGLk&printMode=true 22/29
10/19/21, 12:31 AM tugas-bank-campaign (1).ipynb - Colaboratory
/usr/local/lib/python3.7/dist-packages/plotly/tools.py:465: DeprecationWarning:
yes
30k
no
20k yes
10k
5237
763 52
0
no yes
93.6
80 88.2
60
40
20 11.8
6.4
0
no yes
default
calculateChiSquare(data.default)
[ 719.65738426, 95.34261574]]))
Karena semua frekuensi yang diharapkan lebih besar dari 5, hasil uji chi2 dapat dipercaya. Kita dapat menolak
hipotesis nol karena nilai p kurang dari 0,05 (sebenarnya nilai p hampir 0). Dengan demikian, hasilnya menunjukkan
bahwa ada hubungan yang signifikan secara statistik
5. Housing Vs Y
https://colab.research.google.com/drive/1mu7DZCLMXk6k8zsDdbtCwM_AfyFlDvV4#scrollTo=e5NgJM_5RGLk&printMode=true 23/29
10/19/21, 12:31 AM tugas-bank-campaign (1).ipynb - Colaboratory
configure_plotly_browser_state()
calculateCrosstabulation(data.housing)
/usr/local/lib/python3.7/dist-packages/plotly/tools.py:465: DeprecationWarning:
20k
yes
15k 16727 no
yes
10k
5k 3354
1935
0
no yes
92.3
80
83.3
60
40
20
7.7
16.7
0
no yes
housing
calculateChiSquare(data.housing)
[22190.17186083, 2939.82813917]]))
Karena semua frekuensi yang diharapkan lebih besar dari 5, hasil uji chi2 dapat dipercaya. Kita dapat menolak
hipotesis nol karena nilai p kurang dari 0,05 (sebenarnya nilai p hampir 0). Dengan demikian, hasilnya menunjukkan
bahwa ada hubungan yang signifikan secara statistik
6. Loan Vs Y
https://colab.research.google.com/drive/1mu7DZCLMXk6k8zsDdbtCwM_AfyFlDvV4#scrollTo=e5NgJM_5RGLk&printMode=true 24/29
10/19/21, 12:31 AM tugas-bank-campaign (1).ipynb - Colaboratory
configure_plotly_browser_state()
calculateCrosstabulation(data.loan)
/usr/local/lib/python3.7/dist-packages/plotly/tools.py:465: DeprecationWarning:
yes
no
20k
yes
10k
4805
6760 484
0
no yes
93.3
80 87.3
60
40
20 12.7
6.7
0
no yes
loan
calculateChiSquare(data.loan)
[ 6396.5620756, 847.4379244]]))
Karena semua frekuensi yang diharapkan lebih besar dari 5, hasil uji chi2 dapat dipercaya. Kita dapat menolak
hipotesis nol karena nilai p kurang dari 0,05 (sebenarnya nilai p hampir 0). Dengan demikian, hasilnya menunjukkan
bahwa ada hubungan yang signifikan secara statistik
https://colab.research.google.com/drive/1mu7DZCLMXk6k8zsDdbtCwM_AfyFlDvV4#scrollTo=e5NgJM_5RGLk&printMode=true 25/29
10/19/21, 12:31 AM tugas-bank-campaign (1).ipynb - Colaboratory
7. Contact Vs Y
configure_plotly_browser_state()
calculateCrosstabulation(data.contact)
/usr/local/lib/python3.7/dist-packages/plotly/tools.py:465: DeprecationWarning:
20k yes
no
yes
10k 12490
2516
4369 390 530
0
cellular telephone unknown
95.9
85.1 86.6
50
13.4
14.9 4.1
0
cellular telephone unknown
contact
calculateChiSquare(data.contact)
[ 2566.04215788, 339.95784212],
[11496.85784433, 1523.14215567]]))
https://colab.research.google.com/drive/1mu7DZCLMXk6k8zsDdbtCwM_AfyFlDvV4#scrollTo=e5NgJM_5RGLk&printMode=true 26/29
10/19/21, 12:31 AM tugas-bank-campaign (1).ipynb - Colaboratory
Karena semua frekuensi yang diharapkan lebih besar dari 5, hasil uji chi2 dapat dipercaya. Kita dapat menolak
hipotesis nol karena nilai p kurang dari 0,05 (sebenarnya nilai p hampir 0). Dengan demikian, hasilnya menunjukkan
bahwa ada hubungan yang signifikan secara statistik
8. Month Vs Y
configure_plotly_browser_state()
calculateCrosstabulation(data.month)
/usr/local/lib/python3.7/dist-packages/plotly/tools.py:465: DeprecationWarning:
12841
Abs Frequency
10k yes
no
yes
6268
5k
5559
4795
3567
688 1261 627 925
2355 577 114 100 2208 441 142 546 229 248 403 415 323 310 269
0
apr aug dec feb jan jul jun mar may nov oct sep
93.3
90.9
89.9
89.8
89.8
80 89
83.4
80.3
60
56.2
53.5
53.3
40 48 52
46.7
46.5
43.8
20 11 10.1 10.2 10.2
9.1 6.7
19.7
16.6
0
apr aug dec feb jan jul jun mar may nov oct sep
month
calculateChiSquare(data.month)
[ 5516.19592577, 730.80407423],
[ 188.96525182, 25.03474818],
[ 2339.10725266, 309.89274734],
https://colab.research.google.com/drive/1mu7DZCLMXk6k8zsDdbtCwM_AfyFlDvV4#scrollTo=e5NgJM_5RGLk&printMode=true 27/29
10/19/21, 12:31 AM tugas-bank-campaign (1).ipynb - Colaboratory
[ 1238.87031917, 164.12968083],
[ 6088.3897724 , 806.6102276 ],
[ 4716.18415872, 624.81584128],
[ 421.19824821, 55.80175179],
[12155.58718011, 1610.41281989],
[ 3505.57032581, 464.42967419],
[ 651.66521422, 86.33478578],
[ 511.26579815, 67.73420185]]))
Karena semua frekuensi yang diharapkan lebih besar dari 5, hasil uji chi2 dapat dipercaya. Kita dapat menolak
hipotesis nol karena nilai p kurang dari 0,05 (sebenarnya nilai p sama dengan 0). Dengan demikian, hasilnya
menunjukkan bahwa ada hubungan yang signifikan secara statistik
9. Poutcome Vs Y
configure_plotly_browser_state()
calculateCrosstabulation(data.poutcome)
https://colab.research.google.com/drive/1mu7DZCLMXk6k8zsDdbtCwM_AfyFlDvV4#scrollTo=e5NgJM_5RGLk&printMode=true 28/29
10/19/21, 12:31 AM tugas-bank-campaign (1).ipynb - Colaboratory
/usr/local/lib/python3.7/dist-packages/plotly/tools.py:465: DeprecationWarning:
calculateChiSquare(data.poutcome)
[ 1624.74795957, 215.25204043],
[ 1334.23596028, 176.76403972],
[32635.35860742, 4323.64139258]]))
Karena semua frekuensi yang diharapkan lebih besar dari 5, hasil uji chi2 dapat dipercaya. Kita dapat menolak
hipotesis nol karena nilai p kurang dari 0,05 (sebenarnya nilai p sama dengan 0). Dengan demikian, hasilnya
menunjukkan bahwa ada hubungan yang signifikan secara statistik
Jumlah Absolut dari kategori fitur Y: yes dan no berdasarkan poutcome
Abs Frequency
no
yes
no
yes
30k
20k
10k
0
33573 3386 4283 618 1533 307 533 978
failure other success unknown
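KESIMPULAN: Berdasarkan analisis di atas, keputusan user untuk membuka akun atau tidak berelasi kuat dengan
fitur-fitur kategorikal (melalui uji Chi-Square) dan tidak memiliki keterkaitan kuat dengan variabel kontinu
(uji Shapiro-Wilk).
Catatan: uji Shapiro-Wilk hanya menguji normalitas distribusi setiap fitur, bukan hubungannya dengan y, sehingga
keterkaitan variabel kontinu dengan y perlu diuji secara terpisah.
Jumlah Persentase dari kategori fitur Y: yes dan no berdasarkan poutcome
Rel Frequency(%)
90.8
80
87.4
83.3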
60 64.7
40
35.3
20 12.6 9.2
16.7
0
failure other success unknown
poutcome
https://colab.research.google.com/drive/1mu7DZCLMXk6k8zsDdbtCwM_AfyFlDvV4#scrollTo=e5NgJM_5RGLk&printMode=true 29/29