
HW2 Applied Questions

魏羿晖 2020200732

1 Problem 6

1.1 a

setwd("D:/files/study/ISL")
# Read the Auto data, treating "?" as missing, then drop incomplete rows
auto = read.csv("Auto.csv", header = T, na.strings = "?")
auto = na.omit(auto)

library(MASS)
library(ISLR)
lm.fit = lm(mpg~horsepower, data = auto)
summary(lm.fit)

##
## Call:
## lm(formula = mpg ~ horsepower, data = auto)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.5710 -3.2592 -0.3435 2.7630 16.9240
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 39.935861 0.717499 55.66 <2e-16 ***
## horsepower -0.157845 0.006446 -24.49 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.906 on 390 degrees of freedom
## Multiple R-squared: 0.6059, Adjusted R-squared: 0.6049
## F-statistic: 599.7 on 1 and 390 DF, p-value: < 2.2e-16
predict(lm.fit, data.frame(horsepower=c(98)), interval="confidence")

## fit lwr upr


## 1 24.46708 23.97308 24.96108
predict(lm.fit, data.frame(horsepower=c(98)), interval="prediction")

## fit lwr upr


## 1 24.46708 14.8094 34.12476

In the regression output, the coefficient on horsepower is negative, indicating a negative relationship between the predictor and the response. The F-statistic is far greater than 1 and the p-value is essentially zero, so the relationship between the predictor and the response is highly significant. R-squared is 0.6059, meaning that the linear regression on horsepower explains 60.59% of the variability in mpg. The predicted value, confidence interval, and prediction interval at horsepower = 98 are shown in the output above.
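
As a quick check (a minimal sketch, not part of the original output), the point prediction at horsepower = 98 can be reproduced directly from the estimated coefficients:

# Reproduce the point prediction at horsepower = 98 from the fitted coefficients
coef(lm.fit)["(Intercept)"] + coef(lm.fit)["horsepower"] * 98
# 39.935861 + (-0.157845) * 98 is approximately 24.467, matching the fit column above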

1.2 b

plot(auto$horsepower, auto$mpg)
abline(lm.fit)

[Figure: scatter plot of auto$mpg against auto$horsepower with the least-squares line from lm.fit overlaid.]

1.3 c

par(mfrow = c(2,2))
plot(lm.fit)

[Figure: diagnostic plots for lm.fit (Residuals vs Fitted, Normal Q-Q, Scale-Location, Residuals vs Leverage); observations 323, 330, and 334 are flagged.]

The fitted line in the residual plot is U-shaped, so the residuals are clearly related to the fitted values, indicating non-linearity in the data. The Scale-Location plot suggests that the error terms may have non-constant variance.
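
One way to follow up on the non-linearity, sketched here under the assumption that a quadratic term is an acceptable remedy (this is not part of the original answer; lm.fit.quad is a name introduced here):

# Sketch: quadratic fit in horsepower to address the U-shaped residual pattern
lm.fit.quad = lm(mpg ~ horsepower + I(horsepower^2), data = auto)
summary(lm.fit.quad)   # compare R-squared with the simple linear fit
par(mfrow = c(2, 2))
plot(lm.fit.quad)      # the Residuals vs Fitted panel should look flatter if the quadratic helps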

2 Problem 7

2.1 a

auto$name = as.factor(auto$name)
pairs(auto)

[Figure: scatterplot matrix (pairs plot) of the auto variables: mpg, cylinders, displacement, horsepower, weight, acceleration, year, origin, name.]

2.2 b

cor(subset(auto, select = -name))

## mpg cylinders displacement horsepower weight


## mpg 1.0000000 -0.7776175 -0.8051269 -0.7784268 -0.8322442
## cylinders -0.7776175 1.0000000 0.9508233 0.8429834 0.8975273
## displacement -0.8051269 0.9508233 1.0000000 0.8972570 0.9329944
## horsepower -0.7784268 0.8429834 0.8972570 1.0000000 0.8645377
## weight -0.8322442 0.8975273 0.9329944 0.8645377 1.0000000
## acceleration 0.4233285 -0.5046834 -0.5438005 -0.6891955 -0.4168392
## year 0.5805410 -0.3456474 -0.3698552 -0.4163615 -0.3091199
## origin 0.5652088 -0.5689316 -0.6145351 -0.4551715 -0.5850054
## acceleration year origin
## mpg 0.4233285 0.5805410 0.5652088
## cylinders -0.5046834 -0.3456474 -0.5689316
## displacement -0.5438005 -0.3698552 -0.6145351
## horsepower -0.6891955 -0.4163615 -0.4551715
## weight -0.4168392 -0.3091199 -0.5850054
## acceleration 1.0000000 0.2903161 0.2127458
## year 0.2903161 1.0000000 0.1815277
## origin 0.2127458 0.1815277 1.0000000

2.3 c

lm.fit1 = lm(mpg~.-name, data = auto)


summary(lm.fit1)

##
## Call:
## lm(formula = mpg ~ . - name, data = auto)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.5903 -2.1565 -0.1169 1.8690 13.0604
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -17.218435 4.644294 -3.707 0.00024 ***
## cylinders -0.493376 0.323282 -1.526 0.12780
## displacement 0.019896 0.007515 2.647 0.00844 **
## horsepower -0.016951 0.013787 -1.230 0.21963
## weight -0.006474 0.000652 -9.929 < 2e-16 ***
## acceleration 0.080576 0.098845 0.815 0.41548
## year 0.750773 0.050973 14.729 < 2e-16 ***
## origin 1.426141 0.278136 5.127 4.67e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.328 on 384 degrees of freedom
## Multiple R-squared: 0.8215, Adjusted R-squared: 0.8182
## F-statistic: 252.4 on 7 and 384 DF, p-value: < 2.2e-16

The F-statistic is far greater than 1, so we reject the null hypothesis that all regression coefficients are zero; there is therefore a relationship between the predictors and the response. displacement, weight, year, and origin have statistically significant relationships with the response. The coefficient on year is 0.750773, which means that, holding the other variables fixed, a car that is one model year newer achieves on average about 0.75 mpg more.
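
To illustrate the interpretation of the year coefficient, here is a minimal sketch with entirely hypothetical predictor values (the name newcars and all its values are made up; only year differs between the two rows):

# Sketch: two otherwise identical (hypothetical) cars, one model year apart
newcars = data.frame(cylinders = 4, displacement = 120, horsepower = 90,
                     weight = 2500, acceleration = 15, year = c(76, 77), origin = 1)
diff(predict(lm.fit1, newcars))   # roughly 0.75 mpg, the coefficient on year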

2.4 d

par(mfrow = c(2,2))
plot(lm.fit1)

[Figure: diagnostic plots for lm.fit1 (Residuals vs Fitted, Normal Q-Q, Scale-Location, Residuals vs Leverage).]

plot(predict(lm.fit1), rstudent(lm.fit1))
[Figure: studentized residuals rstudent(lm.fit1) plotted against the fitted values predict(lm.fit1).]

The fitted line in the residual plot is again U-shaped, so the residuals still show some relationship with the fitted values. Observation 14 is a high-leverage point. There are also unusually large outliers, since several points have studentized residuals greater than 3.
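
These observations can also be identified numerically; a minimal sketch using the standard hatvalues() and rstudent() helpers:

# Sketch: locate the highest-leverage observation and any large outliers
which.max(hatvalues(lm.fit1))       # observation with the largest leverage
which(abs(rstudent(lm.fit1)) > 3)   # observations with |studentized residual| > 3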

2.5 e

lm.fit2 = lm(mpg~cylinders*displacement, data = auto)


lm.fit3 = lm(mpg~weight + displacement + displacement:weight, data = auto)
summary(lm.fit2)

##
## Call:
## lm(formula = mpg ~ cylinders * displacement, data = auto)
##
## Residuals:
## Min 1Q Median 3Q Max
## -16.0432 -2.4308 -0.2263 2.2048 20.9051
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 48.22040 2.34712 20.545 < 2e-16 ***
## cylinders -2.41838 0.53456 -4.524 8.08e-06 ***
## displacement -0.13436 0.01615 -8.321 1.50e-15 ***
## cylinders:displacement 0.01182 0.00207 5.711 2.24e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.454 on 388 degrees of freedom
## Multiple R-squared: 0.6769, Adjusted R-squared: 0.6744
## F-statistic: 271 on 3 and 388 DF, p-value: < 2.2e-16
summary(lm.fit3)

##
## Call:
## lm(formula = mpg ~ weight + displacement + displacement:weight,
## data = auto)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.8664 -2.4801 -0.3355 1.8071 17.9429
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.372e+01 1.940e+00 27.697 < 2e-16 ***
## weight -8.931e-03 8.474e-04 -10.539 < 2e-16 ***
## displacement -7.831e-02 1.131e-02 -6.922 1.85e-11 ***
## weight:displacement 1.744e-05 2.789e-06 6.253 1.06e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.097 on 388 degrees of freedom
## Multiple R-squared: 0.7265, Adjusted R-squared: 0.7244
## F-statistic: 343.6 on 3 and 388 DF, p-value: < 2.2e-16

We take the two pairs of variables with the largest correlations and fit a linear regression with an interaction term for each pair; in both models the interaction effect is statistically significant.
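
As an additional check (a sketch, not part of the original answer; the helper model lm.fit2.main is introduced here), the interaction in lm.fit2 can also be assessed with a nested-model F-test:

# Sketch: compare the main-effects model with the interaction model
lm.fit2.main = lm(mpg ~ cylinders + displacement, data = auto)
anova(lm.fit2.main, lm.fit2)   # a small p-value supports keeping cylinders:displacement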

2.6 f

lm.fit4 = lm(mpg~log(displacement) + sqrt(weight) + year + I(year^2), data = auto)


summary(lm.fit4)

##
## Call:
## lm(formula = mpg ~ log(displacement) + sqrt(weight) + year +
## I(year^2), data = auto)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.9700 -1.8228 -0.0033 1.6218 13.5212
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 434.93957 75.22099 5.782 1.52e-08 ***
## log(displacement) -3.42454 0.90388 -3.789 0.000176 ***
## sqrt(weight) -0.51231 0.06243 -8.206 3.43e-15 ***
## year -10.40999 1.97815 -5.262 2.36e-07 ***
## I(year^2) 0.07339 0.01301 5.641 3.26e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.123 on 387 degrees of freedom
## Multiple R-squared: 0.8416, Adjusted R-squared: 0.8399
## F-statistic: 513.9 on 4 and 387 DF, p-value: < 2.2e-16

par(mfrow=c(2,2))
plot(lm.fit4)

[Figure: diagnostic plots for lm.fit4 (Residuals vs Fitted, Normal Q-Q, Scale-Location, Residuals vs Leverage).]

Taking several of the predictors that were statistically significant in lm.fit1 and applying different transformations, the transformed terms remain statistically significant. The points in the residual plot show almost no pattern, suggesting the model fits the data well. The leverage values in the leverage plot are all small. The Normal Q-Q plot, however, suggests that the residuals may not be normally distributed.
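
As a rough side-by-side comparison (a sketch, not in the original answer; the list labels are chosen here), the adjusted R-squared values of the models fitted in this problem can be collected in one call:

# Sketch: adjusted R-squared of the models fitted in Problem 7
sapply(list(full = lm.fit1, interact1 = lm.fit2, interact2 = lm.fit3, transformed = lm.fit4),
       function(m) summary(m)$adj.r.squared)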

3 Problem 8

3.1 a

summary(Weekly)

## Year Lag1 Lag2 Lag3


## Min. :1990 Min. :-18.1950 Min. :-18.1950 Min. :-18.1950
## 1st Qu.:1995 1st Qu.: -1.1540 1st Qu.: -1.1540 1st Qu.: -1.1580
## Median :2000 Median : 0.2410 Median : 0.2410 Median : 0.2410
## Mean :2000 Mean : 0.1506 Mean : 0.1511 Mean : 0.1472
## 3rd Qu.:2005 3rd Qu.: 1.4050 3rd Qu.: 1.4090 3rd Qu.: 1.4090
## Max. :2010 Max. : 12.0260 Max. : 12.0260 Max. : 12.0260
## Lag4 Lag5 Volume Today
## Min. :-18.1950 Min. :-18.1950 Min. :0.08747 Min. :-18.1950
## 1st Qu.: -1.1580 1st Qu.: -1.1660 1st Qu.:0.33202 1st Qu.: -1.1540
## Median : 0.2380 Median : 0.2340 Median :1.00268 Median : 0.2410
## Mean : 0.1458 Mean : 0.1399 Mean :1.57462 Mean : 0.1499
## 3rd Qu.: 1.4090 3rd Qu.: 1.4050 3rd Qu.:2.05373 3rd Qu.: 1.4050
## Max. : 12.0260 Max. : 12.0260 Max. :9.32821 Max. : 12.0260
## Direction
## Down:484
## Up :605
##
##
##
##
cor(Weekly[,-9])

## Year Lag1 Lag2 Lag3 Lag4


## Year 1.00000000 -0.032289274 -0.03339001 -0.03000649 -0.031127923
## Lag1 -0.03228927 1.000000000 -0.07485305 0.05863568 -0.071273876
## Lag2 -0.03339001 -0.074853051 1.00000000 -0.07572091 0.058381535
## Lag3 -0.03000649 0.058635682 -0.07572091 1.00000000 -0.075395865
## Lag4 -0.03112792 -0.071273876 0.05838153 -0.07539587 1.000000000
## Lag5 -0.03051910 -0.008183096 -0.07249948 0.06065717 -0.075675027
## Volume 0.84194162 -0.064951313 -0.08551314 -0.06928771 -0.061074617
## Today -0.03245989 -0.075031842 0.05916672 -0.07124364 -0.007825873
## Lag5 Volume Today
## Year -0.030519101 0.84194162 -0.032459894
## Lag1 -0.008183096 -0.06495131 -0.075031842
## Lag2 -0.072499482 -0.08551314 0.059166717
## Lag3 0.060657175 -0.06928771 -0.071243639
## Lag4 -0.075675027 -0.06107462 -0.007825873
## Lag5 1.000000000 -0.05851741 0.011012698
## Volume -0.058517414 1.00000000 -0.033077783
## Today 0.011012698 -0.03307778 1.000000000
pairs(Weekly[,-9])

[Figure: scatterplot matrix (pairs plot) of the Weekly variables excluding Direction: Year, Lag1 to Lag5, Volume, Today.]

The correlation matrix and the scatterplot matrix show that the correlations between most pairs of predictors are close to 0; the returns in previous weeks have very little correlation with this week's return. The only exception is Year and Volume: Volume grows steadily as Year increases.

3.2 b

glm.fit = glm(Direction~Lag1+Lag2+Lag3+Lag4+Lag5+Volume, data = Weekly, family = binomial)


summary(glm.fit)

##
## Call:
## glm(formula = Direction ~ Lag1 + Lag2 + Lag3 + Lag4 + Lag5 +
## Volume, family = binomial, data = Weekly)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.6949 -1.2565 0.9913 1.0849 1.4579
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.26686 0.08593 3.106 0.0019 **
## Lag1 -0.04127 0.02641 -1.563 0.1181
## Lag2 0.05844 0.02686 2.175 0.0296 *
## Lag3 -0.01606 0.02666 -0.602 0.5469
## Lag4 -0.02779 0.02646 -1.050 0.2937
## Lag5 -0.01447 0.02638 -0.549 0.5833
## Volume -0.02274 0.03690 -0.616 0.5377
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1496.2 on 1088 degrees of freedom
## Residual deviance: 1486.4 on 1082 degrees of freedom
## AIC: 1500.4
##
## Number of Fisher Scoring iterations: 4

Lag2 is a statistically significant predictor, with a p-value of 0.0296.

3.3 c

glm.probs = predict(glm.fit, type = "response")


glm.pred = rep("Down",length(glm.probs))
glm.pred[glm.probs > .5] = "Up"

table(glm.pred, Weekly$Direction)

##
## glm.pred Down Up
## Down 54 48
## Up 430 557
mean(glm.pred == Weekly$Direction)

## [1] 0.5610652

When the model predicts that the market will go down, it is correct 54/(54+48) = 52.9% of the time; when it predicts that the market will go up, it is correct 557/(557+430) = 56.4% of the time. The overall prediction accuracy is about 56.11%.
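
These rates can be reproduced directly from the confusion matrix; a minimal sketch (the name conf is introduced here):

# Sketch: recompute the rates quoted above from the confusion matrix
conf = table(glm.pred, Weekly$Direction)
conf["Down", "Down"] / sum(conf["Down", ])   # about 0.529 when predicting Down
conf["Up", "Up"] / sum(conf["Up", ])         # about 0.564 when predicting Up
sum(diag(conf)) / sum(conf)                  # overall accuracy, about 0.561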

3.4 d Logistic regression

# tr marks the training weeks (before 2009); te holds the 2009-2010 test weeks
tr = (Weekly$Year < 2009)
te = Weekly[!tr,]
glm.fit1 = glm(Direction~Lag2, data = Weekly, family = binomial, subset = tr)
summary(glm.fit1)

##
## Call:
## glm(formula = Direction ~ Lag2, family = binomial, data = Weekly,
## subset = tr)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.536 -1.264 1.021 1.091 1.368
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.20326 0.06428 3.162 0.00157 **
## Lag2 0.05810 0.02870 2.024 0.04298 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1354.7 on 984 degrees of freedom
## Residual deviance: 1350.5 on 983 degrees of freedom
## AIC: 1354.5
##
## Number of Fisher Scoring iterations: 4
glm.probs1 = predict(glm.fit1, te, type = "response")
glm.pred1 = rep("Down",length(glm.probs1))
glm.pred1[glm.probs1 > .5] = "Up"
table(glm.pred1, Weekly$Direction[!tr])

##
## glm.pred1 Down Up
## Down 9 5
## Up 34 56
mean(glm.pred1 == Weekly$Direction[!tr])

## [1] 0.625

3.5 e LDA

lda.fit = lda(Direction~Lag2, data = Weekly, subset = tr)


summary(lda.fit)

## Length Class Mode


## prior 2 -none- numeric
## counts 2 -none- numeric
## means 2 -none- numeric
## scaling 1 -none- numeric
## lev 2 -none- character
## svd 1 -none- numeric
## N 1 -none- numeric
## call 4 -none- call
## terms 3 terms call
## xlevels 0 -none- list
lda.pred = predict (lda.fit, te)
names(lda.pred)

## [1] "class" "posterior" "x"


lda.class = lda.pred$class
table(lda.class, Weekly$Direction[!tr])

##
## lda.class Down Up
## Down 9 5
## Up 34 56
mean(lda.class == Weekly$Direction[!tr])

## [1] 0.625

3.6 f QDA

qda.fit = qda(Direction~Lag2, data = Weekly, subset = tr)


summary(qda.fit)

## Length Class Mode


## prior 2 -none- numeric
## counts 2 -none- numeric
## means 2 -none- numeric
## scaling 2 -none- numeric
## ldet 2 -none- numeric
## lev 2 -none- character
## N 1 -none- numeric
## call 4 -none- call
## terms 3 terms call
## xlevels 0 -none- list
qda.pred = predict (qda.fit, te)
names(qda.pred)

## [1] "class" "posterior"


qda.class = qda.pred$class
table(qda.class, Weekly$Direction[!tr])

##
## qda.class Down Up
## Down 0 0
## Up 43 61
mean(qda.class == Weekly$Direction[!tr])

## [1] 0.5865385

3.7 g KNN: K = 1

library(class)
train.X = as.matrix(Weekly$Lag2[tr])
test.X = as.matrix(Weekly$Lag2[!tr])
train.Direction = Weekly$Direction[tr]
set.seed(1)
knn.pred = knn(train.X, test.X, train.Direction, k = 1)
table(knn.pred, Weekly$Direction[!tr])

##
## knn.pred Down Up
## Down 21 30
## Up 22 31
mean(knn.pred == Weekly$Direction[!tr])

## [1] 0.5

In summary, logistic regression and LDA achieve the highest overall prediction accuracy, both 0.625, so these two methods give the best results so far.
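
For reference, a compact sketch (not part of the original answer) gathering the held-out 2009-2010 accuracies computed above into one vector:

# Sketch: held-out accuracies of the four classifiers fitted in parts d-g
c(logistic = mean(glm.pred1 == Weekly$Direction[!tr]),
  lda      = mean(lda.class == Weekly$Direction[!tr]),
  qda      = mean(qda.class == Weekly$Direction[!tr]),
  knn_k1   = mean(knn.pred  == Weekly$Direction[!tr]))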

3.8 i

3.8.1 KNN: K = 3

library(class)
train.X = as.matrix(Weekly$Lag2[tr])
test.X = as.matrix(Weekly$Lag2[!tr])
train.Direction = Weekly$Direction[tr]
set.seed(1)
knn.pred = knn(train.X, test.X, train.Direction, k = 3)
table(knn.pred, Weekly$Direction[!tr])

##
## knn.pred Down Up
## Down 16 20
## Up 27 41
mean(knn.pred == Weekly$Direction[!tr])

## [1] 0.5480769

3.8.2 KNN: K = 4

library(class)
train.X = as.matrix(Weekly$Lag2[tr])
test.X = as.matrix(Weekly$Lag2[!tr])
train.Direction = Weekly$Direction[tr]
set.seed(1)
knn.pred = knn(train.X, test.X, train.Direction, k = 4)
table(knn.pred, Weekly$Direction[!tr])

##
## knn.pred Down Up
## Down 20 17
## Up 23 44
mean(knn.pred == Weekly$Direction[!tr])

## [1] 0.6153846

3.8.3 KNN: K = 5

library(class)
train.X = as.matrix(Weekly$Lag2[tr])
test.X = as.matrix(Weekly$Lag2[!tr])
train.Direction = Weekly$Direction[tr]
set.seed(1)
knn.pred = knn(train.X, test.X, train.Direction, k = 5)
table(knn.pred, Weekly$Direction[!tr])

##
## knn.pred Down Up
## Down 16 21
## Up 27 40
mean(knn.pred == Weekly$Direction[!tr])

## [1] 0.5384615

3.8.4 KNN: K = 10

library(class)
train.X = as.matrix(Weekly$Lag2[tr])
test.X = as.matrix(Weekly$Lag2[!tr])
train.Direction = Weekly$Direction[tr]
set.seed(1)
knn.pred = knn(train.X, test.X, train.Direction, k = 10)
table(knn.pred, Weekly$Direction[!tr])

##
## knn.pred Down Up
## Down 17 21
## Up 26 40
mean(knn.pred == Weekly$Direction[!tr])

## [1] 0.5480769

Varying K in the KNN classifier, we find that the overall prediction accuracy is highest at K = 4, about 0.615 (a compact way to run this sweep is sketched below). Since in part (b) all predictors other than Lag1 and Lag2 have fairly high p-values, we use the combination of Lag1 and Lag2 in the other models below.
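
The four KNN blocks above differ only in k; as a minimal sketch (not part of the original answer; ks, accs, and pred are names introduced here), the same sweep can be written in a single sapply call:

# Sketch: test accuracy for several values of k in one pass
ks = c(1, 3, 4, 5, 10)
accs = sapply(ks, function(k) {
  set.seed(1)   # same seed as in the individual blocks above
  pred = knn(train.X, test.X, train.Direction, k = k)
  mean(pred == Weekly$Direction[!tr])
})
names(accs) = ks
accs   # accuracy peaks at k = 4 (about 0.615)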

3.8.5 Adjusted logistic regression

tr = (Weekly$Year < 2009)


te = Weekly[!tr,]
glm.fit1 = glm(Direction~Lag1*Lag2, data = Weekly, family = binomial, subset = tr)
summary(glm.fit1)

##
## Call:
## glm(formula = Direction ~ Lag1 * Lag2, family = binomial, data = Weekly,
## subset = tr)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.573 -1.259 1.003 1.086 1.596
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.211419 0.064589 3.273 0.00106 **
## Lag1 -0.051505 0.030727 -1.676 0.09370 .
## Lag2 0.053471 0.029193 1.832 0.06700 .
## Lag1:Lag2 0.001921 0.007460 0.257 0.79680
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1354.7 on 984 degrees of freedom
## Residual deviance: 1346.9 on 981 degrees of freedom
## AIC: 1354.9
##
## Number of Fisher Scoring iterations: 4
glm.probs1 = predict(glm.fit1, te, type = "response")
glm.pred1 = rep("Down",length(glm.probs1))
glm.pred1[glm.probs1 > .5] = "Up"
table(glm.pred1, Weekly$Direction[!tr])

##
## glm.pred1 Down Up
## Down 7 8
## Up 36 53
mean(glm.pred1 == Weekly$Direction[!tr])

## [1] 0.5769231

3.8.6 Adjusted LDA

lda.fit = lda(Direction~Lag1*Lag2, data = Weekly, subset = tr)


summary(lda.fit)

## Length Class Mode


## prior 2 -none- numeric
## counts 2 -none- numeric
## means 6 -none- numeric
## scaling 3 -none- numeric
## lev 2 -none- character
## svd 1 -none- numeric
## N 1 -none- numeric
## call 4 -none- call
## terms 3 terms call
## xlevels 0 -none- list
lda.pred = predict (lda.fit, te)
names(lda.pred)

## [1] "class" "posterior" "x"


lda.class = lda.pred$class
table(lda.class, Weekly$Direction[!tr])

##
## lda.class Down Up
## Down 7 8
## Up 36 53
mean(lda.class == Weekly$Direction[!tr])

## [1] 0.5769231

3.8.7 Adjusted QDA

qda.fit = qda(Direction~Lag1*Lag2, data = Weekly, subset = tr)


summary(qda.fit)

## Length Class Mode


## prior 2 -none- numeric
## counts 2 -none- numeric
## means 6 -none- numeric
## scaling 18 -none- numeric
## ldet 2 -none- numeric
## lev 2 -none- character
## N 1 -none- numeric
## call 4 -none- call
## terms 3 terms call
## xlevels 0 -none- list
qda.pred = predict (qda.fit, te)
names(qda.pred)

## [1] "class" "posterior"


qda.class = qda.pred$class
table(qda.class, Weekly$Direction[!tr])

##
## qda.class Down Up
## Down 23 36
## Up 20 25
mean(qda.class == Weekly$Direction[!tr])

## [1] 0.4615385

Compared with the other adjusted methods, KNN with K = 4 still achieves the highest overall prediction accuracy.
