# R lab notes: time series (forecast/fpp2), association rules (arules),
# decision trees, and clustering — transcribed from a 33-page handout.

# A simple numeric series: the even numbers from 2 to 20.
tseries <- seq(2, 20, by = 2)

# Quick bar chart of the raw values.
barplot(tseries)

# One-time package installation for the time-series section.
install.packages("forecast")

# Fixed typo: the function is install.packages, not install.pacakages.
install.packages("ggplot2")

# fpp2 pulls in forecast, ggplot2, and the example datasets used below.
install.packages("fpp2")

# Built-in monthly airline passenger counts, 1949-1960.
AirPassengers

# ggfortify supplies autoplot() methods for ts objects.
library(ggfortify)

# Time-series line plot through ggplot2's autoplot generic.
ggplot2::autoplot(AirPassengers)

# fpp2 attaches forecast, ggplot2 and several example datasets.
library(fpp2)

# Quarterly Australian cement production (fpp2 dataset).
qcement

# Basic line plot of the series.
autoplot(qcement)

# Seasonal plot: one line per year across the four quarters.
ggseasonplot(qcement)

# The same seasonal plot on polar coordinates.
ggseasonplot(qcement,polar=TRUE)

# One panel per quarter, showing how each season evolves over the years.
ggsubseriesplot(qcement)

# Lag scatterplots: y_t against y_{t-k} for several lags k.
gglagplot(qcement)

# Autocorrelation function, ggplot version...
ggAcf(qcement)

# ...and the forecast-package version of the same ACF.
Acf(qcement)

# Quarterly international arrivals to Australia (multivariate ts, fpp2).
arrivals

# All series overlaid in a single panel.
autoplot(arrivals)

# One facet per origin country.
autoplot(arrivals, facets =TRUE)

# Add a smoothed trend line to each facet.
autoplot(arrivals, facets =TRUE) +geom_smooth()

# Same plot with a y-axis label.
autoplot(arrivals, facets =TRUE) +geom_smooth() + labs(y='Arrivals (in thousands)')

# Five-number summaries per column.
summary(arrivals)
# ACF of each series (ggplot version).
ggAcf(arrivals)

# ACF, forecast-package version.
Acf(arrivals)

# Annual mink trappings series (ships with fpp2's companion data).
mink

# Cyclic series show slowly oscillating autocorrelations.
ggAcf(mink)

Acf(mink)

# Monthly accidental deaths in the USA, 1973-1978 (built-in dataset).
USAccDeaths

ggAcf(USAccDeaths)

Acf(USAccDeaths)

# White noise: 36 iid N(0, 1) draws as a ts.
# Seed fixed so the example (and its ACF) is reproducible.
set.seed(42)
wn <- ts(rnorm(36))

autoplot(wn)

# White noise should show no significant autocorrelation at any lag.
acf(wn)

# Small manual sample used for summary statistics.
amount <- c(9,8,9,12,9,12,11,7,13,9,11,10)

summary(amount)

#moving average

# `economics` ships with ggplot2 (attached via fpp2).
economics

# Monthly personal savings rate, starting July 1967.
savings.ts <- ts(economics$psavert, start= c(1967,7), frequency=12)

autoplot(savings.ts)

# Compute the smoothers once, then overlay both on the raw series.
ma3 <- ma(savings.ts, 3)
ma4 <- ma(savings.ts, 4)

autoplot(savings.ts, series = 'Original') +
  autolayer(ma3, series = 'MA3') +
  autolayer(ma4, series = 'MA4') +
  xlab('Date') +
  ylab('Savings Rate')

savings.ts

#EXPONENTIAL SMOOTHING METHOD

# Simple exponential smoothing of Google closing prices (goog, fpp2).
ses(goog)

# Fixed smoothing parameter alpha = 0.2, one-step-ahead forecast.
ses(goog, alpha=.2, h=1)

# Plot the one-step forecast...
autoplot(ses(goog,alpha=0.2,h=1))

# ...and the ten-step forecast.
autoplot(ses(goog,alpha=0.2,h=10))

# Quarterly percentage changes in US macro series (fpp2).
uschange

# All five series at once.
autoplot(uschange)

# Just consumption and income...
autoplot(uschange[, c('Consumption', 'Income')])

# ...with a y-axis label.
autoplot(uschange[, c('Consumption', 'Income')]) + ylab('%change')

# Scatterplot of consumption against income.
uschange %>%
  as.data.frame() %>%
  ggplot(aes(x = Income, y = Consumption)) +
  ylab('Consumption') +
  xlab('Income') +
  geom_point()

# The same scatterplot plus a loess smoother.
uschange %>%
  as.data.frame() %>%
  ggplot(aes(x = Income, y = Consumption)) +
  ylab('Consumption') +
  xlab('Income') +
  geom_point() +
  geom_smooth()

# And with a straight regression line instead (no confidence band).
uschange %>%
  as.data.frame() %>%
  ggplot(aes(x = Income, y = Consumption)) +
  ylab('Consumption') +
  xlab('Income') +
  geom_point() +
  geom_smooth(method='lm', se=FALSE)

#TIME SERIES LINEAR REGRESSION

# Regress consumption on income with tslm (forecast package).
tslm(Consumption~ Income,data=uschange) #consumption=0.5451+0.2806*X

##income and consumption are directly proportional

head(uschange)

# Consumption against all remaining columns as predictors.
tslm(Consumption ~ ., data=uschange)
#AUS BEER DATASET

# Quarterly Australian beer production (fpp2).
ausbeer

autoplot(ausbeer)

seasonplot(ausbeer)

# Restrict attention to 1992 onwards.
beer1992 <- window(ausbeer, start = 1992)

autoplot(beer1992)

# Linear model with a trend term plus seasonal dummies.
beer.model <- tslm(beer1992 ~ trend + season)

summary(beer.model)

# Actual vs fitted values.
autoplot(beer1992, series = 'Data') +
  autolayer(fitted(beer.model), series = 'Fitted') +
  xlab('Year') +
  ylab('Megaliters')

# Forecast 10 quarters ahead.  The original called
# fitted(beer.model, n.head = 10), but fitted() has no such argument;
# forecasting future periods is done with forecast(model, h = ...).
autoplot(beer1992, series = 'Data') +
  autolayer(forecast(beer.model, h = 10), series = 'Forecast') +
  xlab('Year') +
  ylab('Megaliters')

#create a TSLM model to explain the variation in consumption based on all other predictors
# (removed pasted REPL prompts and rejoined the line break inside "uschange")
consumption.model <-
  tslm(Consumption ~ Income + Production + Savings + Unemployment,
       data = uschange)

consumption.model

# R is case-sensitive: the accessors are fitted() and summary(),
# not Fitted()/Summary().
fitted(consumption.model)

summary(consumption.model)

# CV: cross-validation statistics for comparing linear time-series models.

# Cross-validation of the beer model (REPL prompts removed).
ausbeer

beer1992 <- window(ausbeer,start = 1992)

autoplot(beer1992)

beer.model <- tslm(beer1992 ~ trend + season)
summary(beer.model)

autoplot(beer1992, series = "Data") + autolayer(fitted(beer.model), series = "Fitted")+
  xlab("Year")+ ylab("Megaliters")

fitted(beer.model)

# CV() reports AIC/BIC/CV statistics for the model
# (the closing parenthesis was missing in the notes).
CV(beer.model)

# Next: creating a linear model for the marathon dataset.

#MENS400 dataset: Olympic men's 400 m winning times.
mens400

autoplot(mens400)

#to create a model that explains the rate of change in winning times
#forecast the winning time for 2020 and 2024

# Simple trend-only linear model.
mens400.model <- tslm(mens400~ trend)

mens400.model

autoplot(mens400)+
  autolayer(fitted(mens400.model))

# The Games are quadrennial, so two steps ahead covers 2020 and 2024.
# (The default forecast horizon would over-shoot the stated goal.)
forecast(mens400.model, h = 2)
# Monthly anti-diabetic drug sales (fpp2).  R is case-sensitive:
# the object is a10 and the function is autoplot, not A10/Autoplot.
a10

autoplot(a10)

# Double differencing: difference the series twice to remove a trend.

# Kwiatkowski-Phillips-Schmidt-Shin (KPSS) test for stationarity.
# Per the notes: a result of 1 means the series is non-stationary
# (needs one difference); 0 means the series is already stationary.

# Auto-regressive model


# ASSOCIATION RULES / frequent itemsets.
# Notes mention "min support = 3" for the worked blackboard example.

# install.packages() is lowercase; curly quotes replaced with ASCII,
# "Intall"/"arulesviz" typos fixed (the package is arulesViz).
install.packages("arules")

install.packages("lubridate")

install.packages("arulesViz")

library(arules)

# The arules example dataset is called Groceries (capital G);
# "groceires" in the notes was a typo.
data(Groceries)

class(Groceries)

head(Groceries, 5)

# eclat() mines frequent itemsets -- it is not a clustering function.
fitec <- eclat(Groceries, parameter = list(sup = 0.1))

# maxlen caps the size of the itemsets considered.
fitec1 <- eclat(Groceries, parameter = list(sup = 0.1, maxlen = 15))

inspect(fitec1)
# To check which itemsets are highly correlated.

# To explore rules that indicate the sale of soft cheese.

# To explore rules that indicate the sale of yoghurt.

# To visualize rules for coffee:

# Rules with coffee on the right-hand side: very low support (0.1%)
# but confidence >= 20%; `appearance` restricts the consequent.
Rule<-apriori(Groceries,parameter=list(sup=0.001, conf=0.2), appearance = list(rhs='coffee'))

# Show the first five rules found.
inspect(head(Rule,5))

# Interactive graph visualisation (arulesViz, htmlwidget engine).
plot(Rule,method="graph",engine='htmlwidget')

# Parallel-coordinates view of the same rules.
plot(Rule,method="paracoord")

# To identify data based on invoice number.

# To remove the first column (invoice number).

# Converting to CSV.

# To put the data in transaction format.

# DECISION TREE ----

# Curly quotes replaced with ASCII; rattle() needs the package attached.
install.packages("rattle")
library(rattle)
rattle()

# FOR BOOTSTRAP AGGREGATION (bagging)

install.packages("ipred")
install.packages("C50")
install.packages("e1071")

library(e1071)

library(ISLR)

# Credit-default example data from ISLR.
head(Default)
# CLUSTERING ----

# get_dist(), fviz_dist() and fviz_cluster() come from factoextra,
# not tidyverse, so both packages are needed.
install.packages("tidyverse")
install.packages("factoextra")

library(tidyverse)
library(factoextra)

# R is case-sensitive: use `usa` consistently (notes mixed Usa/usa).
usa <- USArrests

summary(usa)

# Standardise so all four crime variables contribute equally.
usa <- scale(usa)

distances <- get_dist(usa)

fviz_dist(distances)

# k-means with 3 centres; nstart = 20 restarts to avoid poor local optima.
kc <- kmeans(usa, centers = 3, nstart = 20)

kc

fviz_cluster(kc, data = usa)

# Elbow method: total within-cluster sum of squares for k = 3..6.
kc3 <- kmeans(usa, centers = 3, nstart = 20)

kc4 <- kmeans(usa, centers = 4, nstart = 20)

kc5 <- kmeans(usa, centers = 5, nstart = 20)

kc6 <- kmeans(usa, centers = 6, nstart = 20)

wss <- c(kc3$tot.withinss, kc4$tot.withinss, kc5$tot.withinss, kc6$tot.withinss)

nclusters <- 3:6

plot(nclusters, wss, type = 'b')
# Reference: https://cran.r-project.org/web/packages/cluster.datasets/cluster.datasets.pdf
# Hierarchical clustering of standardised USArrests
# (pasted REPL prompts removed so the script parses).
usa <- USArrests

usa <- scale(USArrests)

# Pairwise Euclidean distances between states.
d <- dist(usa, method = 'euclidean')

# Agglomerative clustering with complete linkage.
hcl <- hclust(d, method = "complete")

hcl

summary(hcl)

# Dendrogram; hang = -1 aligns all labels at the baseline.
plot(hcl, cex = 0.6, hang = -1)

# For divisive hierarchical clustering, see cluster::diana().

# To cut the dendrogram into k groups: cutree(hcl, k = ...).

# Tanglegram: side-by-side comparison of two dendrograms
# (presumably the dendextend package -- TODO confirm which was used).

# Boxplot of fuel economy by cylinder count (built-in mtcars).
boxplot(mpg~cyl, data=mtcars)

# Histogram of iris sepal widths.
hist(iris$Sepal.Width,col="blue",main="Histogram",xlab="Sepal width")

# Multivariate outlier detection (lowercase name, ASCII quotes).
install.packages("mvoutlier")

# Map / tracker plot (notes only; no code transcribed).

# Principal components (notes only; no code transcribed).

# Density-based clustering.
library(dbscan)
You might also like