VeBieuDo-21.08.21 - Ngo Thi Thang Nga

Download as pdf or txt
Download as pdf or txt
You are on page 1of 22

Một số biểu đồ thông dụng

Ngô Thị Thanh Nga -TLU

21/08/2021

1. Đọc dữ liệu, vẽ các biểu đồ


Một số dữ liệu có sẵn thú vị dạng bảng (lớp table) trong gói datasets: Titanic, HairEyeColor.
Dữ liệu hoa Iris
# Load the bundled example data sets and show the structure of iris
# (column names, types and the first few values of each column).
library(datasets)
str(iris)

## 'data.frame': 150 obs. of 5 variables:


## $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
## $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
## $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
## $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
## $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...

# Print the first six rows of iris.
head(iris)

## Sepal.Length Sepal.Width Petal.Length Petal.Width Species


## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa

Sepal: đài hoa


Petal: cánh hoa

Biểu đồ cho biến định tính


# Count the observations in each level of the Species factor.
table(iris$Species)

##
## setosa versicolor virginica
## 50 50 50

# NOTE(review): attach() places the iris columns on the search path so they
# can be used by bare name, as table(Species) does here. Attached names can
# silently shadow or be shadowed by other objects; prefer iris$Species or
# with(iris, ...) in new code.
attach(iris)
# Frequency table of Species, resolved through the attached data frame.
table(Species)
## Species
## setosa versicolor virginica
## 50 50 50

# Bar chart and pie chart of the species counts, drawn side by side.
# The counts are read from iris directly, so no attach(iris) is required.
species_counts <- table(iris$Species)
# par() returns the previous settings; keep them so the one-row, two-column
# layout does not leak into every plot drawn afterwards.
old_par <- par(mfrow = c(1, 2))
barplot(species_counts)
pie(species_counts)
par(old_par)

Vẽ biểu đồ histogram
# List the column names of iris.
names(iris)

## [1] "Sepal.Length" "Sepal.Width" "Petal.Length" "Petal.Width" "Species"

# Histograms of sepal length: all flowers first, then one per species.
# with() looks the columns up in iris itself, so this no longer depends on an
# earlier attach(iris); the calls are unchanged, so titles and axis labels
# stay the same.
with(iris, {
  hist(Sepal.Length)
  hist(Sepal.Length[Species == "setosa"])
  hist(Sepal.Length[Species == "versicolor"])
  hist(Sepal.Length[Species == "virginica"])
})
# Box plots of sepal length: one box for all flowers, then one box per species.
# The columns are taken from iris explicitly instead of relying on attach(iris).
boxplot(iris$Sepal.Length)
boxplot(Sepal.Length ~ Species, data = iris)
Biểu đồ tán xạ
# Scatter plots: sepal length against observation index, sepal length against
# sepal width for all flowers, and the same pair restricted to setosa.
# with() resolves the columns inside iris, so no attach(iris) is needed and
# the axis labels are the same as before.
with(iris, {
  plot(Sepal.Length)
  plot(Sepal.Length, Sepal.Width)
  plot(Sepal.Length[Species == "setosa"], Sepal.Width[Species == "setosa"])
})
Dữ liệu đặc biệt kiểu .table
# Print the Titanic data set: a four-way table of counts indexed by
# Class, Sex, Age and Survived.
library(datasets)
Titanic

## , , Age = Child, Survived = No


##
## Sex
## Class Male Female
## 1st 0 0
## 2nd 0 0
## 3rd 35 17
## Crew 0 0
##
## , , Age = Adult, Survived = No
##
## Sex
## Class Male Female
## 1st 118 4
## 2nd 154 13
## 3rd 387 89
## Crew 670 3
##
## , , Age = Child, Survived = Yes
##
## Sex
## Class Male Female
## 1st 5 1
## 2nd 11 13
## 3rd 13 14
## Crew 0 0
##
## , , Age = Adult, Survived = Yes
##
## Sex
## Class Male Female
## 1st 57 140
## 2nd 14 80
## 3rd 75 76
## Crew 192 20

# Flatten the four-way Titanic table into a data frame with one row per
# combination of Class, Sex, Age and Survived, plus its count in Freq.
Titanic.df <- as.data.frame(Titanic)
print(Titanic.df)

## Class Sex Age Survived Freq


## 1 1st Male Child No 0
## 2 2nd Male Child No 0
## 3 3rd Male Child No 35
## 4 Crew Male Child No 0
## 5 1st Female Child No 0
## 6 2nd Female Child No 0
## 7 3rd Female Child No 17
## 8 Crew Female Child No 0
## 9 1st Male Adult No 118
## 10 2nd Male Adult No 154
## 11 3rd Male Adult No 387
## 12 Crew Male Adult No 670
## 13 1st Female Adult No 4
## 14 2nd Female Adult No 13
## 15 3rd Female Adult No 89
## 16 Crew Female Adult No 3
## 17 1st Male Child Yes 5
## 18 2nd Male Child Yes 11
## 19 3rd Male Child Yes 13
## 20 Crew Male Child Yes 0
## 21 1st Female Child Yes 1
## 22 2nd Female Child Yes 13
## 23 3rd Female Child Yes 14
## 24 Crew Female Child Yes 0
## 25 1st Male Adult Yes 57
## 26 2nd Male Adult Yes 14
## 27 3rd Male Adult Yes 75
## 28 Crew Male Adult Yes 192
## 29 1st Female Adult Yes 140
## 30 2nd Female Adult Yes 80
## 31 3rd Female Adult Yes 76
## 32 Crew Female Adult Yes 20

# Number of people travelling first class. with() evaluates the expression
# inside Titanic.df, so the data frame does not have to be attach()ed.
with(Titanic.df, sum(Freq[Class == "1st"]))

## [1] 325

# Number of first-class passengers who survived.
with(Titanic.df, sum(Freq[Class == "1st" & Survived == "Yes"]))

## [1] 203

2. Mô phỏng dữ liệu
Phân phối nhị thức
# Binomial distribution: 100 draws from Binomial(size = 20, prob = 0.5).
# The seed is fixed so the sample, and therefore the histogram, is reproducible.
set.seed(19)
x <- rbinom(n = 100, size = 20, prob = 0.5)
# Show the whole support 0..20 on the x axis, not only the observed range.
hist(x, xlim = c(0, 20))

# Poisson distribution: 100 draws with mean lambda = 15 and their histogram.
x <- rpois(100, lambda = 15)
hist(x)

# Exponential distribution: 150 draws with rate 0.1 (mean 10), shown next to
# the density they were drawn from.
x <- rexp(150, rate = 0.1)
# Keep the previous layout so the two-panel setting is undone afterwards.
old_par <- par(mfrow = c(1, 2))
hist(x)
# Use the same rate as the sample and cover the sample's range. The previous
# dexp(x, 10) on the default interval [0, 1] was a different distribution
# from the one in the histogram. Inside curve(), x in the first argument is
# the plotting variable; max(x) is evaluated on the sample.
curve(dexp(x, rate = 0.1), from = 0, to = max(x))
par(old_par)
# Chi-squared densities for 1, 2, 3 and 5 degrees of freedom on [0, 10].
# The first curve() call sets up the axes; the others are added on top.
curve(dchisq(x, 1), xlim = c(0, 10), ylim = c(0, 0.6), col = "red", lwd = 3)
curve(dchisq(x, 2), add = TRUE, col = "green", lwd = 3)
curve(dchisq(x, 3), add = TRUE, col = "blue", lwd = 3)
curve(dchisq(x, 5), add = TRUE, col = "orange", lwd = 3)
# Dotted baseline at density zero.
abline(h = 0, lty = 3)
# par("usr")[2] and [4] are the right and top edges of the plot region;
# xjust = 1 right-aligns the legend there.
legend(par("usr")[2], par("usr")[4],
       xjust = 1,
       legend = c("df=1", "df=2", "df=3", "df=5"),
       lwd = 3, lty = 1,
       col = c("red", "green", "blue", "orange"))
# Student t densities for 1, 2, 5 and 10 degrees of freedom, with the
# standard normal density (dotted) for comparison.
curve(dt(x, 1), xlim = c(-3, 3), ylim = c(0, 0.4), col = "red", lwd = 3)
curve(dt(x, 2), add = TRUE, col = "blue", lwd = 3)
curve(dt(x, 5), add = TRUE, col = "green", lwd = 3)
curve(dt(x, 10), add = TRUE, col = "orange", lwd = 3)
curve(dnorm(x), add = TRUE, lwd = 4, lty = 3)
title(main = "Student T distributions")
# Legend anchored at the top-right corner of the plot region. xjust = 1 keeps
# the whole box inside the plot, and the line widths mirror the curves above
# (3 for the t densities, 4 for the normal).
legend(par("usr")[2], par("usr")[4],
       xjust = 1,
       legend = c("df=1", "df=2", "df=5", "df=10", "Std.norm."),
       lwd = c(3, 3, 3, 3, 4),
       lty = c(1, 1, 1, 1, 3),
       col = c("red", "blue", "green", "orange", par("fg")))
# Fisher F densities for several (df1, df2) pairs on [0, 2].
# Colour encodes df2 (foreground = 1, red = 3, blue = 6). Line style encodes
# df1 (dashed = 1, thin solid = 3, thick solid = 6).
# The first curve is drawn dashed so it matches its legend entry and can be
# told apart from df = 6,1, which shares its colour.
curve(df(x, 1, 1), xlim = c(0, 2), ylim = c(0, 0.8), lty = 2)
curve(df(x, 3, 1), add = TRUE)
curve(df(x, 6, 1), add = TRUE, lwd = 3)
curve(df(x, 3, 3), add = TRUE, col = "red")
curve(df(x, 6, 3), add = TRUE, col = "red", lwd = 3)
curve(df(x, 3, 6), add = TRUE, col = "blue")
curve(df(x, 6, 6), add = TRUE, col = "blue", lwd = 3)
title(main = "Fisher F distributions")
# One lwd / lty / col value per legend entry, in the order the curves were
# drawn. A shorter col vector would be recycled and mislabel the last entries.
legend(par("usr")[2], par("usr")[4],
       xjust = 1,
       legend = c("df=1,1", "df=3,1", "df=6,1", "df=3,3", "df=6,3",
                  "df=3,6", "df=6,6"),
       lwd = c(1, 1, 3, 1, 3, 1, 3),
       lty = c(2, 1, 1, 1, 1, 1, 1),
       col = c(par("fg"), par("fg"), par("fg"), "red", "red", "blue", "blue"))
# Q-Q plots compare how similar two distributions are.
# Case 1: two independent samples from the same normal distribution
# (mean 12, sd 1).
x <- rnorm(1000, mean = 12, sd = 1)
y <- rnorm(1000, mean = 12, sd = 1)
qqplot(x, y)
# Case 2: a uniform sample on [12, 111] against a normal sample
# (mean 12, sd 1).
x <- runif(1000, min = 12, max = 111)
y <- rnorm(1000, mean = 12, sd = 1)
qqplot(x, y)
# Each sample against the theoretical normal quantiles.
qqnorm(x)
qqnorm(y)

You might also like