Exam Answer Sheet - 2
Exam Answer Sheet - 2
Exam Answer Sheet - 2
> DATA <- read.csv("Guardian_University_Guide_2019_BMM (1).csv")
> set.seed(201964927)
> n.sample.omit<- sample(5:10,1)
> n.sample <- nrow(DATA) - n.sample.omit
> z.sel <- sample(1:nrow(DATA),n.sample,replace=FALSE)
> my.data <- DATA[z.sel,]
Qa- Use the Guardian score to group the universities into two groups – those with a score of 70 and
above and those with a score of below 70. How many and what percentage of institutions are in
each group?
Qb -Investigate the distributions of, and provide appropriate numerical summaries of the 2 variables,
Satisfied.with.course and Satisfied.with.Assessment. Give reasons for your choice of numerical
summary statistics
library(lattice)
library(openintro)
histogram(L$Guardian.score.100,
main = "score < 70",
xlab = "Guardian.score.100",
ylab = "Percent of total",
col = "gold2")
histogram(H$Guardian.score.100,
main = "score >= 70",
xlab = "Guardian.score.100",
ylab = "Percent of total",
col = "gold2")
Figure 1: Statistical distribution of the 2 categories (With scores < or >= 70)
Universities with a Guardian score < 70 have a negatively skewed histogram, in which the mean is
less than the median: the largest part of the data lies towards the right-hand side of the
distribution, with a few small values in the left tail.
However, those with a Guardian score >= 70 have a positively skewed histogram, which suggests that the
mean is greater than the median. Most of the data lies towards the left-hand side of the distribution
(scores between 70 and 85), with a few large values to the right (90 and above). We conclude that
the majority of these universities have a score of less than 90, and that a score of 70 can be used as a
criterion to split the data into 2 representative groups.
summary(my.data$Satisfied.with.course)
Min. 1st Qu. Median Mean 3rd Qu. Max.
57.00 78.00 82.00 81.94 87.00 96.00
> summary(my.data$Satisfied.with.Assessment)
Min. 1st Qu. Median Mean 3rd Qu. Max.
35.00 65.00 70.00 69.15 74.00 89.00
I chose the summary() function because there are no missing values in our dataset
Qc- Using suitable plots, examine the distributions of Student.staff.ratio, and Average.Entry.Tariff,
over the two groups from (a) and describe your findings.
histogram(L$Student.staff.ratio,
main = "score < 70",
xlab = "Student.staff.ratio",
ylab = "Percent of total",
col = "gold2")
histogram(H$Student.staff.ratio,
main = "score >= 70",
xlab = "Student.staff.ratio",
ylab = "Percent of total",
col = "gold2")
Figure 2: Statistical distributions of Student.staff.ratio variable for each group (< or >= 70)
The student-staff ratio of universities with a score < 70 has a symmetric distribution, so the majority of
institutions have a ratio near the median. However, the data for scores >= 70 are more negatively skewed,
so the majority of this subset has a high student-staff ratio.
histogram(L$Average.Entry.Tariff,
main = "score < 70",
xlab = "Average.Entry.Tariff",
ylab = "Percent of total",
col = "gold2")
histogram(H$Average.Entry.Tariff,
main = "score >= 70",
xlab = "Average.Entry.Tariff",
ylab = "Percent of total",
col = "gold2")
Figure 3: Statistical distributions of Average.Entry.Tariff variable for each group (< or >= 70)
Similarly to the student-staff ratio, the average entry tariff has a symmetric distribution for the
institutions with a score < 70, but a negatively skewed one for the second group. The majority of the
average entry tariffs of universities with a high Guardian score are high values.
library(PerformanceAnalytics)
Scores < 70 present more significant correlations than the other category (scores >= 70). The most significant
correlation is r=0.70, between Satisfied.with.Teaching and Satisfied.with.course, which is a logical
result, since satisfaction with teaching increases with satisfaction with the course in all Guardian
score categories. Similarly to the group with scores >= 70, Expenditure.per.student.FTE varies in the
opposite direction to continuation and satisfaction with teaching (r<0).
z.kn <- my.data[,4:7]
summary(z.kn)
Satisfied.with.Teaching Satisfied.with.course Continuation Expenditure.per.student.F
Min. :55.00 Min. :57.00 Min. :73.00 Min. : 2.000
1st Qu.:77.00 1st Qu.:78.00 1st Qu.:86.00 1st Qu.: 4.000
Median :80.00 Median :82.00 Median :91.00 Median : 5.000
Mean :79.79 Mean :81.94 Mean :89.59 Mean : 5.299
3rd Qu.:83.00 3rd Qu.:87.00 3rd Qu.:93.00 3rd Qu.: 7.000
Max. :94.00 Max. :96.00 Max. :99.00 Max. :10.000
>
pairs(z.kn)
Scores >= 70: On the diagonal of the figure above, we find the distributions of the 4 variables:
Satisfied.with.Teaching, Satisfied.with.course, Continuation, Expenditure.per.student.FTE.
Correlations are in general low between all of the variables; the most important value is the one
between Satisfied.with.Teaching and Satisfied.with.course (r=0.29), confirming that satisfaction
with teaching is related to satisfaction with the course. However, Expenditure.per.student.FTE varies in
the opposite direction to continuation and satisfaction with teaching (r<0).
cor(z.kn)
> cor(z.kn)
Satisfied.with.Teaching Satisfied.with.course Continuation Expenditure.per.student.FTE
Satisfied.with.Teaching 1.00000000 0.75666345 0.1568911 0.02648637
Satisfied.with.course 0.75666345 1.00000000 0.2532291 -0.01272594
Continuation 0.15689105 0.25322906 1.0000000 0.30345429
Expenditure.per.student.FTE 0.02648637 -0.01272594 0.3034543 1.00000000
> round(cor(z.kn),2)
Satisfied.with.Teaching Satisfied.with.course
Continuation Expenditure.per.student.FTE
Satisfied.with.Teaching 1.00 0.76
0.16 0.03
Satisfied.with.course 0.76 1.00
0.25 -0.01
Continuation 0.16 0.25
1.00 0.30
Expenditure.per.student.FTE 0.03 -0.01
0.30 1.00
Qe- Take a subset of the universities corresponding to those in Scotland, Wales and Northern Ireland.
How many universities are in this subset?
Answer is:
Scotland: 14 universities
Wales: 2 universities
Northern Ireland: 8 universities
Total: 24 universities in the subset
Qf- Using this subset of the data, carry out single link, complete link and average link hierarchical
cluster analysis using all 9 performance variables (i.e. exclude Rank, Name, Guardian Score and
Country from the data set you read in) and the Euclidean distance between the universities.
Explain why you scaled or did not scale the variables. For each clustering method write down
the agglomerative coefficient. Which of the three linkage methods do you consider to be the
best; give reasons for your choice. Plot the dendrogram and the banner from the best method
and provide an interpretation of these plots. Hence, decide if there are a small number of
groups which capture the structure in these data. Give reasons for your choice of the number of
groups to use. How many universities are in each group?
library(cluster)
z.df <- wales[,4:12]
z.df.scale <- scale(z.df)
z <- agnes(z.df.scale,method="single")
plot(z)
I scaled the data because each observation's feature values are treated as coordinates
in 9-dimensional space (one dimension per performance variable) and the Euclidean distances between
these coordinates are then calculated. If the variables are not put on a common scale, those measured
in larger units dominate the distances, which may lead to false results.
Agglomerative coefficient:
Single: 0.26
Complete: 0.38
Average: 0.59
The best linkage method is “Complete” because it splits the entire data types into
representative groups, with the highest agglomerative scores for the 3 countries.
Figure 5: The dendrogram and the banner from the best method (complete) for the 3 countries,
respectively from top to bottom: Northern Ireland, Scotland and Wales
Qg- For each of the groups from (f) calculate the average scores on each of the 9 performance
measurements. Hence provide an interpretation of the groups.
round(aggregate(z.df,list(z.gp),mean),2)
Group.1 Satisfied.with.Teaching Satisfied.with.course Continuation Expenditure.per.student.FTE Student.staff.ratio
1 1 82.00 88.0 92.25 3.00 19.75
2 2 76.50 71.5 82.50 5.50 31.55
3 3 94.00 90.0 98.00 4.00 16.10
4 4 87.33 89.0 89.00 7.67 23.63
5 5 79.00 79.5 92.50 6.25 22.88
Career.prospects Value.added.score Average.Entry.Tariff Satisfied.with.Assessment
1 73.00 5.25 154.50 72.75
2 61.00 7.00 135.50 66.00
3 92.00 9.00 212.00 82.00
4 74.00 8.33 176.33 72.33
5 67.75 7.50 172.75 60.25
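Third group (a single university): it has the highest mean on Satisfied.with.Teaching,
Satisfied.with.course, Continuation, Career.prospects, Value.added.score, Average.Entry.Tariff and
Satisfied.with.Assessment, and the lowest Student.staff.ratio. Its Expenditure.per.student.FTE (4.00)
is not the highest; group 4 has the highest expenditure (7.67).
The other groups do not show very different results in comparison with the third group.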
Qh-Carry out a principal component analysis of the 9 performance measures using all the data.
You need to decide if you need to use the correlation or covariance matrix – give reasons for
your choice. Provide an interpretation of the first three components. Plot the first two
principal component scores and label with points to identify if the universities have a Guardian
Score of 70+ or under 70. What is your interpretation of this plot?
library("FactoMineR")
library("factoextra")
>
pcauni<-prcomp(my.data[,c(4:12)],scale=TRUE)
> print(pcauni,digits=2)
Rotation (n x k) = (9 x 9):
PC1 PC2 PC3 PC4 PC5 PC6
PC7 PC8 PC9
Satisfied.with.Teaching -0.36 0.472 -0.042 0.187 -0.066 0.1481
-0.102 -0.0093 0.760
Satisfied.with.course -0.36 0.411 -0.078 -0.115 -0.278 0.4854
-0.023 0.3184 -0.517
Continuation -0.41 -0.237 0.039 -0.411 -0.307 -0.4937
0.178 0.4591 0.158
Expenditure.per.student.FTE -0.24 -0.340 -0.327 0.785 -0.144 -0.0259
0.226 0.1613 -0.086
Student.staff.ratio 0.10 0.132 0.750 0.272 -0.509 -0.0927
0.200 -0.1551 -0.060
Career.prospects -0.46 -0.170 -0.026 -0.225 0.066 0.1953
0.514 -0.6336 -0.026
Value.added.score -0.31 -0.093 0.539 0.136 0.684 0.1045
0.042 0.3191 -0.044
Average.Entry.Tariff -0.39 -0.341 0.133 0.033 -0.159 -0.0073
-0.773 -0.2869 -0.091
Satisfied.with.Assessment -0.22 0.516 -0.112 0.150 0.216 -0.6636
-0.043 -0.2284 -0.328
.095 -0.1799 -0.5055
round((pcauni$sdev^2)/sum(pcauni$sdev^2)*100,2)
[1] 34.31 23.01 13.98 7.43 6.24 5.03 4.20 3.75 2.05
> round(cumsum((pcauni$sdev^2)/sum(pcauni$sdev^2)*100),2)
[1] 34.31 57.32 71.30 78.73 84.97 90.00 94.20 97.95 100.00
>
>
The first three components have a cumulative variance percentage of 71.30; thus, they contain about
71% of the total inertia of the data (the first two components alone contain about 57%).
This plot does not explain the variation in Guardian score well: we cannot see an important distinction
between the groups with scores < 70 and >= 70.
Qi- Calculate the score on the first component and plot this against the Guardian Score in the data
set. What does the plot and the correlation coefficient between these two variables imply?
> #(i)calculate the score on the first component and plot this against the Gardian Scor
> my_data=my.data[,4:12]
head(my_data)
Satisfied.with.Teaching Satisfied.with.course Continuation Expenditure.per.student.FTE Student.staff.ra
68 81 86 92 2 1
119 77 86 78 2 2
72 81 86 91 4 2
113 78 84 87 3 2
117 77 76 85 3 2
36 89 92 95 5 2
Career.prospects Value.added.score Average.Entry.Tariff Satisfied.with.Assessment
68 69 3 135 74
119 69 1 94 71
72 72 5 141 65
113 55 1 124 63
117 62 4 109 65
36 76 7 143 74
>
>
pca_scores=predict(pcauni)
> head(round(pca_scores,2))
PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8 PC9
68 0.11 1.11 -0.52 -1.50 -0.48 -0.39 -0.60 -0.18 -0.09
119 2.33 1.86 -0.52 -0.59 -0.58 0.83 0.27 -1.22 -0.73
72 -0.16 0.15 0.37 -0.62 -0.64 0.52 -0.20 0.07 0.08
113 2.07 0.67 0.03 -0.54 -1.76 0.32 -0.65 0.15 0.01
117 2.19 0.35 1.00 -0.13 -0.49 -0.04 0.16 -0.34 0.28
36 -1.84 1.48 1.82 0.30 -1.32 -0.02 0.35 0.21 0.14
plot(my.data[,("Guardian.score.100")],pca_scores[,("PC1")],
+ xlab="Guardian score",ylab = "PCA First Component(PC1)",
+ main="PCA fist component vs Guardian Score")
Figure 6: Plot of the first principal component against the Guardian score with correlation measure
> cor(my.data[,3],pca_scores[,1])
[1] -0.9562752
The plot shows a strong negative correlation between PC1 and the Guardian score (r=-0.956), so the
first component explains a large part of the variation in Guardian.score.
Q j- Plot the university names on the scatter plot of the second and third principal components.
What is your interpretation of this plot in relation to your interpretation of these components from
(h)?
#(j)Plot the university names on the scatter plot of the second and third principal com
> library(lattice)
> par(mfrow=c(1,1))
> plot(unipc[,2:3],type="n")
> text(unipc[,2],unipc[,3],labels=my.data$Name.Institution,cex=0.7)
>
Figure 7: Plot of the university names on the scatter plot of the second and third principal
components.
In the plot, we see that PC2 and PC3 do not explain the variation between institutions: there is no remarkable
separation between institutions in the plot, which agrees with the results of question (h).
Qk- In the light of your principal component analysis discuss the implications for the use of a single
score and rank in the Guardian League tables.
Answer: As far as the PCA is concerned, using a single score and rank in the Guardian League tables does
not give satisfactory results; this appears in our results above, where we do not find a great
distinction between the variables based on the principal components.
Appendix
> DATA <- read.csv("Guardian_University_Guide_2019_BMM (1).csv")
> set.seed(201964927)
> n.sample.omit<- sample(5:10,1)
> n.sample <- nrow(DATA) - n.sample.omit
> z.sel <- sample(1:nrow(DATA),n.sample,replace=FALSE)
> my.data <- DATA[z.sel,]
> H= subset(my.data, Guardian.score.100 >= 70)
> L= subset(my.data, Guardian.score.100 < 70)
> H_score = nrow(H)/nrow(my.data)
> L_score = nrow(L)/nrow(my.data)
> library(lattice)
> library(openintro)
> #question b
> library(lattice)
> library(openintro)
> histogram(L$Guardian.score.100,
+ main = "score < 70",
+ xlab = "Guardian.score.100",
+ ylab = "Percent of total",
+ col = "gold2")
> histogram(H$Guardian.score.100,
+ main = "score >= 70",
+ xlab = "Guardian.score.100",
+ ylab = "Percent of total",
+ col = "gold2")
> summary(my.data$Satisfied.with.course)
Min. 1st Qu. Median Mean 3rd Qu. Max.
57.00 78.00 82.00 81.94 87.00 96.00
> summary(my.data$Satisfied.with.Assessment)
Min. 1st Qu. Median Mean 3rd Qu. Max.
35.00 65.00 70.00 69.15 74.00 89.00
> histogram(L$Student.staff.ratio,
+ main = "score < 70",
+ xlab = "Student.staff.ratio",
+ ylab = "Percent of total",
+ col = "gold2")
> histogram(H$Student.staff.ratio,
+ main = "score >= 70",
+ xlab = "Student.staff.ratio",
+ ylab = "Percent of total",
+ col = "gold2")
> histogram(L$Average.Entry.Tariff,
+ main = "score < 70",
+ xlab = "Average.Entry.Tariff",
+ ylab = "Percent of total",
+ col = "gold2")
> histogram(H$Average.Entry.Tariff,
+ main = "score >= 70",
+ xlab = "Average.Entry.Tariff",
+ ylab = "Percent of total",
+ col = "gold2")
> #question d
> library(PerformanceAnalytics)
> #question d
> library(PerformanceAnalytics)
> library(lattice)
> my_data <- H[, c(4,5,6,7)]
> chart.Correlation(my_data, histogram=TRUE, pch=19)
> #or i can answer
> z.kn <- my.data[,4:7]
> summary(z.kn)
Satisfied.with.Teaching Satisfied.with.course Continuation
Expenditure.per.student.FTE
Min. :55.00 Min. :57.00 Min. :73.00 Min. :
2.000
1st Qu.:77.00 1st Qu.:78.00 1st Qu.:86.00 1st Qu.:
4.000
Median :80.00 Median :82.00 Median :91.00 Median :
5.000
Mean :79.79 Mean :81.94 Mean :89.59 Mean :
5.299
3rd Qu.:83.00 3rd Qu.:87.00 3rd Qu.:93.00 3rd Qu.:
7.000
Max. :94.00 Max. :96.00 Max. :99.00 Max. :
10.000
> pairs(z.kn)
> cor(z.kn)
Satisfied.with.Teaching Satisfied.with.course
Continuation Expenditure.per.student.FTE
Satisfied.with.Teaching 1.00000000 0.75666345
0.1568911 0.02648637
Satisfied.with.course 0.75666345 1.00000000
0.2532291 -0.01272594
Continuation 0.15689105 0.25322906
1.0000000 0.30345429
Expenditure.per.student.FTE 0.02648637 -0.01272594
0.3034543 1.00000000
> round(cor(z.kn),2)
Satisfied.with.Teaching Satisfied.with.course
Continuation Expenditure.per.student.FTE
Satisfied.with.Teaching 1.00 0.76
0.16 0.03
Satisfied.with.course 0.76 1.00
0.25 -0.01
Continuation 0.16 0.25
1.00 0.30
Expenditure.per.student.FTE 0.03 -0.01
0.30 1.00
> sco= subset(my.data, Country == "Scotland")
> NI=subset(my.data, Country == "NI")
> wales=subset(my.data, Country == "Wales")
> NII = nrow(NI)
> waless = nrow(wales)
> sco= subset(my.data, Country == "Scotland")
> #question f
> library(cluster)
> z.df <- wales[,4:12]
> z.df.scale <- scale(z.df)
> z <- agnes(z.df.scale,method="single")
> plot(z)
Hit <Return> to see next plot: library(cluster)
Hit <Return> to see next plot:
> z.df <- wales[,4:12]
> z.df.scale <- scale(z.df)
> z <- agnes(z.df.scale,method="single")
> plot(z)
Hit <Return> to see next plot: z.df <- wales[,4:12]
Hit <Return> to see next plot: z.df <- wales[,4:12]
> z.df.scale <- scale(z.df)
> z <- agnes(z.df.scale,method="average")
> plot(z)
Hit <Return> to see next plot: z.df <- wales[,4:12]
Hit <Return> to see next plot: z.df.scale <- scale(z.df)
> z <- agnes(z.df.scale,method="average")
> plot(z)
Hit <Return> to see next plot: z.df <- wales[,4:12]
Hit <Return> to see next plot: z.df <- sco[,4:12]
> z.df <- sco[,4:12]
> z.df.scale <- scale(z.df)
> z <- agnes(z.df.scale,method="complete")
> plot(z)
Hit <Return> to see next plot: z.df <- sco[,4:12]
Hit <Return> to see next plot: z.df.scale <- scale(z.df)
> z <- agnes(z.df.scale,method="complete")
> plot(z)
Hit <Return> to see next plot: z.df.scale <- scale(z.df)
Hit <Return> to see next plot: #question g
> z.gp <- cutree(z,k=5)
> table(z.gp)
z.gp
1 2 3 4 5
4 2 1 3 4
> round(aggregate(z.df,list(z.gp),mean),2)
Group.1 Satisfied.with.Teaching Satisfied.with.course Continuation
Expenditure.per.student.FTE Student.staff.ratio
1 1 82.00 88.0 92.25
3.00 19.75
2 2 76.50 71.5 82.50
5.50 31.55
3 3 94.00 90.0 98.00
4.00 16.10
4 4 87.33 89.0 89.00
7.67 23.63
5 5 79.00 79.5 92.50
6.25 22.88
Career.prospects Value.added.score Average.Entry.Tariff
Satisfied.with.Assessment
1 73.00 5.25 154.50
72.75
2 61.00 7.00 135.50
66.00
3 92.00 9.00 212.00
82.00
4 74.00 8.33 176.33
72.33
5 67.75 7.50 172.75
60.25
> #question h
> library(MASS)
> detach("package:MASS", unload = TRUE)
> library(MASS, lib.loc = "C:/Program Files/R/R-3.5.2/library")
> #question h
> library(MASS)
> library(class)
> library("FactoMineR")
> library("factoextra")
> res.pca <- PCA(my.data[,4:12], scale.unit = TRUE)
> library("factoextra")
> eig.val <- get_eigenvalue(res.pca)
> eig.val <- get_eigenvalue(res.pca)
> eig.val
eigenvalue variance.percent cumulative.variance.percent
Dim.1 3.0875259 34.305843 34.30584
Dim.2 2.0713373 23.014859 57.32070
Dim.3 1.2579163 13.976848 71.29755
Dim.4 0.6685941 7.428823 78.72637
Dim.5 0.5617666 6.241851 84.96822
Dim.6 0.4524569 5.027299 89.99552
Dim.7 0.3782468 4.202742 94.19827
Dim.8 0.3376331 3.751479 97.94974
Dim.9 0.1845230 2.050256 100.00000
> library(ggplot2)
> irispca <- prcomp(my.data[,4:12], scale.=T)
> PC1 <- irispca$x[,1]
> PC2 <- irispca$x[, 2]
> Guardian.score <- my.data$Guardian.score.100
> df=my.data.frame(PC1, PC2)
> pcauni<-prcomp(my.data[,c(4:12)],scale=TRUE)
> print(pcauni,digits=2)
Standard deviations (1, .., p=9):
[1] 1.76 1.44 1.12 0.82 0.75 0.67 0.62 0.58 0.43
Rotation (n x k) = (9 x 9):
PC1 PC2 PC3 PC4 PC5 PC6
PC7 PC8 PC9
Satisfied.with.Teaching -0.36 0.472 -0.042 0.187 -0.066 0.1481
-0.102 -0.0093 0.760
Satisfied.with.course -0.36 0.411 -0.078 -0.115 -0.278 0.4854
-0.023 0.3184 -0.517
Continuation -0.41 -0.237 0.039 -0.411 -0.307 -0.4937
0.178 0.4591 0.158
Expenditure.per.student.FTE -0.24 -0.340 -0.327 0.785 -0.144 -0.0259
0.226 0.1613 -0.086
Student.staff.ratio 0.10 0.132 0.750 0.272 -0.509 -0.0927
0.200 -0.1551 -0.060
Career.prospects -0.46 -0.170 -0.026 -0.225 0.066 0.1953
0.514 -0.6336 -0.026
Value.added.score -0.31 -0.093 0.539 0.136 0.684 0.1045
0.042 0.3191 -0.044
Average.Entry.Tariff -0.39 -0.341 0.133 0.033 -0.159 -0.0073
-0.773 -0.2869 -0.091
Satisfied.with.Assessment -0.22 0.516 -0.112 0.150 0.216 -0.6636
-0.043 -0.2284 -0.328
> round((pcauni$sdev^2)/sum(pcauni$sdev^2)*100,2)
[1] 34.31 23.01 13.98 7.43 6.24 5.03 4.20 3.75 2.05
> round(cumsum((pcauni$sdev^2)/sum(pcauni$sdev^2)*100),2)
[1] 34.31 57.32 71.30 78.73 84.97 90.00 94.20 97.95 100.00
> unipc <- predict(pcauni)
> unipc <- predict(pcauni)
> unipc
PC1 PC2 PC3 PC4 PC5
PC6 PC7 PC8 PC9
68 0.109492604 1.11393332 -0.51926769 -1.49861961 -0.481300222
-0.386410061 -0.59781937 -0.18144236 -0.092135171
119 2.326457771 1.85759774 -0.51903953 -0.59044213 -0.582289445
0.825556385 0.26510388 -1.21928409 -0.730895339
72 -0.155172218 0.15240977 0.36918432 -0.62214204 -0.641500355
0.522124760 -0.20494000 0.07438884 0.078529810
113 2.074046305 0.67467775 0.02685934 -0.54408765 -1.755687151
0.322968079 -0.65414731 0.14795441 0.005741605
117 2.188672668 0.35088084 0.99747998 -0.12908175 -0.487439952
-0.037680548 0.16482890 -0.34244924 0.282725854
36 -1.842773785 1.47780895 1.81753681 0.29578377 -1.324518846
-0.017035358 0.34928079 0.20508103 0.142158749
85 0.740748140 0.66681774 1.04498672 -0.40717704 0.420452550
-0.335691774 0.07772037 0.51140214 0.252710845
44 -0.407581876 1.94875637 0.43971054 -0.26508063 0.884451960
0.306622211 0.42879464 -0.57881330 -0.137017446
98 0.658624405 2.17552864 1.72343488 0.68930993 0.135586493
0.004761093 -0.42985553 -0.27746743 -0.485857104
19 -1.575000243 0.94911694 -0.29446115 -0.76589815 0.464543652
-0.078791850 0.22967237 -0.83908373 -0.184062389
67 -0.720293152 3.10206873 2.83511506 0.61386993 -0.035225991
0.724083668 0.92562478 0.87528155 0.540358968
91 1.319370838 -0.42176723 1.56954301 -0.55150904 0.407791335
0.118660514 0.91876046 -0.30331369 0.214003756
13 -1.689772509 1.43307236 -1.02118747 -0.94136867 -0.235649832
-0.115609195 -0.31833630 -0.61103821 -0.446347737
99 1.493019535 1.44455152 -2.26818528 -0.67872824 0.184576425
1.072672802 -0.24195187 0.47274597 -0.968374813
4 -3.465278740 -0.40740535 -0.34533291 0.62010398 -0.400460028
-0.064522887 -0.69002014 0.98506103 -0.537743850
81 1.261256050 -0.31961354 -1.92349209 0.21117459 -0.779002792
-1.013717535 -0.33308730 0.23515749 -0.724479620
106 1.871464149 -1.79630610 1.32930640 1.43733071 -0.068806897
-0.159733178 0.42979924 -0.62563633 1.191127040
83 0.891329605 1.22537973 -0.28557370 0.15577119 0.892998850
2.164730288 0.32645213 -0.39979567 -0.021434523
49 -0.532287577 0.21251754 0.40744900 -0.71885221 -0.187153870
-1.026004138 -0.53237623 0.53031692 -0.444647890
84 0.189200113 -0.41762183 0.14778350 -0.33556999 -0.390946625
-0.620401859 0.98488909 1.50451101 0.082493100
2 -4.507722039 0.79337911 0.32537247 -0.88189929 0.667471882
-0.347294334 -1.42642651 -0.79554928 0.575099349
12 -2.559299942 -1.27396797 0.20474526 0.80642631 -0.004042045
-0.186703147 0.51127502 -0.25963121 0.187677230
63 -0.287453400 0.26156583 -0.40986326 -1.25036125 -1.423364565
-0.202852254 0.79018167 -0.63886720 0.047174546
76 1.480994659 -3.36349380 0.25348918 -0.65822859 0.148996581
-0.350996048 -1.01058178 -0.43598739 0.176518165
64 -0.321948992 -0.23336617 0.38748539 -0.33299747 0.727505355
0.100588291 0.52801637 0.78536416 0.477335420
6 -2.772699612 2.41663723 -2.12400435 -0.90120202 0.673744890
-0.146798470 0.78730762 -0.21931616 0.092135277
112 1.972053137 1.39016641 0.23119311 0.60527225 0.287183593
-0.301111265 -0.42204859 0.52089780 0.001253937
66 -0.353321133 1.17558171 -1.60269076 1.38917670 -1.275251779
-1.079899448 0.34694163 0.46392686 -0.065458868
40 -0.007755342 -1.99553315 0.11635294 -0.63451779 1.663459215
-0.729922279 0.04070034 -0.62939194 -0.401362458
5 -3.541907472 0.16585954 0.08260119 -0.13773256 -0.351946002
0.710840204 0.27709056 0.31223233 0.318475562
101 2.048128828 -0.33635000 0.56114383 -0.10020351 0.565190158
-0.721631950 0.16417903 0.68328872 0.423874528
42 -0.846685255 -0.47543723 -0.50086918 -0.33059155 -0.047331661
-0.291609731 -0.63200420 0.86188928 -0.450767490
90 1.138643885 0.38491797 -0.49094018 -0.49659793 0.105091809
-0.113708731 0.26078206 0.02571153 0.074811610
88 1.260231748 -0.83032901 -0.82606983 -0.06220042 0.578171839
-1.438804664 0.76110144 -0.62561266 -1.006836949
100 2.271042507 -0.33725326 -0.74611037 1.44801202 1.668999900
-0.427360241 -0.06882954 -0.17985696 -0.990858399
57 0.401578054 2.77001960 -1.24768086 -0.31799322 0.050959240
0.074576246 -0.58595121 0.08186123 0.348030787
107 0.772078319 -0.88968194 0.86519965 0.46182809 -2.101957279
-0.996125304 0.70142606 0.12464875 -0.701123702
69 -0.008323897 -0.10305567 0.31572422 0.14822662 0.628040366
-0.367842663 0.46885589 1.30680689 -0.348195001
39 -0.526287088 1.37907587 0.18330437 -0.07898025 1.075272067
-0.240870036 -0.48061858 0.68424081 -0.307784197
55 -0.207595867 -0.02074759 0.83070066 -0.41297174 1.186857084
0.252042930 0.20748003 0.51056671 -0.018579933
77 0.735376304 -0.39114223 0.04889009 -0.73471808 -0.077759076
0.076529550 0.02558234 0.29323608 0.161909320
35 -0.720637339 -0.24306534 -1.11159910 -0.13022905 -0.615558511
-0.189831757 0.39968238 0.46977247 0.084093883
32 -1.144902707 -0.97562473 -1.10151363 -0.37327057 -0.425825349
0.086791799 0.65548256 -0.59180478 0.171603808
21 -2.758318587 0.46682758 1.43243209 0.65761361 -0.956471194
0.293039364 -1.68203885 0.21102166 0.141232569
115 2.401229825 1.46053796 0.98008274 1.52895405 0.398857405
0.307123171 0.52652212 0.09235801 0.198218249
23 -1.754713810 -3.01875130 0.15774723 1.16610238 0.402686715
-0.554801206 -0.05671563 -0.74922042 0.092327090
80 1.013200471 1.09094544 0.34369280 0.08560943 0.711521783
-0.237232633 -0.14569711 1.20627509 -0.058196104
24 -1.279496218 -1.57791989 0.94250006 -0.33764795 0.574693155
0.397557526 -0.51727653 -1.17618725 0.021218466
102 0.642515909 1.32812629 0.43352747 -0.15050683 -1.629996134
-0.750002105 0.67471915 -0.63893989 0.307310976
114 1.755598380 0.39756700 -0.92124438 -0.51752569 -1.104261029
0.197707500 -0.29919271 -0.04416906 0.096124040
38 -1.164537493 0.17126356 1.28158091 -0.56829771 -0.228207111
0.490995322 0.70397049 -0.85327286 -0.216449577
60 -0.622813614 1.63176786 -3.55708396 0.85384111 -0.597485685
0.344701605 0.47891522 0.69572061 0.626886262
58 0.051458468 1.00112286 0.68469819 -0.48082098 1.547635852
-0.225262361 0.20376893 0.78697423 0.454759850
20 -1.215531332 -2.44646734 -1.12342878 0.64519139 -0.042543244
-0.340843386 -0.87978083 -0.59989457 0.496434353
10 -1.467012298 1.86811890 -0.84916058 0.52322533 1.387028349
-0.153163806 0.03732391 0.68563143 0.098397643
28 -1.047677406 0.21137100 -0.81199047 -0.35213745 0.338505145
0.666931661 0.83653052 0.11662753 -0.097582092
25 -1.283076519 1.13721523 -0.23243910 -1.50455111 1.473088612
0.495843808 -0.52194794 0.09622880 -0.305956314
118 1.643206187 0.98048678 3.46710663 1.71596316 -0.373121532
-0.378185308 -0.58482574 -0.54608576 -0.921961007
1 -5.483947181 -0.38000590 -2.03286666 0.68275155 0.256165453
-0.203723572 -1.57796760 -0.62153735 0.249850800
46 -1.467351120 -0.29013731 0.94497043 2.24443938 -0.702010620
0.329287014 0.08202280 0.55898065 -0.220853439
78 0.674466957 -1.22387752 -1.21786470 1.53652200 -0.210046594
-1.436119022 0.45792310 0.52474827 -0.059207843
54 0.161217761 -0.26968120 0.51918632 -0.54881420 0.716987276
-0.586950121 0.07394390 -0.02732637 -0.426958350
120 2.568748164 1.40492416 -0.78656913 -0.09964175 -0.794277719
2.139734395 -0.13709373 -0.59995760 0.695017295
111 2.609360380 -1.63888021 -1.78805903 1.27186219 0.239487496
0.361801187 0.35798171 0.44896020 -0.463189787
31 -1.183888865 -1.56224823 -1.61054035 0.03563184 -1.493000372
-0.189865302 0.71048761 -0.40123859 0.147728747
70 0.412961852 -0.86235040 -0.27868999 -0.40786942 -0.038512680
0.267470656 -0.52401648 0.16972850 0.095597922
87 1.042489353 -1.12252001 0.38033555 -0.79697724 -0.034956678
-0.386862378 0.24739559 -0.10777837 0.274224748
86 0.536877066 -0.69918894 -1.12151938 -0.56950821 -0.614869132
0.082938370 0.73014763 0.32549345 0.202854668
8 -2.544029211 0.49915728 -0.88907084 0.62985078 0.008620090
0.439057787 0.43367804 -0.76117620 -0.421587730
73 0.267230669 -2.86299981 0.98185415 2.01685635 0.406826839
1.081363182 -0.09977740 0.26883334 -0.171335339
15 -1.990955910 1.50543191 0.20446140 -1.14666413 -0.023807662
0.555196673 -0.52894820 0.49837569 -0.216584830
65 0.443278799 1.42619945 -1.19160511 -0.88273251 -0.045033831
-1.512852917 -0.12294745 -0.77609754 0.420901165
16 -1.384396179 -1.91902608 -1.48318993 0.74973072 -0.238313231
-0.039167200 0.04727699 0.06926093 -0.110680258
3 -3.181645976 -2.11489513 -0.36974096 0.59524338 -0.205698654
0.485636136 0.05223947 0.19587062 0.021345087
14 -1.072422440 -0.41092600 -0.50740211 0.26048109 1.470230750
-0.526860964 1.45381529 -0.15249841 1.190690850
37 -1.241649479 -2.25369128 -0.14017274 0.98390838 -0.773494981
0.544919691 -0.24321334 1.46327686 0.422908381
105 1.721064530 1.56231372 0.28509022 0.99638352 0.170937278
0.925669770 -0.08229365 -1.38484385 0.519580152
74 -0.319860086 -0.13199466 1.86297062 0.04278256 0.054093143
-0.562842921 -0.43550119 -0.06852019 0.334791535
9 -2.394396665 -1.58117371 1.07027136 -0.20405550 -0.308656835
-0.729606350 0.52656837 -0.53526280 -0.870698323
56 -0.214835970 0.09984063 0.63981724 -0.71409261 0.143803707
0.500938575 0.59449764 0.45654346 0.092291461
59 0.207970423 0.17967187 -0.30511543 0.63273772 0.924728854
0.731067104 -0.27354003 0.26099723 -0.026658052
109 2.394909716 0.43407209 -0.75698464 0.60241963 0.062077105
-0.019823476 -0.71447951 0.06825558 -0.230413207
62 0.262180202 3.49280637 -0.92758183 0.67301684 0.331147509
-1.032089614 -0.76334152 0.29132380 0.251596120
18 -1.419925893 0.87037473 -0.21910381 -0.30480256 0.615507835
0.005185900 0.48681602 -0.02711868 0.294419112
29 -1.355759553 -2.61252364 0.01388153 0.45859534 -0.251864464
0.833571717 0.63356730 -0.12141889 0.392001637
121 5.196243840 -3.56591437 -1.75382945 -1.45208351 0.141618920
2.503758913 -0.22292219 0.39509276 -0.864462946
11 -1.697995808 -1.11465257 -1.95721915 0.83442573 0.570294212
-0.064437364 0.28861069 0.33999765 -0.055193131
17 -1.997000132 0.77714525 0.33282492 1.37299242 1.039969110
0.898340673 -0.59030232 -0.73996335 -0.166746273
75 0.271317944 -1.27970720 -0.55929949 -0.38743149 -0.857783736
1.133443248 -0.39603850 0.52724981 0.262486964
41 -1.163287353 0.54752250 1.19310211 -0.02187968 -0.061729440
0.191798229 0.41147133 0.23550747 -0.541975034
52 -0.930667990 -2.64687641 1.34602938 0.29137886 -0.405551981
1.234438983 -0.53701230 -0.26390768 0.050742016
27 -1.299161372 -1.81795491 -0.46035060 -0.55808955 -0.322007622
0.437960550 0.20783895 0.38065904 0.169746951
116 3.202849762 -1.70343720 -0.22002171 -0.63661099 0.600820712
-1.179192718 -0.79187458 -0.77200128 0.784968605
97 1.774631899 -0.51637932 0.78840024 -1.19080732 0.879304192
0.189310119 -0.18106901 0.79626167 0.067810667
89 1.570931671 -1.89632036 1.32645981 -0.61696298 0.318665075
-0.150874880 0.22663776 0.28055443 -0.067021796
122 2.946907774 1.36078335 -0.23497558 0.47610565 -1.195767250
0.226958651 -0.14221050 -0.01883230 0.069713587
93 1.199670376 0.05734835 0.05645280 -0.12258702 -0.057078959
-0.147360460 -0.19343727 -0.63511543 0.380453709
82 1.065724619 -0.41923223 -0.56700151 -1.26014274 0.133004275
-0.075796414 -0.48450876 0.24456233 0.408295749
95 0.928931769 2.93619964 -0.96112417 2.27865624 0.426478496
0.597610711 0.20496195 -0.82173292 -0.531069257
96 0.654281759 -0.84482704 1.55368301 0.14706680 -0.272804166
-0.099278338 -1.86088984 0.57933048 0.463090666
48 -0.672632845 0.47931974 0.10911735 0.19617431 -0.035195748
-0.310797882 0.76153793 -0.14609291 -0.452891671
104 1.583389704 1.76678141 -0.76128616 -0.51082243 -0.615327685
-1.030489701 -0.45389843 0.64321189 0.292566945
103 0.496704699 1.38668172 1.25504632 0.81586965 -1.208608957
0.244045345 0.08800508 0.21997106 -0.003371502
22 -1.923364229 -0.68277483 1.13944735 -1.05076413 -0.156766892
0.775291686 -0.26779694 -0.17584842 -0.102546068
51 -0.513040339 0.99473431 2.56019059 -0.47427334 -0.397540165
-0.292456280 1.55488883 -0.35552381 -0.329287155
71 1.007769516 0.68034839 -2.05169994 1.06190409 0.753654673
0.046361792 -0.08297287 -0.75570363 0.180244260
108 2.220086329 0.97215921 0.04810253 0.59225055 0.471639205
-0.134670696 0.34149609 -0.83156236 0.652103105
47 -0.192662589 1.24494211 0.61901346 -0.33024454 0.285522026
-0.555834158 -0.55375944 -0.52297870 -0.439221495
30 -0.956353794 -0.76671649 0.78353376 -1.28364862 0.360582327
0.173900272 0.83664989 0.41953355 0.120271810
79 0.144092587 0.91085002 -0.75579364 -0.88279663 -1.199755585
-0.146701739 0.22999002 0.01279351 0.315666694
94 1.753740666 -0.08724159 1.19544752 -0.01835392 1.177834778
-0.529206538 -0.97253640 0.70077653 -0.203847371
[ reached getOption("max.print") -- omitted 6 rows ]
> plot(unipc[,1],unipc[,2],type = "n")
> points(x=unipc[,1],
y=unipc[,2],pch=ifelse(my.data$Guardian.score.100>=70,16,1),col=ifelse(my.
data$Guardian.score.100>=70,"green","red"))
> #(i)calculate the score on the first component and plot this against the
Gardian Score
> my_data=my.data[,4:12]
> head(my_data)
Satisfied.with.Teaching Satisfied.with.course Continuation
Expenditure.per.student.FTE Student.staff.ratio
68 81 86 92
2 18.0
119 77 86 78
2 22.0
72 81 86 91
4 21.5
113 78 84 87
3 24.0
117 77 76 85
3 25.9
36 89 92 95
5 29.7
Career.prospects Value.added.score Average.Entry.Tariff
Satisfied.with.Assessment
68 69 3 135
74
119 69 1 94
71
72 72 5 141
65
113 55 1 124
63
117 62 4 109
65
36 76 7 143
74
> pca_scores=predict(pcauni)
> head(round(pca_scores,2))
PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8 PC9
68 0.11 1.11 -0.52 -1.50 -0.48 -0.39 -0.60 -0.18 -0.09
119 2.33 1.86 -0.52 -0.59 -0.58 0.83 0.27 -1.22 -0.73
72 -0.16 0.15 0.37 -0.62 -0.64 0.52 -0.20 0.07 0.08
113 2.07 0.67 0.03 -0.54 -1.76 0.32 -0.65 0.15 0.01
117 2.19 0.35 1.00 -0.13 -0.49 -0.04 0.16 -0.34 0.28
36 -1.84 1.48 1.82 0.30 -1.32 -0.02 0.35 0.21 0.14
> plot(my.data[,("Guardian.score.100")],pca_scores[,("PC1")],
+ xlab="Guardian score",ylab = "PCA First Component(PC1)",
+ main="PCA fist component vs Guardian Score")
> cor(my.data[,3],pca_scores[,1])
[1] -0.9562752
> #(j)Plot the university names on the scatter plot of the second and
third principal components
> library(lattice)
> par(mfrow=c(1,1))
> plot(unipc[,2:3],type="n")
> text(unipc[,2],unipc[,3],labels=my.data$Name.Institution,cex=0.7)