Individual Variable Data Analysis: Warning
Individual Variable Data Analysis: Warning
Individual Variable Data Analysis: Warning
Venkata Yadati
Warning
Warning: You must have the “PPforest” package and “effects” package installed for this
assignment. You can install these packages by running the following code in base R (not
RStudio or RMarkdown).
install.packages("PPforest")
install.packages("effects")
table(private)
## private
## No Yes
## 212 565
There are 212 public universities and 565 private universities in the data set, for a total
of 777 colleges.
Variable: Apps
apps <- college$Apps
quantile(apps)
mean(apps)
## [1] 3001.638
var(apps)
## [1] 14978460
sd(apps)
## [1] 3870.201
boxplot(apps)
hist(apps)
Variable: Accept
accept <- college$Accept
quantile(accept)
mean(accept)
## [1] 2018.804
var(accept)
## [1] 6007960
sd(accept)
## [1] 2451.114
boxplot(accept)
hist(accept)
Variable: Enroll
enroll <- college$Enroll
quantile(enroll)
mean(enroll)
## [1] 779.973
var(enroll)
## [1] 863368.4
sd(enroll)
## [1] 929.1762
boxplot(enroll)
hist(enroll)
Variable: Top10perc
top10perc <- college$Top10perc
quantile(top10perc)
mean(top10perc)
## [1] 27.55856
var(top10perc)
## [1] 311.1825
sd(top10perc)
## [1] 17.64036
boxplot(top10perc)
hist(top10perc)
Variable: Top25perc
top25perc <- college$Top25perc
quantile(top25perc)
mean(top25perc)
## [1] 55.79665
var(top25perc)
## [1] 392.2292
sd(top25perc)
## [1] 19.80478
boxplot(top25perc)
hist(top25perc)
Variable: F.Undergrad
fundergrad <- college$F.Undergrad
quantile(fundergrad)
mean(fundergrad)
## [1] 3699.907
var(fundergrad)
## [1] 23526579
sd(fundergrad)
## [1] 4850.421
boxplot(fundergrad)
hist(fundergrad)
Variable: P.Undergrad
pundergrad <- college$P.Undergrad
quantile(pundergrad)
mean(pundergrad)
## [1] 855.2986
var(pundergrad)
## [1] 2317799
sd(pundergrad)
## [1] 1522.432
boxplot(pundergrad)
hist(pundergrad)
Variable: Outstate
outstate <- college$Outstate
quantile(outstate)
mean(outstate)
## [1] 10440.67
var(outstate)
## [1] 16184662
sd(outstate)
## [1] 4023.016
boxplot(outstate)
hist(outstate)
Variable: Room.Board
roomboard <- college$Room.Board
quantile(roomboard)
mean(roomboard)
## [1] 4357.526
var(roomboard)
## [1] 1202743
sd(roomboard)
## [1] 1096.696
boxplot(roomboard)
hist(roomboard)
Variable: Books
books <- college$Books
quantile(books)
mean(books)
## [1] 549.381
var(books)
## [1] 27259.78
sd(books)
## [1] 165.1054
boxplot(books)
hist(books)
Variable: Personal
personal <- college$Personal
quantile(personal)
mean(personal)
## [1] 1340.642
var(personal)
## [1] 458425.8
sd(personal)
## [1] 677.0715
boxplot(personal)
hist(personal)
Variable: PhD
phd <- college$PhD
quantile(phd)
mean(phd)
## [1] 72.66023
var(phd)
## [1] 266.6086
sd(phd)
## [1] 16.32815
boxplot(phd)
hist(phd)
Variable: Terminal
terminal <- college$Terminal
quantile(terminal)
mean(terminal)
## [1] 79.7027
var(terminal)
## [1] 216.7478
sd(terminal)
## [1] 14.72236
boxplot(terminal)
hist(terminal)
Variable: S.F.Ratio
sfratio <- college$S.F.Ratio
quantile(sfratio)
mean(sfratio)
## [1] 14.0897
var(sfratio)
## [1] 15.66853
sd(sfratio)
## [1] 3.958349
boxplot(sfratio)
hist(sfratio)
Variable: perc.alumni
percalumni <- college$perc.alumni
quantile(percalumni)
mean(percalumni)
## [1] 22.74389
var(percalumni)
## [1] 153.5567
sd(percalumni)
## [1] 12.3918
boxplot(percalumni)
hist(percalumni)
Variable: Expend
expend <- college$Expend
quantile(expend)
mean(expend)
## [1] 9660.171
var(expend)
## [1] 27266866
sd(expend)
## [1] 5221.768
boxplot(expend)
hist(expend)
Variable: Grad.Rate
gradrate <- college$Grad.Rate
quantile(gradrate)
mean(gradrate)
## [1] 65.46332
var(gradrate)
## [1] 295.0737
sd(gradrate)
## [1] 17.17771
boxplot(gradrate)
hist(gradrate)
Multivariate Data Analysis
Acceptance Rate vs. Grad.Rate
college$acceptrate <- college$Accept/college$Apps
mean(college$acceptrate)
## [1] 0.7469277
##
## Call:
## lm(formula = college$Grad.Rate ~ college$acceptrate, data = college)
##
## Residuals:
## Min 1Q Median 3Q Max
## -58.491 -10.806 0.968 12.496 57.411
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 90.493 3.059 29.58 < 2e-16 ***
## college$acceptrate -33.510 4.018 -8.34 3.39e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 16.47 on 775 degrees of freedom
## Multiple R-squared: 0.08235, Adjusted R-squared: 0.08117
## F-statistic: 69.55 on 1 and 775 DF, p-value: 3.39e-16
plot(college$acceptrate,college$Grad.Rate)
Room.Board vs. Books
model2 <- lm(college$Books ~ college$Room.Board, data = college)
summary(model2)
##
## Call:
## lm(formula = college$Books ~ college$Room.Board, data = college)
##
## Residuals:
## Min 1Q Median 3Q Max
## -418.37 -84.38 -23.92 63.17 1746.59
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.654e+02 2.410e+01 19.313 < 2e-16 ***
## college$Room.Board 1.927e-02 5.363e-03 3.592 0.000349 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 163.9 on 775 degrees of freedom
## Multiple R-squared: 0.01637, Adjusted R-squared: 0.01511
## F-statistic: 12.9 on 1 and 775 DF, p-value: 0.000349
plot(college$Books,college$Room.Board)
perc.alumni vs. Room.Board
model3 <- lm(college$Room.Board ~ college$perc.alumni, data = college)
summary(model3)
##
## Call:
## lm(formula = college$Room.Board ~ college$perc.alumni, data = college)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2386.8 -768.5 -169.1 728.3 3519.3
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3809.293 79.215 48.09 < 2e-16 ***
## college$perc.alumni 24.105 3.059 7.88 1.11e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1056 on 775 degrees of freedom
## Multiple R-squared: 0.07418, Adjusted R-squared: 0.07299
## F-statistic: 62.1 on 1 and 775 DF, p-value: 1.107e-14
plot(college$Room.Board,college$perc.alumni)