Introduction to Environment
Objectives: To learn and use R programming for the statistical analysis of data:
• Basic Computations in R
• Computing descriptive statistics of the data
• Summarize samples and tables
Open Interface of RStudio
1. R Console: This area shows the output of the code you run. You can also type
code directly into the console.
2. R Script: As the name suggests, this is where you write code. To run it,
simply select the line(s) of code and press Ctrl + Enter.
3. R environment: This space lists the objects (data sets, variables, functions) currently
loaded in the session. To check whether data has been loaded properly in R, always look at this area.
4. Graphical Output: This space displays the graphs created during exploratory data
analysis.
Environment
R Script Editor History
Files, Plots
R Console
Packages
Help, Viewer
Key Points
# Use (Ctrl + L) to clear the R Console
# '#' is used for comments
# Use a double backslash "\\" or a single forward slash "/" in file paths (a single backslash starts an escape sequence inside an R string)
# Commands in R are case sensitive, so it is better to always use lower case in file
and folder names
# R ignores extra white space between tokens, so x<-1 and x <- 1 are equivalent
Instructor: Dr. Prabhat Mittal M. Sc., M.Phil, Ph.D. (FMS, DU) 1|P a g e
Post-doctoral, University of Minnesota, USA
URL: http://people.du.ac.in/~pmittal/
# A library must be loaded before the functions it provides can be used.
The libraries listed below are useful ones, although many others exist.
Before starting your work, load the libraries you need.
# library(ggplot2)
# library(corrplot)
# library(psych)
# library(data.table)
# library(curl)
# library(agricolae)
# Libraries can be installed either online (using Tools - Install Packages - From
CRAN) or offline from downloaded package archives (using
Tools - Install Packages - From .zip/.tar/.gz files).
Writing Programs in R Script Editor
R Script Editor (Write codes) R Console (Output of the code)
‘Ctrl + Enter’ to Run
# --- R Script Editor -------------------------------------------------------
# Set the working directory, then print it to confirm.
# Backslashes must be doubled inside an R string.
setwd("D:\\Learning R")
getwd()
# --- R Console (sample session) --------------------------------------------
# The recorded console session used the "Day-1" sub-folder:
# > setwd("D:\\Learning R\\Day-1")
# > getwd()
# [1] "D:/Learning R/Day-1"
# P1 --- R Script Editor ------------------------------------------------------
# Draw 10 values from the standard normal distribution and summarise them.
# rnorm() is random, so your numbers will differ from the sample console
# output shown in the "#>" comments below.
x <- rnorm(10)
x
#>  [1]  1.01622536  1.09003432 -0.87021942 -1.06429331 -0.89391671 -0.06187585
#>  [7]  1.45932555 -2.02050180 -0.13619066 -0.90256998
mean(x)
#> [1] -0.2383983
sd(x)
#> [1] 1.125004
min(x)
#> [1] -2.020502
max(x)
#> [1] 1.459326
head(x)  # first six values
#> [1]  1.01622536  1.09003432 -0.87021942 -1.06429331 -0.89391671 -0.06187585
tail(x)  # last six values
#> [1] -0.89391671 -0.06187585  1.45932555 -2.02050180 -0.13619066 -0.90256998
# P2 --- R Script Editor ------------------------------------------------------
# Roots of A*x^2 + B*x + C = 0 using the quadratic formula.
A <- 1
B <- 3
C <- 1
# First element uses the "-" branch of the formula, second the "+" branch.
my.vector <- c(
  (-B - sqrt(B^2 - 4 * A * C)) / (2 * A),
  (-B + sqrt(B^2 - 4 * A * C)) / (2 * A)
)
my.vector
#> [1] -2.618034 -0.381966
# P3 --- R Script Editor ------------------------------------------------------
# Draw 10 values from a normal distribution with mean 32 and sd 2, summarise
# them and plot a histogram. The values are random; the "#>" comments show
# one sample console session.
x <- rnorm(10, mean = 32, sd = 2)
x
#>  [1] 32.74604 32.96040 34.32075 32.32031 32.43983 31.95673 29.80607 34.77096
#>  [9] 31.33791 31.18291
mean(x)
#> [1] 32.38419
sd(x)
#> [1] 1.463795
# axes = FALSE draws the histogram without axes. The original call used
# `axis = FALSE`, which is not an argument of hist(); it is passed on as an
# unknown graphical parameter and only produces warnings.
hist(x, axes = FALSE)
# my.vector   # listed in the original script column; prints the P2 roots
#             # when P2 has been run in the same session
Instructor: Dr. Prabhat Mittal M. Sc., M.Phil, Ph.D. (FMS, DU) 2|P a g e
Post-doctoral, University of Minnesota, USA
URL: http://people.du.ac.in/~pmittal/
Try the following R Script
# Make the course folder the working directory, then print it to confirm.
# Backslashes must be doubled inside an R string.
setwd(dir = "D:\\Learning R")
getwd()
# Square root, stored and then printed
a <- sqrt(729)
a

# A number, then the same value converted to text
b <- 1947.0
b
b <- as.character(b)
b

# Integer sequence 1..6
vec <- 1:6
vec

# Character vector (the variable is named `c`, like the function c())
c <- c("Guru", "vignesh", "durai", "mittal", "rishi", "ramakar", "aswini")
c
length(c)
c[c(1, 2)]                  # first two elements

sort(c)                     # sorting, method 1
c[order(c)]                 # sorting, method 2
sort(c, decreasing = TRUE)  # reverse sort, method 1
c[rev(order(c))]            # reverse sort, method 2

# Element-wise comparison returns a logical vector
d <- c(4, 6, 5, 7, 10, 9, 4, 15)
d > 7

# Adding vectors of length 4 and 3: the shorter one is recycled and R warns
# that the longer length is not a multiple of the shorter length
p <- c(3, 5, 6, 8)
q <- c(3, 3, 3)
p + q
# Three parallel vectors, one value per person
age    <- c(22, 25, 18, 20)
name   <- c("Amit", "Rahul", "Neeti", "Komal")
gender <- c("M", "M", "F", "F")

# Combine them column-wise into a data frame and print it
details <- data.frame(
  Age    = age,
  Name   = name,
  Gender = gender
)
details
# Integer -> character -> integer round trip
z <- 0:9
digit <- as.character(z)
as.integer(digit)

# A logical vector used as an index keeps only the TRUE positions
x <- c(2, 4, 6, 8)
y <- c(TRUE, TRUE, FALSE, TRUE)
sum(x[y])

# Sum of element-wise products, then the two vectors stacked as matrix rows
x1 <- c(4, 6, 5, 7, 10, 9, 4, 15)
y1 <- c(0, 10, 1, 8, 2, 3, 4, 1)
sum(x1 * y1)
rbind(x1, y1)
# 6 x 6 matrix holding 1..36, filled column by column
a1 <- matrix(1:36, nrow = 6, ncol = 6)
a1

# Reading: [row, column]; a negative index drops that row/column
a1[2, 3]     # single element
a1[-2, -3]   # everything except row 2 and column 3
a1[-2, 3]    # column 3 without row 2
a1[, 3]      # whole column 3
a1[2, ]      # whole row 2
a1[2, -3]    # row 2 without column 3

# Writing: one element, a whole column, then a whole row
a1[2, 3] <- 14
a1[, 3] <- seq(from = 10, to = 60, by = 10)
a1[2, ] <- seq(from = 100, to = 600, by = 100)
a1
Instructor: Dr. Prabhat Mittal M. Sc., M.Phil, Ph.D. (FMS, DU) 3|P a g e
Post-doctoral, University of Minnesota, USA
URL: http://people.du.ac.in/~pmittal/
# Join first and last name into one string.
# paste() returns the combined string (separated by a space), so it can be
# stored. cat() only prints its arguments and returns NULL, so
# `name = cat(fname, lname)` would leave `name` empty.
fname <- "Prabhat"
lname <- "Mittal"
name <- paste(fname, lname)
name

# Two character vectors combined column-wise into a data frame
Names <- c("Prabhat", "Rahul", "Amit")
designation <- c("Professor", "Project head", "marketing head")
nd <- data.frame(Names, designation)
nd
# c() coerces all of its elements to one common type. Dropping the "highest"
# type one step at a time shows the coercion order through typeof().
init <- c(2L, 33, 3+2i, TRUE, "String")
typeof(init)   # "character"

init <- c(2L, 33, 3+2i, TRUE)
typeof(init)   # "complex"

init <- c(2L, 33, TRUE)
typeof(init)   # "double"

init <- c(2L, TRUE)
typeof(init)   # "integer"

init <- c(TRUE)
typeof(init)   # "logical"
# typeof -> coercion order: character > complex > double > integer > logical
# class -> coercion order: character > complex > numeric > integer > logical
# Same ladder as with typeof(), but inspected through class(); a double
# vector is reported as "numeric".
int <- c(2L, 33, 3+2i, TRUE, "String")
class(int)   # "character"

int <- c(2L, 33, 3+2i, TRUE)
class(int)   # "complex"

int <- c(2L, 33, TRUE)
class(int)   # "numeric"

int <- c(2L, TRUE)
class(int)   # "integer"

int <- c(TRUE)
# The original last step called typeof() here; class() keeps this section
# consistent (both report "logical" for a logical vector).
class(int)   # "logical"
# Two ways to drop missing values from a vector
x <- c(20, 30, 15, NA, 50, 60, NA, 70)
a <- x[!is.na(x)]   # keep only the positions that are not NA
na.omit(x)          # same values; also reports which positions were removed

# Sequences defined by a step size (by) or by a number of points (length.out)
b <- seq(from = 0, to = 100, by = 2)
b

c <- seq(from = -10, to = 10, length.out = 20)
c

d <- seq(-10, 20, by = 0.12)
d
Instructor: Dr. Prabhat Mittal M. Sc., M.Phil, Ph.D. (FMS, DU) 4|P a g e
Post-doctoral, University of Minnesota, USA
URL: http://people.du.ac.in/~pmittal/
P4.R -Computing Descriptive Statistics
# Replace the placeholder with the complete path/link to your working folder,
# then confirm the change by printing the working directory.
setwd(dir = "D:/………….")
getwd()
library(ggplot2)
library(corrplot)
library(psych)
library(data.table)
library(curl)
library(agricolae)
library(car)
library(ggpubr)
library(gridExtra)
# Load two data sets that ship with R: mtcars and PlantGrowth
data(list = c("mtcars", "PlantGrowth"))
# Import a csv file from your PC
library(readr)
# Replace the placeholder with the complete path/link to the csv file
datafile <- read_csv("D:/…….")
datafile

# Delete the column `sn` from the data
datafile[["sn"]] <- NULL
datafile

# Rename all five remaining columns in one step
# (the last column becomes "Group")
names(datafile) <- c("Weight", "Height", "Age", "Gender", "Group")
datafile

# To rename a single column instead, e.g. column 1 to "Wgt":
# names(datafile)[1] <- "Wgt"

# Turn the Gender codes 1 and 2 into the labelled factor levels Male / Female
datafile[["Gender"]] <- factor(
  datafile[["Gender"]],
  levels = c(1, 2),
  labels = c("Male", "Female")
)
datafile
# Remove every row whose Gender or Group is missing (NA).
# Assigning na.omit(column) back into the column fails as soon as that column
# contains an NA, because the shortened vector no longer has one value per
# row. Filtering whole rows keeps all columns aligned.
datafile <- datafile[!is.na(datafile$Gender) & !is.na(datafile$Group), ]
#to avoid using datafile as prefix to variables
Instructor: Dr. Prabhat Mittal M. Sc., M.Phil, Ph.D. (FMS, DU) 5|P a g e
Post-doctoral, University of Minnesota, USA
URL: http://people.du.ac.in/~pmittal/
# Put the columns of datafile on the search path so they can be used by name
attach(datafile)

# Summary statistics of all variables
summary(datafile)

# Column means, as an alternative to summary(). mean() returns NA (with a
# warning) for columns that are not numeric, such as Gender.
vapply(datafile, mean, FUN.VALUE = numeric(1))

# Summary statistics by group
tapply(X = Weight, INDEX = Group, FUN = summary)
describeBy(datafile, group = Group, mat = TRUE)

# Summary statistics of individual variables
summary(Weight)
summary(Height)
mean(Weight)
sd(Weight)
P5.R –Creating Graph
# A single box plot: Weight by Group
ggplot(data = datafile, mapping = aes(x = Group, y = Weight)) +
  geom_boxplot()

# Two box plots stored as objects, then drawn side by side
p1 <- ggplot(data = datafile, mapping = aes(x = Group, y = Weight)) +
  geom_boxplot()
p2 <- ggplot(data = datafile, mapping = aes(x = Group, y = Height)) +
  geom_boxplot()
grid.arrange(p1, p2, ncol = 2, nrow = 1)
# Scatter plots.
# `data = datafile` tells each call where to find the variables named in the
# formula, so the plots no longer depend on attach(datafile) having been run
# and cannot pick up an unrelated object of the same name from the workspace.

# Scatter plot matrix (all pairs of Weight, Age and Height)
pairs(~ Weight + Age + Height, data = datafile)
pairs(~ Weight + Age + Height, data = datafile, col = "red", pch = "+")

# Individual scatter plots (car::scatterplot)
scatterplot(Weight ~ Height, data = datafile, xlab = 'Height', ylab = 'Weight')
scatterplot(Height ~ Age, data = datafile, xlab = 'Age', ylab = 'Height')
scatterplot(Weight ~ Age, data = datafile, xlab = 'Age', ylab = 'Weight')
# Histograms of Weight, Age and Height side by side.
# par() returns the previous settings, which are restored at the end so that
# later plots are not squeezed into the 1 x 3 layout.
old_par <- par(mfrow = c(1, 3))
# with() evaluates hist() inside datafile, so the columns are found without
# attach() and the plot titles still read "Histogram of Weight" etc.
p4 <- with(datafile, hist(Weight, col = "red"))
p5 <- with(datafile, hist(Age, col = "green"))
p6 <- with(datafile, hist(Height, col = "light green"))
par(old_par)
Instructor: Dr. Prabhat Mittal M. Sc., M.Phil, Ph.D. (FMS, DU) 6|P a g e
Post-doctoral, University of Minnesota, USA
URL: http://people.du.ac.in/~pmittal/