Basic R Programming

Basic R Programming
Silvia (Shuchang Liu)

Feb 3rd, 2014
1 Why R?
• R is a high-level programming language and is popular among biostatisticians
• R is free and open source, and everyone can contribute to R (e.g. by publishing a paper that develops a new R package)
• Many ready-made packages can be installed in R
2 Resources and installation

1. R and RStudio installation
• R can be installed on Windows, Linux and Mac (http://www.r-project.org/)
• RStudio is a nice IDE for R with a graphical interface similar to MATLAB's (http://www.rstudio.com/)
2. Package installation
• To install general R packages

To install a package for the first time
> install.packages("packageName")
Load the package before using it
> library(packageName)
• To install Bioconductor packages
Bioconductor is an open-source software project for bioinformatics
> source("http://bioconductor.org/biocLite.R")
> biocLite("packageName")
> library(packageName)
3 Basic R file formats

file description R file MATLAB file
script file fileName.r fileName.m
workspace file fileName.rdata fileName.mat
1
4 Some examples
> # hi, I am the comment
> print("Hello world!")
[1] "Hello world!"
> a = 1 # semicolon is optional here

> b <- 2.3 # var <- value, to assign the value to var
> c <- 3e2
> d <- a + b +c # no output
> a + b + c # prints the result
[1] 303.3
> a == b # is a equal to b?
[1] FALSE
> a != b # is a not equal to b?
[1] TRUE
> 5/4
[1] 1.25
> 1/0
[1] Inf
> cos(pi)
[1] -1
> max(4,5)
[1] 5
> log(9, base=3) # parameter for the function
[1] 2
> log(9) # o.w. by default, base=exp(1)
[1] 2.197225
> ls() # list all the variables
[1] "a" "b" "c" "d"
> exists("b") # Does "b" exist?
[1] TRUE
> rm(list=ls()) # remove all the variables in the workspace
2
5 Data Structures
5.1 Vectors
Create a vector
> v1 <- -2:5
> v1
[1] -2 -1 0 1 2 3 4 5
> v2 <- c(3, 2, 7.2, 0.9, 100)

> v2
[1] 3.0 2.0 7.2 0.9 100.0
> v3 <- c("aa","B","c2")

> v3
[1] "aa" "B" "c2"
> seq(from=4, to=7, by=0.5)
[1] 4.0 4.5 5.0 5.5 6.0 6.5 7.0
> rep(v2, each=2)
[1] 3.0 3.0 2.0 2.0 7.2 7.2 0.9 0.9 100.0 100.0
> rep(v2, times=2)
[1] 3.0 2.0 7.2 0.9 100.0 3.0 2.0 7.2 0.9 100.0
> rep(v2, times=1:5)
[1] 3.0 2.0 2.0 7.2 7.2 7.2 0.9 0.9 0.9 0.9 100.0 100.0
[13] 100.0 100.0 100.0
> sample(1:10)
[1] 10 8 3 7 2 9 4 6 5 1
Reference elements
> v2
[1] 3.0 2.0 7.2 0.9 100.0
> v2[3]
[1] 7.2
> v2[c(2,4)]
3
[1] 2.0 0.9
> v2[-c(2,4)]
[1] 3.0 7.2 100.0
> v2[v2>5]
[1] 7.2 100.0
> v2[2]=88
Vector operations
> v2
[1] 3.0 88.0 7.2 0.9 100.0
> which(v2>=3)
[1] 1 2 3 5
> which.min(v2)
[1] 4
> length(v2)
[1] 5
> v2*10+1
[1] 31 881 73 10 1001
> (v2*10+1)[2:4][2]
[1] 73
5.2 Matrices
> d <- sample(1:20)
> d
[1] 3 17 2 16 14 12 4 8 15 20 13 7 6 19 1 10 9 11 18 5
> # create a matrix

> m1 <- matrix(data=d,nrow=4,ncol=5,byrow=TRUE,
+ dimnames=list(rows=c("cat","dog","rat","fish"),
+ cols=c("a","b","c","d","e")))
> m1
4
cols
rows a b c d e
cat 3 17 2 16 14
dog 12 4 8 15 20
rat 13 7 6 19 1
fish 10 9 11 18 5
> # reference matrix

> m1[2,4]
[1] 15
> m1["dog","d"] # reference by name
[1] 15
> m1[c(2,4),3]
dog fish
8 11
> m1[c("dog","fish"),"c"]
dog fish
8 11
> m1[3,]
a b c d e
13 7 6 19 1
> m1["rat",]
a b c d e
13 7 6 19 1
> # matrix dimension

> dim(m1) # matrix dimension
[1] 4 5
> nrow(m1) # number of rows
[1] 4
> ncol(m1) # number of columns
[1] 5
> length(m1) # element length
[1] 20
5
> colnames(m1) # column name
[1] "a" "b" "c" "d" "e"
> rownames(m1)[4] <- "elephant" # change row names

> m2 <- matrix(c(111,222,333,444),2,2)
> m2
[,1] [,2]
[1,] 111 333
[2,] 222 444
> m1[2:3,c(3,5)] <- m2

> m1
cols
rows a b c d e
cat 3 17 2 16 14
dog 12 4 111 15 333
rat 13 7 222 19 444
elephant 10 9 11 18 5
> # matrix operation

> m3 <- matrix(1:6,2,3)
> m3
[,1] [,2] [,3]

[1,] 1 3 5
[2,] 2 4 6
> m4 <- matrix(11:16,2,3,byrow=TRUE)

> m4
[,1] [,2] [,3]

[1,] 11 12 13
[2,] 14 15 16
> m5 <- matrix(21:26,3,2)

> m5
[,1] [,2]
[1,] 21 24
[2,] 22 25
[3,] 23 26
> m3*m4 # element-wise multiplication
[,1] [,2] [,3]

[1,] 11 36 65
[2,] 28 60 96
6
> m3%*%m5 # matrix multiplication; m3%*%m4 would throw an error because a 2x3 and a 2x3 matrix are non-conformable
[,1] [,2]
[1,] 202 229
[2,] 268 304
> # several useful functions
> t(m1) # transpose
rows
cols cat dog rat elephant
a 3 12 13 10
b 17 4 7 9
c 2 111 222 11
d 16 15 19 18
e 14 333 444 5
> apply(m1,1,sum) # sum of each row
cat dog rat elephant
52 475 705 53
> apply(m1,1,sqrt) # pay attention to the output dimension
rows
cat dog rat elephant
a 1.732051 3.464102 3.605551 3.162278
b 4.123106 2.000000 2.645751 3.000000
c 1.414214 10.535654 14.899664 3.316625
d 4.000000 3.872983 4.358899 4.242641
e 3.741657 18.248288 21.071308 2.236068
> apply(m1,2, function(x) return(sum(x^2+1)))
a b c d e
426 439 61734 1170 308250
> rbind(m3,m4) # row combination
[,1] [,2] [,3]
[1,] 1 3 5
[2,] 2 4 6
[3,] 11 12 13
[4,] 14 15 16
> cbind(m3,m4) # column combination
[,1] [,2] [,3] [,4] [,5] [,6]
[1,] 1 3 5 11 12 13
[2,] 2 4 6 14 15 16
> as.vector(m3)
[1] 1 2 3 4 5 6
7
5.3 Arrays
An array is a multi-dimensional generalization of a matrix.
> # generate an array var
> a <- array(1:24,dim=c(4,3,2),
+ dimnames=list(c("a","b","c","d"),c("x","y","z"),c("old","new") ))
> a
, , old
x y z
a 1 5 9
b 2 6 10
c 3 7 11
d 4 8 12
, , new
x y z
a 13 17 21
b 14 18 22
c 15 19 23
d 16 20 24
> # reference elements
> a[2,1,"new"] # pay attention to the dimension change
[1] 14
> a[-2,"x",]
old new
a 1 13
c 3 15
d 4 16
> # operation
> dim(a)
[1] 4 3 2
> apply(a,3,mean)
old new
6.5 18.5
5.4 Lists
> # generate a list var
> l1 <- list(name=c("Peter","Lily","Emma"),c("yes","no"),
+ age=c(20,40,33,rep(18,times=3)),
+ value=matrix(1:6,2,3))
> l1
8
$name
[1] "Peter" "Lily" "Emma"
[[2]]
[1] "yes" "no"
$age
[1] 20 40 33 18 18 18
$value
[,1] [,2] [,3]
[1,] 1 3 5
[2,] 2 4 6
> # reference elements

> l1[[2]]
[1] "yes" "no"
> l1[["age"]]
[1] 20 40 33 18 18 18
> l1$name
[1] "Peter" "Lily" "Emma"
> l1[["value"]][2,]
[1] 2 4 6
> # list operation

> names(l1)
[1] "name" "" "age" "value"
> length(l1)
[1] 4
> unlist(l1) #produce a vector of all the elements
name1 name2 name3 age1 age2 age3 age4 age5

"Peter" "Lily" "Emma" "yes" "no" "20" "40" "33" "18" "18"
age6 value1 value2 value3 value4 value5 value6
"18" "1" "2" "3" "4" "5" "6"
> l2 <- list(matrix(2,4,5),matrix(1:10,2,5),diag(3))

> l2
9
[[1]]
[,1] [,2] [,3] [,4] [,5]
[1,] 2 2 2 2 2
[2,] 2 2 2 2 2
[3,] 2 2 2 2 2
[4,] 2 2 2 2 2
[[2]]
[,1] [,2] [,3] [,4] [,5]
[1,] 1 3 5 7 9
[2,] 2 4 6 8 10
[[3]]
[,1] [,2] [,3]
[1,] 1 0 0
[2,] 0 1 0
[3,] 0 0 1
> lapply(l2,sum) # return a list
[[1]]
[1] 40
[[2]]
[1] 55
[[3]]
[1] 3
> sapply(l2,dim) # return a vector or matrix
[,1] [,2] [,3]

[1,] 4 2 3
[2,] 5 5 3
5.5 Dataframes
> # generate a dataframe
> ID <- 100:103
> name <- c("Peter","Lily","Emma","Joe")
> sex <- c("M","F","F","M")
> age <- c(22,30,16,44)
> married <- c(F,T,F,T)
> d1 <- data.frame(ID,name,sex,age,married)
> rownames(d1) <-name
> d1
ID name sex age married

Peter 100 Peter M 22 FALSE
10
Lily 101 Lily F 30 TRUE
Emma 102 Emma F 16 FALSE
Joe 103 Joe M 44 TRUE
> # reference elements, just like a matrix

> d1["Emma","age"]
[1] 16
> d1[d1$ID==103,]

> # operation
> d2 <- d1[order(d1[,"age"]),]
> d2

6 Control Structures
6.1 if()...else
if(condition){
expression 1, if TRUE
}else{
expression 2, if FALSE
}
> passExam <- TRUE

> if(passExam==TRUE){
+ print("Congratulations!")
+ }else{
+ print("Try it again!")
+ }
[1] "Congratulations!"
6.2 ifelse()
ifelse(condition, TRUE expression, FALSE expression)
> x <- matrix(sample(1:6),2,3)

> x
11
[,1] [,2] [,3]
[1,] 2 6 1
[2,] 5 3 4
> y <- ifelse(x>3,1,0)
> y
[,1] [,2] [,3]
[1,] 0 1 0
[2,] 1 0 1
6.3 switch()
switch (statement, list)
> grade="D"
> switch(grade, A="Great job!", B="Not bad", C="So-so", D="Donnot cry! I trust you!")
[1] "Donnot cry! I trust you!"
6.4 for loops

for (name in vector){
statement
}
> sum(1:10)
[1] 55
> a <- 0
> for (i in 1:10){
+ a <- a+i
+ }
> a
[1] 55
6.5 repeat loops

repeat{
expression
if(condition) break
}
> a <- 0
> i <- 0
> repeat{
+ i <- i+1
+ a <- a+i
+ if (i>=10) break
+ }
> a
12
[1] 55
6.6 while loops

while(condition){
expression
}
> a <- 0
> i <- 0
> while(i<10){
+ i <- i+1
+ a <- a+i
+ }
> a
[1] 55
7 Functions
functionName <- function(arguments){
body
}
> # example 1:
> my.func <- function(grade){
+ switch(grade, A="Great job!", B="Not bad", C="So-so", D="Fail again?... OK... Just cry!")
+ }
> my.func("D")
[1] "Fail again?... OK... Just cry!"
> # example 2: a function to calculate the factorial

> my.fac <- function(x){
+ if(x==1){
+ return(x)
+ }else{
+ return(x*my.fac(x-1))
+ }
+ }
> my.fac(5)
[1] 120
> factorial(5)
[1] 120
13
> # example 3: a function with multiple arguments
> my.func2 <- function(x,y){
+ len=length(y)
+ return(x[1:len])
+ }
> my.func2(c(4,3,2,1),c(1,2))
[1] 4 3
8 Read and write files

8.1 Data output
> d1

> # output the file after converting it to a data frame

> write.table(d1,file="myFile.txt",append=FALSE,quote=FALSE,sep="\t",row.names=TRUE,col.names=TRUE)
> # save current work space
> save(d1,file="work.rdata")
8.2 Data input

> # read the data in as a data frame
> rm(list=ls())
> data=read.table(file="myFile.txt",header=TRUE,sep="\t",skip=0)
> data

> # load workspace file

> load("work.rdata")
> ls()
[1] "d1" "data"
9 Graph
> # set up the figure to contain four sub-graphs
> par(mfrow=c(2,2)) # 2-by-2 sub graph
14
> # graph 1: plot
> x <- 1:10
> y <- seq(0.1,1,by=0.1) # theoretical value
> z <- y + rnorm(10,mean=0,sd=0.1) # actual value
> plot(x, z, type="p",col="red",main="Plot", xlab="x label", ylab="value")
> lines(x, y)
> # graph 2: histogram
> age <- rnorm(1000,mean=20,sd=3)
> hist(age,main="Histogram",xlab="age",ylab="counts")
> # graph 3: boxplot
> boxplot(age,main="boxplot",ylab="age")
> # graph 4: qq plot
> qqnorm(age,main="qq plot")
10 At the end
1. R is similar to MATLAB, but differs in many details
2. R programming courses
UPitt: BIOST 2094 – STATISTICAL COMPUTING AND DATA ANALYSIS USING R
3. Some useful references
http://cran.r-project.org/doc/manuals/r-release/R-lang.html
http://www.cyclismo.org/tutorial/R/
http://ww2.coastal.edu/kingw/statistics/R-tutorials/
OR, just google it!
15
Plot Histogram
0.8
150
counts
value
0.4
50
0.0
0
2 4 6 8 10 10 15 20 25 30
x label age
boxplot qq plot
Sample Quantiles
25
25
20
20
age
15
15
10
10
−3 −2 −1 0 1 2 3
Theoretical Quantiles
Figure 1: Figure examples
16

Basic R Programming

Uploaded by

Copyright:

Available Formats

Basic R Programming

Uploaded by

Document Information

Original Description:

Original Title

Copyright

Available Formats

Share this document

Share or Embed Document

Sharing Options

Did you find this document useful?

Is this content inappropriate?

Copyright:

Available Formats

Basic R Programming

Uploaded by

Copyright:

Available Formats

Basic R Programming

Silvia (Shuchang Liu)

2 Resources and installation

• To install general R packages

3 Basic R file formats

[1] "Hello world!"

> a = 1 # semicolon is optional here

> a != b # a not equals to b?

> log(9, base=3) # parameter for the function

> log(9) # o.w. by default, base=exp(1)

> ls() # list all the variables

[1] "a" "b" "c" "d"

> exists("b") # Does "b" exist?

> rm(list=ls()) # remove all the variables in the workspace

> v2 <- c(3, 2, 7.2, 0.9, 100)

[1] 3.0 2.0 7.2 0.9 100.0

> v3 <- c("aa","B","c2")

[1] "aa" "B" "c2"

> seq(from=4, to=7, by=0.5)

[1] 4.0 4.5 5.0 5.5 6.0 6.5 7.0

> rep(v2, each=2)

> rep(v2, times=2)

> rep(v2, times=1:5)

[1] 3.0 2.0 7.2 0.9 100.0

[1] 3.0 7.2 100.0

[1] 7.2 100.0

[1] 3.0 88.0 7.2 0.9 100.0

[1] 31 881 73 10 1001

> # creat a matrix

> # reference matrix

> m1["dog","d"] # reference by name

> # matrix dimension

> nrow(m1) # number of rows

> ncol(m1) # number of column

> length(m1) # element length

[1] "a" "b" "c" "d" "e"

> rownames(m1)[4] <- "elephant" # change row names

> m1[2:3,c(3,5)] <- m2

> # matrix operation

[,1] [,2] [,3]

> m4 <- matrix(11:16,2,3,byrow=TRUE)

[,1] [,2] [,3]

> m5 <- matrix(21:26,3,2)

> m3*m4 # element-wise manipulation

[,1] [,2] [,3]

> # reference elements

[1] "yes" "no"

[1] "Peter" "Lily" "Emma"

> # list operation

[1] "name" "" "age" "value"

> unlist(l1) #produce a vector of all the elements

name1 name2 name3 age1 age2 age3 age4 age5

> l2 <- list(matrix(2,4,5),matrix(1:10,2,5),diag(3))

> lapply(l2,sum) # return a list

> sapply(l2,dim) # return a vector or matrix

[,1] [,2] [,3]

ID name sex age married

> # reference elements, just like a matrix