R Code Compact

The document provides a comprehensive overview of R programming for bioinformatics, covering various topics such as data structures, arithmetic operations, logical operations, statistical functions, string functions, and data frames. It includes practical examples and code snippets for each topic, demonstrating how to manipulate data, perform calculations, and visualize results. Additionally, it introduces advanced data structures like lists and tibbles, and explains their usage in data analysis.

Uploaded by

mattiullahkhan860

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as PDF, TXT or read online on Scribd

0% found this document useful (0 votes)

3 views10 pages

R Code Compact

Uploaded by

mattiullahkhan860

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as PDF, TXT or read online on Scribd

You are on page 1/ 10

R Code Compilation - All Chapters

# Chapter 1 - Getting Started

print("Hello, R for Bioinformatics!")
5 + 5
my_first_variable <- 42
my_first_variable
?mean
values <- c(10, 15, 20, 25, 30)
mean_value <- mean(values)
print(paste("The mean value is:", mean_value))
plot(1:10, main="My First Plot", xlab="X axis", ylab="Y axis", type="o",
col="blue")
x <- 10; y <- 20; z = 30; Name <- "John"

# Chapter 2 - Data Structures

gene_lengths <- c(1500, 2300, 850, 4100)
print(gene_lengths)
name <- "jane"; print(Name); print(name)
sqrt(25); round(3.14159, digits=2)
count <- 42; message <- "Hello, Bioinformatics!"; is_active <- TRUE
print(count); print(message); print(is_active)
class(count); class(message); class(is_active)
count <- count + 8; print(count)
gene_count <- 100; proteinMass <- 45.6; sample_1 <- "ATCG"
weight <- 65.5; class(weight)
count <- 42L; class(count)
dna_sequence <- "ATGCAATAGCATCGAT"; class(dna_sequence)
is_significant <- TRUE; class(is_significant)
complex_number <- 3 + 4i; class(complex_number)
age_str <- "25"; age_num <- as.numeric(age_str)
print(age_num); class(age_num)
temp <- 37.5; temp_str <- as.character(temp)
print(temp_str); class(temp_str)
logical_vector <- c(TRUE, FALSE, TRUE)
numeric_vector <- as.numeric(logical_vector); print(numeric_vector)
measurements <- c(10.2, NA, 12.5, 14.1); print(measurements)
mean(measurements); mean(measurements, na.rm=TRUE)
empty_var <- NULL; print(empty_var); length(empty_var)
pos_inf <- 1/0; print(pos_inf)
neg_inf <- -1/0; print(neg_inf)
undefined <- 0/0; print(undefined)

# Arithmetic Operations
a <- 10; b <- 3
addition <- a + b; subtraction <- a - b; multiplication <- a * b
division <- a / b; exponentiation <- a ^ b; modulus <- a %% b
int_division <- a %/% b
print(paste("Addition:", addition))
print(paste("Subtraction:", subtraction))
print(paste("Multiplication:", multiplication))
print(paste("Division:", division))
print(paste("Exponentiation:", exponentiation))
print(paste("Modulus:", modulus))
print(paste("Integer Division:", int_division))
result <- 2 + 3 * 4; print(result)
result_with_parentheses <- (2 + 3) * 4; print(result_with_parentheses)

# Comparison Operations
x <- 5; y <- 10
equal <- x == y; not_equal <- x != y; greater_than <- x > y
less_than <- x < y; greater_equal <- x >= y; less_equal <- x <= y
print(paste("Equal:", equal))
print(paste("Not Equal:", not_equal))
print(paste("Greater Than:", greater_than))
print(paste("Less Than:", less_than))
print(paste("Greater Than or Equal:", greater_equal))
print(paste("Less Than or Equal:", less_equal))
num_value <- 5; char_value <- "5"
print(num_value == char_value)
value <- 10; print(value == NA); print(is.na(value))

# Logical Operations
a <- TRUE; b <- FALSE; and_result <- a & b
or_result <- a | b; not_result <- !a
print(paste("AND:", and_result))
print(paste("OR:", or_result))
print(paste("NOT:", not_result))
vec1 <- c(TRUE, FALSE, TRUE); vec2 <- c(FALSE, FALSE, TRUE)
and_vec <- vec1 & vec2; or_vec <- vec1 | vec2
print("Vectorized AND:"); print(and_vec)
print("Vectorized OR:"); print(or_vec)
first_elements_and <- vec1 && vec2; print(first_elements_and)
complex_condition <- (x > 0) & (y < 20); print(complex_condition)

# Mathematical Functions
num <- 16; sqrt_num <- sqrt(num); abs_neg <- abs(-10)
log_num <- log(10); log10_num <- log10(100); exp_num <- exp(1)
print(paste("Square root of", num, ":", sqrt_num))
print(paste("Absolute value of -10:", abs_neg))
print(paste("Natural log of 10:", log_num))
print(paste("Log base 10 of 100:", log10_num))
print(paste("e^1:", exp_num))

# Statistical Functions
data <- c(4, 7, 12, 5, 8, 15, 9)
mean_val <- mean(data); median_val <- median(data); sd_val <- sd(data)
min_val <- min(data); max_val <- max(data); sum_val <- sum(data)
print(paste("Mean:", mean_val))
print(paste("Median:", median_val))
print(paste("Standard Deviation:", sd_val))
print(paste("Minimum:", min_val))
print(paste("Maximum:", max_val))
print(paste("Sum:", sum_val))

# String Functions
first_name <- "john"; last_name <- "doe"
full_name <- paste(first_name, last_name); print(full_name)
full_name_comma <- paste(last_name, first_name, sep=", ");
print(full_name_comma)
upper_name <- toupper(full_name); lower_name <- tolower("HELLO WORLD")
print(upper_name); print(lower_name)
substr_result <- substr("ATGCAATAGCATCGAT", start=4, stop=8);
print(substr_result)

# Utility Functions
vec <- c(10, 20, NA, 40, 50)
vec_length <- length(vec); has_na <- is.na(vec); vec_class <- class(vec)
print(paste("Vector length:", vec_length))
print("NA positions:"); print(has_na)
print(paste("Vector class:", vec_class))
pi_value <- pi; print(pi_value)
rounded_pi <- round(pi_value); print(rounded_pi)
rounded_pi_2 <- round(pi_value, digits=2); print(rounded_pi_2)
mean_result1 <- mean(x=c(1, 2, 3, NA), na.rm=TRUE)
mean_result2 <- mean(na.rm=TRUE, x=c(1, 2, 3, NA))
print(mean_result1); print(mean_result2)
mean_result3 <- mean(c(1, 2, 3, NA), TRUE); print(mean_result3)

# Vectors
class(gene_lengths)
gene_names <- c("BRCA1", "TP53", "EGFR", "MYC"); print(gene_names);
class(gene_names)
is_significant <- c(TRUE, FALSE, TRUE, TRUE); print(is_significant);
class(is_significant)
exon_counts <- c(24L, 11L, 28L, 3L); print(exon_counts); class(exon_counts)
sequence_1_to_10 <- 1:10; print(sequence_1_to_10)
sequence_by_step <- seq(from=0, to=1, by=0.2); print(sequence_by_step)
sequence_fixed_length <- seq(from=1, to=100, length.out=5);
print(sequence_fixed_length)
repeated_values <- rep("A", times=5); print(repeated_values)
repeated_pattern <- rep(c(1, 2), times=3); print(repeated_pattern)
repeated_each <- rep(c("Control", "Treated"), each=3); print(repeated_each)
mixed_vector_1 <- c(1, "two", 3); print(mixed_vector_1); class(mixed_vector_1)
mixed_vector_2 <- c(10.5, TRUE, FALSE); print(mixed_vector_2);
class(mixed_vector_2)
mixed_vector_3 <- c(5L, TRUE, 10L, FALSE); print(mixed_vector_3);
class(mixed_vector_3)

# Vector Operations
vec1 <- c(1, 2, 3, 4); vec2 <- c(10, 20, 30, 40)
sum_vec <- vec1 + vec2; print(sum_vec)
product_vec <- vec1 * 5; print(product_vec)
comparison_vec <- vec1 > 2; print(comparison_vec)
sqrt_vec <- sqrt(vec1); print(sqrt_vec)
vec_short <- c(10, 20); vec_long <- c(1, 2, 3, 4, 5, 6)
sum_recycled <- vec_long + vec_short; print(sum_recycled)

# Vector Indexing
gene_expression <- c(15.2, 8.1, 22.5, 19.0, 7.5)
names(gene_expression) <- c("GeneA", "GeneB", "GeneC", "GeneD", "GeneE")
print(gene_expression)
first_element <- gene_expression[1]; print(first_element)
third_and_fifth <- gene_expression[c(3, 5)]; print(third_and_fifth)
all_except_second <- gene_expression[-2]; print(all_except_second)
all_except_first_and_fourth <- gene_expression[c(-1, -4)];
print(all_except_first_and_fourth)
low_expression_indices <- gene_expression < 10; print(low_expression_indices)
low_expression_values <- gene_expression[low_expression_indices];
print(low_expression_values)
high_expression_values <- gene_expression[gene_expression > 18];
print(high_expression_values)
gene_c_value <- gene_expression["GeneC"]; print(gene_c_value)
genes_b_and_d <- gene_expression[c("GeneB", "GeneD")]; print(genes_b_and_d)
gene_expression[1] <- 16.0; gene_expression["GeneE"] <- 8.0
gene_expression[gene_expression < 10] <- 10.0; print(gene_expression)

# Matrices
matrix_data <- 1:12
my_matrix <- matrix(data = matrix_data, nrow = 3, ncol = 4); print(my_matrix)
my_matrix_byrow <- matrix(data = matrix_data, nrow = 3, ncol = 4, byrow =
TRUE); print(my_matrix_byrow)
matrix_direct <- matrix(c(5, 10, 15, 20, 25, 30), nrow=2); print(matrix_direct)
rownames(my_matrix) <- c("Sample1", "Sample2", "Sample3")
colnames(my_matrix) <- c("GeneA", "GeneB", "GeneC", "GeneD"); print(my_matrix)
dim(my_matrix); nrow(my_matrix); ncol(my_matrix)
matrix1 <- matrix(1:6, nrow=2); matrix2 <- matrix(10:15, nrow=2)
print("Matrix 1:"); print(matrix1); print("Matrix 2:"); print(matrix2)
matrix_sum <- matrix1 + matrix2; print("Element-wise Sum:"); print(matrix_sum)
matrix_product_elementwise <- matrix1 * matrix2; print("Element-wise
Product:"); print(matrix_product_elementwise)
matrix_scaled <- matrix1 * 10; print("Scaled Matrix 1:"); print(matrix_scaled)
matrix_a <- matrix(1:6, nrow=2, ncol=3); matrix_b <- matrix(1:6, nrow=3,
ncol=2)
print("Matrix A (2x3):"); print(matrix_a); print("Matrix B (3x2):");
print(matrix_b)
matrix_product_matmul <- matrix_a %*% matrix_b; print("Matrix Multiplication (A
%*% B):"); print(matrix_product_matmul)
transposed_matrix_a <- t(matrix_a); print("Transposed Matrix A (3x2):");
print(transposed_matrix_a)

# Matrix Indexing
print(my_matrix)
element_2_3 <- my_matrix[2, 3]; print(element_2_3)
row_2 <- my_matrix[2, ]; print(row_2); class(row_2)
col_3 <- my_matrix[, 3]; print(col_3); class(col_3)
row_2_matrix <- my_matrix[2, , drop=FALSE]; print(row_2_matrix);
class(row_2_matrix)
col_3_matrix <- my_matrix[, 3, drop=FALSE]; print(col_3_matrix);
class(col_3_matrix)
sub_matrix <- my_matrix[c(1, 3), c(2, 4)]; print(sub_matrix)
sample1_gene_b <- my_matrix["Sample1", "GeneB"]; print(sample1_gene_b)
sample2_data <- my_matrix["Sample2", ]; print(sample2_data)
matrix_subset_logical <- my_matrix[my_matrix[, "GeneA"] > 1, ];
print(matrix_subset_logical)
my_matrix[1, 1] <- 100; my_matrix["Sample3", "GeneC"] <- 999; my_matrix[,
"GeneD"] <- 0; print(my_matrix)

# Lists
patient_info <- list(
patient_id = "P123", age = 45L, diagnosis = "Type 2 Diabetes", is_smoker =
TRUE,
lab_values = c(glucose = 150, hba1c = 7.5, cholesterol = 210),
medications = c("Metformin", "Lisinopril")
)
print(patient_info); str(patient_info)
complex_list <- list(
info = "Experiment Details", samples = c("S1", "S2", "S3"),
expression_matrix = matrix(rnorm(9), nrow=3, dimnames=list(paste0("G", 1:3),
paste0("S", 1:3))),
metadata = patient_info
)
print(complex_list); str(complex_list)

# List Indexing
print(patient_info)
patient_id_value <- patient_info[[1]]; print(patient_id_value);
class(patient_id_value)
diagnosis_value <- patient_info[["diagnosis"]]; print(diagnosis_value);
class(diagnosis_value)
age_value <- patient_info$age; print(age_value); class(age_value)
lab_values_vector <- patient_info$lab_values; print(lab_values_vector);
class(lab_values_vector)
first_two_elements_list <- patient_info[1:2]; print(first_two_elements_list);
class(first_two_elements_list)
medications_list <- patient_info["medications"]; print(medications_list);
class(medications_list)
first_medication <- patient_info$medications[1]; print(first_medication)
glucose_value <- patient_info$lab_values["glucose"]; print(glucose_value)
print(complex_list$`metadata`$patient_id); print(complex_list[["metadata"]]
[["age"]])
patient_info$age <- 46L; patient_info[["is_smoker"]] <- FALSE
patient_info$`medications <- c(patient_info`$medications, "Aspirin")
patient_info$new_field <- "Some new information"; print(patient_info)
patient_info$new_field <- NULL; print(patient_info)

# Data Frames
gene_ids <- c("Gene1", "Gene2", "Gene3", "Gene4")
chromosome <- c("chr1", "chr2", "chr1", "chrX")
expression_level <- c(5.2, 1.8, 7.1, 4.5)
is_expressed <- c(TRUE, TRUE, TRUE, TRUE)
gene_data_df <- data.frame(GeneID = gene_ids, Chromosome = chromosome,
Expression = expression_level, Expressed = is_expressed)
print(gene_data_df); class(gene_data_df); str(gene_data_df)
gene_data_df_no_factors <- data.frame(GeneID = gene_ids, Chromosome =
chromosome, Expression = expression_level, Expressed = is_expressed,
stringsAsFactors = FALSE)
str(gene_data_df_no_factors)

# Tibbles
library(tibble)
gene_data_tbl <- tibble(GeneID = gene_ids, Chromosome = chromosome, Expression
= expression_level, Expressed = is_expressed)
print(gene_data_tbl); class(gene_data_tbl); str(gene_data_tbl)
element_2_3_df <- gene_data_tbl[2, 3]; print(element_2_3_df)
row_1_df <- gene_data_tbl[1, ]; print(row_1_df); class(row_1_df)
expression_col_matrix_style <- gene_data_tbl[, "Expression"];
print(expression_col_matrix_style); class(expression_col_matrix_style)
expression_col_list_style <- gene_data_tbl$Expression;
print(expression_col_list_style); class(expression_col_list_style)
chromosome_col_list_style <- gene_data_tbl[["Chromosome"]];
print(chromosome_col_list_style); class(chromosome_col_list_style)
expressed_high <- gene_data_tbl[gene_data_tbl$Expression > 5, ];
print(expressed_high)
chr1_genes <- gene_data_tbl[gene_data_tbl$Chromosome == "chr1", ];
print(chr1_genes)
gene_expression_subset <- gene_data_tbl[, c("GeneID", "Expression")];
print(gene_expression_subset)
gene_expression_subset_list <- gene_data_tbl[c("GeneID", "Expression")];
print(gene_expression_subset_list)
gene_data_tbl$`Expression[1] <- 6.0; gene_data_tbl`$LogExpression <-
log2(gene_data_tbl$Expression); print(gene_data_tbl)
gene_data_tbl$Expressed <- NULL; print(gene_data_tbl)

# Chapter 3 - Programming Fundamentals

gc_content <- 0.65
if (gc_content > 0.6) { print("High GC content detected.") }
expression_level <- 4.5
if (expression_level > 5.0) { print("Gene is highly expressed.") } else {
print("Gene expression is not high.") }
sequence_length <- 1500
if (sequence_length < 100) { print("Short sequence.") } else if
(sequence_length < 1000) { print("Medium sequence.") } else { print("Long
sequence.") }
expression_values <- c(2.1, 8.5, 4.9, 12.3, 1.5); threshold <- 5.0
classification <- ifelse(expression_values > threshold, "High", "Low");
print(classification)
p_values <- c(0.01, 0.25, 0.04, 0.001, 0.5); significance_level <- 0.05

# dplyr Operations
library(dplyr)
data(iris); iris_tbl <- as_tibble(iris); print(iris_tbl)
setosa_data <- filter(iris_tbl, Species == "setosa"); print(setosa_data)
versicolor_long_sepal <- filter(iris_tbl, Species == "versicolor", Sepal.Length
> 6); print(versicolor_long_sepal)
sorted_by_petal_length <- arrange(iris_tbl, desc(Petal.Length));
print(head(sorted_by_petal_length))
sorted_multi <- arrange(iris_tbl, Species, Sepal.Width);
print(head(sorted_multi))
sepal_species_data <- select(iris_tbl, Species, Sepal.Length);
print(head(sepal_species_data))
no_species_data <- select(iris_tbl, -Species); print(head(no_species_data))
petal_data <- select(iris_tbl, starts_with("Petal")); print(head(petal_data))
iris_with_area <- mutate(iris_tbl, Petal.Area = Petal.Length * Petal.Width)
print(head(select(iris_with_area, Petal.Length, Petal.Width, Petal.Area)))
iris_transformed <- mutate(iris_tbl, Sepal.Length.cm = Sepal.Length / 10,
Petal.Length.cm = Petal.Length / 10)
print(head(iris_transformed))
summary_petal_length <- summarise(iris_tbl, avg_petal_length =
mean(Petal.Length)); print(summary_petal_length)
summary_stats <- summarise(iris_tbl, avg_sepal_width = mean(Sepal.Width),
max_petal_length = max(Petal.Length), n_observations = n())
print(summary_stats)
species_summary <- group_by(iris_tbl, Species); print(species_summary)
species_avg_stats <- summarise(species_summary, avg_sepal_length =
mean(Sepal.Length), avg_sepal_width = mean(Sepal.Width), count = n())
print(species_avg_stats)
species_avg_stats_ungrouped <- ungroup(species_avg_stats);
print(species_avg_stats_ungrouped)
result_nested <- summarise(filter(mutate(iris_tbl, Petal.Area = Petal.Length *
Petal.Width), Species == "virginica", Sepal.Length > 7), avg_petal_area =
mean(Petal.Area))
print(result_nested)

# Chapter 4 - Bioinformatics
BiocManager::install(c("Biostrings", "GenomicRanges", "rtracklayer",
"SummarizedExperiment"))
library(Biostrings)
dna_seq <- DNAString("ATGCAATAGCATCGAT"); print(dna_seq); length(dna_seq)
complement_seq <- complement(dna_seq); print(complement_seq)
reverse_complement_seq <- reverseComplement(dna_seq);
print(reverse_complement_seq)
gc_content_seq <- letterFrequency(dna_seq, "GC", as.prob=TRUE);
print(gc_content_seq)
dna_set <- DNAStringSet(c("ATGCAATAGCATCGAT", "GCTAGCTAGCTA", "AAATTTCCCGGG"))
names(dna_set) <- c("Seq1", "Seq2", "Seq3"); print(dna_set)
writeXStringSet(dna_set, "example_sequences.fasta")
loaded_sequences <- readDNAStringSet("example_sequences.fasta");
print(loaded_sequences)

# GenomicRanges
library(GenomicRanges)
gr <- GRanges(
seqnames = Rle(c("chr1", "chr2", "chr1", "chr3"), c(1, 3, 2, 4)),
ranges = IRanges(start=c(101, 201, 401, 501, 601, 701, 801, 901, 1001, 1101),
end=c(110, 250, 450, 550, 650, 750, 850, 950, 1050, 1150)),
strand = Rle(strand(c("-", "+", "*", "+", "-")), c(1, 2, 2, 3, 2)),
score = 1:10, GC = seq(0.4, 0.58, length.out=10)
)
print(gr); seqnames(gr); ranges(gr); start(gr); end(gr); width(gr); strand(gr)
mcols(gr); gr$`score; gr`$GC; length(gr)
first_three_ranges <- gr[1:3]; print(first_three_ranges)
chr1_ranges <- gr[seqnames(gr) == "chr1"]; print(chr1_ranges)
high_score_ranges <- gr[gr$score > 7]; print(high_score_ranges)
shifted_gr <- shift(gr, shift = 1000); print(shifted_gr)
resized_gr_start <- resize(gr, width = 20, fix = "start");
print(resized_gr_start)
resized_gr_end <- resize(gr, width = 100, fix = "end"); print(resized_gr_end)
gr_query <- GRanges("chr1", IRanges(c(120, 360), c(180, 410)), score=c(50, 80))
print("Query Ranges:"); print(gr_query); print("Subject Ranges (gr):");
print(gr)
overlaps <- findOverlaps(gr_query, gr); print("Overlap results (indices):");
print(overlaps)
query_hits <- gr_query[queryHits(overlaps)]; subject_hits <-
gr[subjectHits(overlaps)]
print("Query ranges that overlap:"); print(query_hits); print("Subject ranges
that overlap:"); print(subject_hits)
subset_by_overlap <- subsetByOverlaps(gr, gr_query); print("Ranges in gr that
overlap with gr_query:"); print(subset_by_overlap)
nearest_indices <- nearest(gr_query, gr); print("Index of nearest range in gr
for each range in gr_query:"); print(nearest_indices);
print(gr[nearest_indices])
distances <- distanceToNearest(gr_query, gr); print("Distances to nearest
ranges:"); print(distances)
gr_a <- GRanges("chr1", IRanges(c(100, 200), c(150, 250))); gr_b <-
GRanges("chr1", IRanges(c(130, 260), c(180, 300)))
union_gr <- union(gr_a, gr_b); intersect_gr <- intersect(gr_a, gr_b);
setdiff_gr <- setdiff(gr_a, gr_b)
print("Union:"); print(union_gr); print("Intersect:"); print(intersect_gr);
print("Set Difference (A-B):"); print(setdiff_gr)
coverage_chr1 <- coverage(gr[seqnames(gr) == "chr1"]); print(coverage_chr1)

# rtracklayer
library(rtracklayer)
bed_content <- c("chr1\t100\t150\tGeneA\t10\t+", "chr1\t300\t380\tGeneB\t25\t-
", "chr2\t200\t250\tGeneC\t15\t+")
writeLines(bed_content, "example.bed"); bed_gr <- import("example.bed", format
= "bed"); print(bed_gr)
gtf_content <- c("chr1\tHAVANA\texon\t11869\t12227\t.\t+\t.\tgene_id
\"ENSG001\"; transcript_id \"ENST001\"; exon_number 1;",
"chr1\tHAVANA\texon\t12613\t12721\t.\t+\t.\tgene_id \"ENSG001\"; transcript_id
\"ENST001\"; exon_number 2;",
"chr1\tHAVANA\tgene\t11869\t14409\t.\t+\t.\tgene_id \"ENSG001\"; gene_name
\"DDX11L1\";", "chr2\tHAVANA\texon\t5000\t5500\t.\t-\t.\tgene_id \"ENSG002\";
transcript_id \"ENST002\"; exon_number 1;")
writeLines(gtf_content, "example.gtf"); gtf_gr <- import("example.gtf", format
= "gtf"); print(gtf_gr); print(mcols(gtf_gr))
exons_gr <- gtf_gr[gtf_gr$type == "exon"]; print(exons_gr)
exons_gr$`score <- sample(1:100, length(exons_gr), replace=TRUE);
exons_gr`$name <- paste0("Exon_", 1:length(exons_gr))
export(exons_gr, "exported_exons.bed", format = "bed")

# SummarizedExperiment
library(SummarizedExperiment)
counts_matrix <- matrix(rpois(100, lambda = 10), nrow = 20, ncol = 5, dimnames
= list(paste0("Gene", 1:20), paste0("Sample", 1:5)))
print(head(counts_matrix))
feature_data <- GRanges(seqnames = sample(c("chr1", "chr2"), 20, replace =
TRUE), ranges = IRanges(start = sample(1:10000, 20), width = sample(50:500,
20)), strand = sample(c("+", "-"), 20, replace = TRUE), gene_id =
paste0("Gene", 1:20), gc_content = rnorm(20, 0.5, 0.05))
names(feature_data) <- rownames(counts_matrix); print(feature_data)
sample_data <- DataFrame(Treatment = rep(c("Control", "Treated"), length.out =
5), Replicate = c(1, 1, 2, 2, 3), row.names = colnames(counts_matrix))
print(sample_data)
se <- SummarizedExperiment(assays = list(counts = counts_matrix), rowData =
feature_data, colData = sample_data); print(se)
assay(se); assays(se); rowData(se); colData(se); dim(se); nrow(se); ncol(se)
se_subset_genes <- se[1:5, ]; print(se_subset_genes)
se_subset_samples <- se[, se$Treatment == "Treated"]; print(se_subset_samples)
se_subset_both <- se[rowData(se)$`gc_content > 0.55, se`$Replicate == 1];
print(se_subset_both)
normalized_counts <- log2(counts_matrix + 1); assay(se, "logcounts") <-
normalized_counts; print(assays(se))

# Chapter 5 - Data Visualization

library(ggplot2); library(dplyr)
data(iris); iris_tbl <- as_tibble(iris)
base_plot <- ggplot(data = iris_tbl, mapping = aes(x = Sepal.Length, y =
Sepal.Width, color = Species))
scatter_plot <- base_plot + geom_point(); print(scatter_plot)
ggplot(data = iris_tbl, aes(x = Sepal.Length, y = Sepal.Width, color =
Species)) + geom_point()
set.seed(123); num_proteins <- 1000
proteomics_data <- tibble(ProteinID = paste0("Prot", 1:num_proteins),
Control_Abundance = rnorm(num_proteins, mean = 10, sd = 2), Treated_Abundance =
Control_Abundance + rnorm(num_proteins, mean = 0.5, sd = 1.5))
proteomics_data$`Treated_Abundance[1:50] <-
proteomics_data`$Treated_Abundance[1:50] + rnorm(50, mean = 3, sd = 1)
proteomics_data$`Treated_Abundance[51:100] <-
proteomics_data`$Treated_Abundance[51:100] - rnorm(50, mean = 3, sd = 1)
scatter_proteomics <- ggplot(proteomics_data, aes(x = Control_Abundance, y =
Treated_Abundance)) + geom_point(alpha = 0.5) + labs(title = "Protein
Abundance: Treated vs. Control", x = "Log2 Abundance (Control)", y = "Log2
Abundance (Treated)") + theme_minimal()
print(scatter_proteomics)
scatter_proteomics_diag <- scatter_proteomics + geom_abline(intercept = 0,
slope = 1, color = "red", linetype = "dashed"); print(scatter_proteomics_diag)
box_plot_iris <- ggplot(iris_tbl, aes(x = Species, y = Sepal.Width, fill =
Species)) + geom_boxplot() + labs(title = "Sepal Width Distribution by
Species", x = "Species", y = "Sepal Width") + theme_light()
print(box_plot_iris)
violin_plot_iris <- ggplot(iris_tbl, aes(x = Species, y = Petal.Length, fill =
Species)) + geom_violin(trim = FALSE, alpha = 0.7) + geom_jitter(width = 0.1,
height = 0, alpha = 0.5, size = 1.5) + labs(title = "Petal Length Distribution
by Species", x = "Species", y = "Petal Length") + theme_bw()
print(violin_plot_iris)
histogram_petal <- ggplot(iris_tbl, aes(x = Petal.Length)) +
geom_histogram(binwidth = 0.2, fill = "skyblue", color = "black") + labs(title
= "Histogram of Petal Lengths", x = "Petal Length", y = "Frequency")
print(histogram_petal)
density_petal_species <- ggplot(iris_tbl, aes(x = Petal.Length, fill = Species,
color = Species)) + geom_density(alpha = 0.5) + labs(title = "Density of Petal
Lengths by Species", x = "Petal Length", y = "Density")
print(density_petal_species)
bar_plot_counts <- ggplot(iris_tbl, aes(x = Species, fill = Species)) +
geom_bar() + labs(title = "Count of Observations per Species", x = "Species", y
= "Count")
print(bar_plot_counts)
iris_summary <- iris_tbl %>% group_by(Species) %>% summarise(Avg.Sepal.Length =
mean(Sepal.Length)); print(iris_summary)
bar_plot_means <- ggplot(iris_summary, aes(x = Species, y = Avg.Sepal.Length,
fill = Species)) + geom_col() + labs(title = "Average Sepal Length per
Species", x = "Species", y = "Average Sepal Length")
print(bar_plot_means)
set.seed(456)
heatmap_data_matrix <- matrix(rnorm(50 * 10, mean = 0, sd = 1.5), nrow = 50,
ncol = 10, dimnames = list(paste0("Prot", 1:50), paste0("Sample", 1:10)))
heatmap_data_matrix[, 1:5] <- heatmap_data_matrix[, 1:5] + 1;
heatmap_data_matrix[1:20, ] <- heatmap_data_matrix[1:20, ] + 1.5
library(tidyr); library(tibble)
heatmap_data_long <- as.data.frame(heatmap_data_matrix) %>%
rownames_to_column(var = "ProteinID") %>% pivot_longer(cols = -ProteinID,
names_to = "SampleID", values_to = "Log2FoldChange")
heatmap_plot <- ggplot(heatmap_data_long, aes(x = SampleID, y = ProteinID, fill
= Log2FoldChange)) + geom_tile(color = "white", size = 0.1) +
scale_fill_gradient2(low = "blue", mid = "white", high = "red", midpoint = 0) +
labs(title = "Protein Expression Heatmap", x = "Sample", y = "Protein", fill =
"Log2 FC") + theme_minimal() + theme(axis.text.y = element_text(size = 6),
axis.text.x = element_text(angle = 45, hjust = 1, size = 8), legend.position =
"bottom")
print(heatmap_plot)
set.seed(789); num_proteins_volcano <- 2000
volcano_data <- tibble(ProteinID = paste0("Prot", 1:num_proteins_volcano),
log2FoldChange = rnorm(num_proteins_volcano, mean = 0, sd = 1.5), p.value =
runif(num_proteins_volcano, min = 0, max = 1))
up_indices <- 1:100; volcano_data$`log2FoldChange[up_indices] <-
rnorm(length(up_indices), mean = 3, sd = 1); volcano_data`$p.value[up_indices]
<- rbeta(length(up_indices), 1, 50)
down_indices <- 101:200; volcano_data$`log2FoldChange[down_indices] <-
rnorm(length(down_indices), mean = -3, sd = 1);
volcano_data`$p.value[down_indices] <- rbeta(length(down_indices), 1, 50)
volcano_data <- volcano_data %>% mutate(negLog10p = -log10(p.value),
Significance = case_when(log2FoldChange > 1.5 & p.value < 0.01 ~ "Upregulated",
log2FoldChange < -1.5 & p.value < 0.01 ~ "Downregulated", TRUE ~ "Not
Significant"), Significance = factor(Significance, levels = c("Upregulated",
"Downregulated", "Not Significant")))
volcano_plot <- ggplot(volcano_data, aes(x = log2FoldChange, y = negLog10p,
color = Significance)) + geom_point(alpha = 0.6, size = 1.5) +
scale_color_manual(values = c("Upregulated" = "red", "Downregulated" = "blue",
"Not Significant" = "grey")) + labs(title = "Volcano Plot of Differential
Protein Abundance", x = "Log2 Fold Change", y = "-Log10 P-value") +
theme_minimal() + theme(legend.position = "bottom") + geom_hline(yintercept = -
log10(0.01), linetype = "dashed", color = "darkgrey") + geom_vline(xintercept =
c(-1.5, 1.5), linetype = "dashed", color = "darkgrey")

# Chapter 6 - Shiny
library(shiny); library(ggplot2)
ui <- fluidPage(titlePanel("Old Faithful Geyser Data Histogram"),
sidebarLayout(sidebarPanel(sliderInput(inputId = "bins", label = "Number of
bins:", min = 1, max = 50, value = 30)), mainPanel(plotOutput(outputId =
"distPlot"))))
server <- function(input, output) { output$`distPlot <- renderPlot({ x <-
faithful[, 2]; bins <- seq(min(x), max(x), length.out = input`$bins + 1);
hist(x, breaks = bins, col = "darkgray", border = "white", xlab = "Waiting time
to next eruption (in mins)", main = "Histogram of waiting times") }) }
shinyApp(ui = ui, server = server)

# Chapter 7 - Performance
slow_function <- function(n) { result <- 0; for (i in 1:n) { result <- result +
log(sqrt(i^2.5)) }; Sys.sleep(0.1); return(result) }
execution_time <- system.time({ result_single <- slow_function(10000) });
print(execution_time)
num_reps <- 10
system.time({ results_sequential <- vector("list", num_reps); for(i in
1:num_reps) { results_sequential[[i]] <- slow_function(5000) } })

R Cheat Sheet Merged
100% (2)
R Cheat Sheet Merged
35 pages
R Cheat Sheet
No ratings yet
R Cheat Sheet
4 pages
R Cheat Sheet: 1. Basics 4. Input and Export of Data
100% (1)
R Cheat Sheet: 1. Basics 4. Input and Export of Data
4 pages
Shaurya Sharma PS LAB WORK
No ratings yet
Shaurya Sharma PS LAB WORK
70 pages
R Program 2025,-1
No ratings yet
R Program 2025,-1
11 pages
R File Code
No ratings yet
R File Code
16 pages
Arunav Da Prac
No ratings yet
Arunav Da Prac
55 pages
Practical 1 - Basics of R
No ratings yet
Practical 1 - Basics of R
8 pages
R Basics PDF
No ratings yet
R Basics PDF
10 pages
An Introduction To R: Biostatistics 615/815
No ratings yet
An Introduction To R: Biostatistics 615/815
59 pages
R Lab Program
No ratings yet
R Lab Program
21 pages
R Programing Bhagu
No ratings yet
R Programing Bhagu
40 pages
Nishant R File
No ratings yet
Nishant R File
49 pages
R Progm
No ratings yet
R Progm
17 pages
5 Sem Lab Manual R Programming BCA-BSC
No ratings yet
5 Sem Lab Manual R Programming BCA-BSC
16 pages
An Introduction To R Language
No ratings yet
An Introduction To R Language
11 pages
A Short List of Some Useful R Commands: Input and Display
No ratings yet
A Short List of Some Useful R Commands: Input and Display
2 pages
R Programmimg Practical Journal All-1
No ratings yet
R Programmimg Practical Journal All-1
25 pages
Data Science
No ratings yet
Data Science
20 pages
Basic R Programming
No ratings yet
Basic R Programming
16 pages
R Programming Lab
No ratings yet
R Programming Lab
14 pages
50 R Exercises
No ratings yet
50 R Exercises
44 pages
R Cheatsheet Base R
No ratings yet
R Cheatsheet Base R
2 pages
R Intro STAT5000
No ratings yet
R Intro STAT5000
17 pages
R Lab Set2
No ratings yet
R Lab Set2
3 pages
R Cheat Sheet: Data Manipulation Files & Objects
No ratings yet
R Cheat Sheet: Data Manipulation Files & Objects
1 page
Program 1a
No ratings yet
Program 1a
5 pages
File 2
No ratings yet
File 2
17 pages
Workshop Activity: X Seq y Length
No ratings yet
Workshop Activity: X Seq y Length
3 pages
Cost Practical
No ratings yet
Cost Practical
13 pages
R Practical File
No ratings yet
R Practical File
17 pages
Statistic and R Programming Lab Exercise
No ratings yet
Statistic and R Programming Lab Exercise
8 pages
File 2
No ratings yet
File 2
17 pages
Bdo Co1 Session 4
No ratings yet
Bdo Co1 Session 4
43 pages
R Manual
No ratings yet
R Manual
10 pages
R Textbook Full
No ratings yet
R Textbook Full
96 pages
Introduction To R For Gene Expression Data Analysis
No ratings yet
Introduction To R For Gene Expression Data Analysis
11 pages
R Lab Manual
No ratings yet
R Lab Manual
19 pages
S24 Stats10 Lab1-1
No ratings yet
S24 Stats10 Lab1-1
8 pages
R Program
No ratings yet
R Program
22 pages
A Short List of The Most Useful R Commands
No ratings yet
A Short List of The Most Useful R Commands
8 pages
Write A R Program For Different Types of Data Structures
No ratings yet
Write A R Program For Different Types of Data Structures
11 pages
BD
No ratings yet
BD
12 pages
Computing Systems DS AI Lab Manual
No ratings yet
Computing Systems DS AI Lab Manual
68 pages
Unit - 3 Learning Notes
No ratings yet
Unit - 3 Learning Notes
8 pages
RP Lab Manual
No ratings yet
RP Lab Manual
24 pages
R Lab
No ratings yet
R Lab
15 pages
Glocal University: Practical File of R Programming
100% (1)
Glocal University: Practical File of R Programming
32 pages
A Short List of The Most Useful R Commands
No ratings yet
A Short List of The Most Useful R Commands
11 pages
R Tools LAB
No ratings yet
R Tools LAB
31 pages
First Machine Problem
No ratings yet
First Machine Problem
6 pages
Cb161 Lab Manual
No ratings yet
Cb161 Lab Manual
25 pages
23DSCP206-Data Mining Using R
No ratings yet
23DSCP206-Data Mining Using R
14 pages
R Commands: Appendix B
No ratings yet
R Commands: Appendix B
5 pages
Purrr
No ratings yet
Purrr
2 pages
R Programming
No ratings yet
R Programming
23 pages
Statistics With R
No ratings yet
Statistics With R
5 pages
DAUR Lab Manual
No ratings yet
DAUR Lab Manual
14 pages
Westinghouse Multi-Site ISO 9001 Certificate Jan 22 2019
No ratings yet
Westinghouse Multi-Site ISO 9001 Certificate Jan 22 2019
6 pages
DBT Exercises
100% (1)
DBT Exercises
24 pages
Sulphuric Acid Production-Installation 3-5
No ratings yet
Sulphuric Acid Production-Installation 3-5
8 pages
A Summary and Analysis of John Milton
No ratings yet
A Summary and Analysis of John Milton
3 pages
Dual-Frequency Proximity and Contactless Readers For Identifi Cation and Enrollment
No ratings yet
Dual-Frequency Proximity and Contactless Readers For Identifi Cation and Enrollment
2 pages
2025-2026 ADMISSION OFFERS FOR CITY CAMPUS AND ODeL
No ratings yet
2025-2026 ADMISSION OFFERS FOR CITY CAMPUS AND ODeL
120 pages
Annex B Action Plan Monitoring Tool
No ratings yet
Annex B Action Plan Monitoring Tool
4 pages
Operating System: Deployment Guide Automating Windows NT Setup
No ratings yet
Operating System: Deployment Guide Automating Windows NT Setup
129 pages
Business Analytics, Volume II - A Data Driven Decision Making Approach For Business
100% (3)
Business Analytics, Volume II - A Data Driven Decision Making Approach For Business
421 pages
Resume Vincent Wang 2021
No ratings yet
Resume Vincent Wang 2021
1 page
Aptiv DRod
No ratings yet
Aptiv DRod
4 pages
9bows Itinerary
No ratings yet
9bows Itinerary
11 pages
BS 5839 Generalities
No ratings yet
BS 5839 Generalities
2 pages
PRAJYA June 2024 Digital File
No ratings yet
PRAJYA June 2024 Digital File
68 pages
Course Outline - Qualitative Research Spring '24
No ratings yet
Course Outline - Qualitative Research Spring '24
5 pages
Agile Devops Planning Transition Transformation
No ratings yet
Agile Devops Planning Transition Transformation
1 page
4 - TLC
No ratings yet
4 - TLC
4 pages
The Persistence of Hollywood Thomas Elsasser PDF Download
100% (1)
The Persistence of Hollywood Thomas Elsasser PDF Download
37 pages
WhatsApp Inc United Kingdom
No ratings yet
WhatsApp Inc United Kingdom
1 page
Hair and Jewellery Policy
No ratings yet
Hair and Jewellery Policy
2 pages
Sales Territories
No ratings yet
Sales Territories
28 pages
Lecture Notes in Mechanical Engineering
No ratings yet
Lecture Notes in Mechanical Engineering
193 pages
Curriculum Vitae Hindi Meaning
100% (2)
Curriculum Vitae Hindi Meaning
4 pages
Chapter Review: A Geographer's World
No ratings yet
Chapter Review: A Geographer's World
2 pages
Lighting Load Calculation Report: Preliminary
No ratings yet
Lighting Load Calculation Report: Preliminary
14 pages
MSC 9-Group 9 Presentation - Public Procurement and Public Disposal of Assets Act (Part IX)
No ratings yet
MSC 9-Group 9 Presentation - Public Procurement and Public Disposal of Assets Act (Part IX)
16 pages
Journal of Physiotherapy
No ratings yet
Journal of Physiotherapy
13 pages
Corning DAS Architecture
No ratings yet
Corning DAS Architecture
39 pages
Sapngan SportsFest 2024 Proposal
No ratings yet
Sapngan SportsFest 2024 Proposal
3 pages
QDD 400G VSR4 C - Datasheets - EN
No ratings yet
QDD 400G VSR4 C - Datasheets - EN
10 pages