R Code Compact

The document provides a comprehensive overview of R programming for bioinformatics, covering various topics such as data structures, arithmetic operations, logical operations, statistical functions, string functions, and data frames. It includes practical examples and code snippets for each topic, demonstrating how to manipulate data, perform calculations, and visualize results. Additionally, it introduces advanced data structures like lists and tibbles, and explains their usage in data analysis.

Uploaded by

mattiullahkhan860

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as PDF, TXT or read online on Scribd

0% found this document useful (0 votes)

3 views10 pages

R Code Compact

Uploaded by

mattiullahkhan860

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as PDF, TXT or read online on Scribd

You are on page 1/ 10

R Code Compilation - All Chapters

# Chapter 1 - Getting Started

print("Hello, R for Bioinformatics!")
5 + 5
my_first_variable <- 42
my_first_variable
?mean
values <- c(10, 15, 20, 25, 30)
mean_value <- mean(values)
print(paste("The mean value is:", mean_value))
plot(1:10, main="My First Plot", xlab="X axis", ylab="Y axis", type="o",
col="blue")
x <- 10; y <- 20; z = 30; Name <- "John"

# Chapter 2 - Data Structures

gene_lengths <- c(1500, 2300, 850, 4100)
print(gene_lengths)
name <- "jane"; print(Name); print(name)
sqrt(25); round(3.14159, digits=2)
count <- 42; message <- "Hello, Bioinformatics!"; is_active <- TRUE
print(count); print(message); print(is_active)
class(count); class(message); class(is_active)
count <- count + 8; print(count)
gene_count <- 100; proteinMass <- 45.6; sample_1 <- "ATCG"
weight <- 65.5; class(weight)
count <- 42L; class(count)
dna_sequence <- "ATGCAATAGCATCGAT"; class(dna_sequence)
is_significant <- TRUE; class(is_significant)
complex_number <- 3 + 4i; class(complex_number)
age_str <- "25"; age_num <- as.numeric(age_str)
print(age_num); class(age_num)
temp <- 37.5; temp_str <- as.character(temp)
print(temp_str); class(temp_str)
logical_vector <- c(TRUE, FALSE, TRUE)
numeric_vector <- as.numeric(logical_vector); print(numeric_vector)
measurements <- c(10.2, NA, 12.5, 14.1); print(measurements)
mean(measurements); mean(measurements, na.rm=TRUE)
empty_var <- NULL; print(empty_var); length(empty_var)
pos_inf <- 1/0; print(pos_inf)
neg_inf <- -1/0; print(neg_inf)
undefined <- 0/0; print(undefined)

# Arithmetic Operations
a <- 10; b <- 3
addition <- a + b; subtraction <- a - b; multiplication <- a * b
division <- a / b; exponentiation <- a ^ b; modulus <- a %% b
int_division <- a %/% b
print(paste("Addition:", addition))
print(paste("Subtraction:", subtraction))
print(paste("Multiplication:", multiplication))
print(paste("Division:", division))
print(paste("Exponentiation:", exponentiation))
print(paste("Modulus:", modulus))
print(paste("Integer Division:", int_division))
result <- 2 + 3 * 4; print(result)
result_with_parentheses <- (2 + 3) * 4; print(result_with_parentheses)

# Comparison Operations
x <- 5; y <- 10
equal <- x == y; not_equal <- x != y; greater_than <- x > y
less_than <- x < y; greater_equal <- x >= y; less_equal <- x <= y
print(paste("Equal:", equal))
print(paste("Not Equal:", not_equal))
print(paste("Greater Than:", greater_than))
print(paste("Less Than:", less_than))
print(paste("Greater Than or Equal:", greater_equal))
print(paste("Less Than or Equal:", less_equal))
num_value <- 5; char_value <- "5"
print(num_value == char_value)
value <- 10; print(value == NA); print(is.na(value))

# Logical Operations
a <- TRUE; b <- FALSE; and_result <- a & b
or_result <- a | b; not_result <- !a
print(paste("AND:", and_result))
print(paste("OR:", or_result))
print(paste("NOT:", not_result))
vec1 <- c(TRUE, FALSE, TRUE); vec2 <- c(FALSE, FALSE, TRUE)
and_vec <- vec1 & vec2; or_vec <- vec1 | vec2
print("Vectorized AND:"); print(and_vec)
print("Vectorized OR:"); print(or_vec)
first_elements_and <- vec1 && vec2; print(first_elements_and)
complex_condition <- (x > 0) & (y < 20); print(complex_condition)

# Mathematical Functions
num <- 16; sqrt_num <- sqrt(num); abs_neg <- abs(-10)
log_num <- log(10); log10_num <- log10(100); exp_num <- exp(1)
print(paste("Square root of", num, ":", sqrt_num))
print(paste("Absolute value of -10:", abs_neg))
print(paste("Natural log of 10:", log_num))
print(paste("Log base 10 of 100:", log10_num))
print(paste("e^1:", exp_num))

# Statistical Functions
data <- c(4, 7, 12, 5, 8, 15, 9)
mean_val <- mean(data); median_val <- median(data); sd_val <- sd(data)
min_val <- min(data); max_val <- max(data); sum_val <- sum(data)
print(paste("Mean:", mean_val))
print(paste("Median:", median_val))
print(paste("Standard Deviation:", sd_val))
print(paste("Minimum:", min_val))
print(paste("Maximum:", max_val))
print(paste("Sum:", sum_val))

# String Functions
first_name <- "john"; last_name <- "doe"
full_name <- paste(first_name, last_name); print(full_name)
full_name_comma <- paste(last_name, first_name, sep=", ");
print(full_name_comma)
upper_name <- toupper(full_name); lower_name <- tolower("HELLO WORLD")
print(upper_name); print(lower_name)
substr_result <- substr("ATGCAATAGCATCGAT", start=4, stop=8);
print(substr_result)

# Utility Functions
vec <- c(10, 20, NA, 40, 50)
vec_length <- length(vec); has_na <- is.na(vec); vec_class <- class(vec)
print(paste("Vector length:", vec_length))
print("NA positions:"); print(has_na)
print(paste("Vector class:", vec_class))
pi_value <- pi; print(pi_value)
rounded_pi <- round(pi_value); print(rounded_pi)
rounded_pi_2 <- round(pi_value, digits=2); print(rounded_pi_2)
mean_result1 <- mean(x=c(1, 2, 3, NA), na.rm=TRUE)
mean_result2 <- mean(na.rm=TRUE, x=c(1, 2, 3, NA))
print(mean_result1); print(mean_result2)
mean_result3 <- mean(c(1, 2, 3, NA), TRUE); print(mean_result3)

# Vectors
class(gene_lengths)
gene_names <- c("BRCA1", "TP53", "EGFR", "MYC"); print(gene_names);
class(gene_names)
is_significant <- c(TRUE, FALSE, TRUE, TRUE); print(is_significant);
class(is_significant)
exon_counts <- c(24L, 11L, 28L, 3L); print(exon_counts); class(exon_counts)
sequence_1_to_10 <- 1:10; print(sequence_1_to_10)
sequence_by_step <- seq(from=0, to=1, by=0.2); print(sequence_by_step)
sequence_fixed_length <- seq(from=1, to=100, length.out=5);
print(sequence_fixed_length)
repeated_values <- rep("A", times=5); print(repeated_values)
repeated_pattern <- rep(c(1, 2), times=3); print(repeated_pattern)
repeated_each <- rep(c("Control", "Treated"), each=3); print(repeated_each)
mixed_vector_1 <- c(1, "two", 3); print(mixed_vector_1); class(mixed_vector_1)
mixed_vector_2 <- c(10.5, TRUE, FALSE); print(mixed_vector_2);
class(mixed_vector_2)
mixed_vector_3 <- c(5L, TRUE, 10L, FALSE); print(mixed_vector_3);
class(mixed_vector_3)

# Vector Operations
vec1 <- c(1, 2, 3, 4); vec2 <- c(10, 20, 30, 40)
sum_vec <- vec1 + vec2; print(sum_vec)
product_vec <- vec1 * 5; print(product_vec)
comparison_vec <- vec1 > 2; print(comparison_vec)
sqrt_vec <- sqrt(vec1); print(sqrt_vec)
vec_short <- c(10, 20); vec_long <- c(1, 2, 3, 4, 5, 6)
sum_recycled <- vec_long + vec_short; print(sum_recycled)

# Vector Indexing
gene_expression <- c(15.2, 8.1, 22.5, 19.0, 7.5)
names(gene_expression) <- c("GeneA", "GeneB", "GeneC", "GeneD", "GeneE")
print(gene_expression)
first_element <- gene_expression[1]; print(first_element)
third_and_fifth <- gene_expression[c(3, 5)]; print(third_and_fifth)
all_except_second <- gene_expression[-2]; print(all_except_second)
all_except_first_and_fourth <- gene_expression[c(-1, -4)];
print(all_except_first_and_fourth)
low_expression_indices <- gene_expression < 10; print(low_expression_indices)
low_expression_values <- gene_expression[low_expression_indices];
print(low_expression_values)
high_expression_values <- gene_expression[gene_expression > 18];
print(high_expression_values)
gene_c_value <- gene_expression["GeneC"]; print(gene_c_value)
genes_b_and_d <- gene_expression[c("GeneB", "GeneD")]; print(genes_b_and_d)
gene_expression[1] <- 16.0; gene_expression["GeneE"] <- 8.0
gene_expression[gene_expression < 10] <- 10.0; print(gene_expression)

# Matrices
matrix_data <- 1:12
my_matrix <- matrix(data = matrix_data, nrow = 3, ncol = 4); print(my_matrix)
my_matrix_byrow <- matrix(data = matrix_data, nrow = 3, ncol = 4, byrow =
TRUE); print(my_matrix_byrow)
matrix_direct <- matrix(c(5, 10, 15, 20, 25, 30), nrow=2); print(matrix_direct)
rownames(my_matrix) <- c("Sample1", "Sample2", "Sample3")
colnames(my_matrix) <- c("GeneA", "GeneB", "GeneC", "GeneD"); print(my_matrix)
dim(my_matrix); nrow(my_matrix); ncol(my_matrix)
matrix1 <- matrix(1:6, nrow=2); matrix2 <- matrix(10:15, nrow=2)
print("Matrix 1:"); print(matrix1); print("Matrix 2:"); print(matrix2)
matrix_sum <- matrix1 + matrix2; print("Element-wise Sum:"); print(matrix_sum)
matrix_product_elementwise <- matrix1 * matrix2; print("Element-wise
Product:"); print(matrix_product_elementwise)
matrix_scaled <- matrix1 * 10; print("Scaled Matrix 1:"); print(matrix_scaled)
matrix_a <- matrix(1:6, nrow=2, ncol=3); matrix_b <- matrix(1:6, nrow=3,
ncol=2)
print("Matrix A (2x3):"); print(matrix_a); print("Matrix B (3x2):");
print(matrix_b)
matrix_product_matmul <- matrix_a %*% matrix_b; print("Matrix Multiplication (A
%*% B):"); print(matrix_product_matmul)
transposed_matrix_a <- t(matrix_a); print("Transposed Matrix A (3x2):");
print(transposed_matrix_a)

# Matrix Indexing
print(my_matrix)
element_2_3 <- my_matrix[2, 3]; print(element_2_3)
row_2 <- my_matrix[2, ]; print(row_2); class(row_2)
col_3 <- my_matrix[, 3]; print(col_3); class(col_3)
row_2_matrix <- my_matrix[2, , drop=FALSE]; print(row_2_matrix);
class(row_2_matrix)
col_3_matrix <- my_matrix[, 3, drop=FALSE]; print(col_3_matrix);
class(col_3_matrix)
sub_matrix <- my_matrix[c(1, 3), c(2, 4)]; print(sub_matrix)
sample1_gene_b <- my_matrix["Sample1", "GeneB"]; print(sample1_gene_b)
sample2_data <- my_matrix["Sample2", ]; print(sample2_data)
matrix_subset_logical <- my_matrix[my_matrix[, "GeneA"] > 1, ];
print(matrix_subset_logical)
my_matrix[1, 1] <- 100; my_matrix["Sample3", "GeneC"] <- 999; my_matrix[,
"GeneD"] <- 0; print(my_matrix)

# Lists
patient_info <- list(
patient_id = "P123", age = 45L, diagnosis = "Type 2 Diabetes", is_smoker =
TRUE,
lab_values = c(glucose = 150, hba1c = 7.5, cholesterol = 210),
medications = c("Metformin", "Lisinopril")
)
print(patient_info); str(patient_info)
complex_list <- list(
info = "Experiment Details", samples = c("S1", "S2", "S3"),
expression_matrix = matrix(rnorm(9), nrow=3, dimnames=list(paste0("G", 1:3),
paste0("S", 1:3))),
metadata = patient_info
)
print(complex_list); str(complex_list)

# List Indexing
print(patient_info)
patient_id_value <- patient_info[[1]]; print(patient_id_value);
class(patient_id_value)
diagnosis_value <- patient_info[["diagnosis"]]; print(diagnosis_value);
class(diagnosis_value)
age_value <- patient_info$age; print(age_value); class(age_value)
lab_values_vector <- patient_info$lab_values; print(lab_values_vector);
class(lab_values_vector)
first_two_elements_list <- patient_info[1:2]; print(first_two_elements_list);
class(first_two_elements_list)
medications_list <- patient_info["medications"]; print(medications_list);
class(medications_list)
first_medication <- patient_info$medications[1]; print(first_medication)
glucose_value <- patient_info$lab_values["glucose"]; print(glucose_value)
print(complex_list$`metadata`$patient_id); print(complex_list[["metadata"]]
[["age"]])
patient_info$age <- 46L; patient_info[["is_smoker"]] <- FALSE
patient_info$`medications <- c(patient_info`$medications, "Aspirin")
patient_info$new_field <- "Some new information"; print(patient_info)
patient_info$new_field <- NULL; print(patient_info)

# Data Frames
gene_ids <- c("Gene1", "Gene2", "Gene3", "Gene4")
chromosome <- c("chr1", "chr2", "chr1", "chrX")
expression_level <- c(5.2, 1.8, 7.1, 4.5)
is_expressed <- c(TRUE, TRUE, TRUE, TRUE)
gene_data_df <- data.frame(GeneID = gene_ids, Chromosome = chromosome,
Expression = expression_level, Expressed = is_expressed)
print(gene_data_df); class(gene_data_df); str(gene_data_df)
gene_data_df_no_factors <- data.frame(GeneID = gene_ids, Chromosome =
chromosome, Expression = expression_level, Expressed = is_expressed,
stringsAsFactors = FALSE)
str(gene_data_df_no_factors)

# Tibbles
library(tibble)
gene_data_tbl <- tibble(GeneID = gene_ids, Chromosome = chromosome, Expression
= expression_level, Expressed = is_expressed)
print(gene_data_tbl); class(gene_data_tbl); str(gene_data_tbl)
element_2_3_df <- gene_data_tbl[2, 3]; print(element_2_3_df)
row_1_df <- gene_data_tbl[1, ]; print(row_1_df); class(row_1_df)
expression_col_matrix_style <- gene_data_tbl[, "Expression"];
print(expression_col_matrix_style); class(expression_col_matrix_style)
expression_col_list_style <- gene_data_tbl$Expression;
print(expression_col_list_style); class(expression_col_list_style)
chromosome_col_list_style <- gene_data_tbl[["Chromosome"]];
print(chromosome_col_list_style); class(chromosome_col_list_style)
expressed_high <- gene_data_tbl[gene_data_tbl$Expression > 5, ];
print(expressed_high)
chr1_genes <- gene_data_tbl[gene_data_tbl$Chromosome == "chr1", ];
print(chr1_genes)
gene_expression_subset <- gene_data_tbl[, c("GeneID", "Expression")];
print(gene_expression_subset)
gene_expression_subset_list <- gene_data_tbl[c("GeneID", "Expression")];
print(gene_expression_subset_list)
gene_data_tbl$`Expression[1] <- 6.0; gene_data_tbl`$LogExpression <-
log2(gene_data_tbl$Expression); print(gene_data_tbl)
gene_data_tbl$Expressed <- NULL; print(gene_data_tbl)

# Chapter 3 - Programming Fundamentals

gc_content <- 0.65
if (gc_content > 0.6) { print("High GC content detected.") }
expression_level <- 4.5
if (expression_level > 5.0) { print("Gene is highly expressed.") } else {
print("Gene expression is not high.") }
sequence_length <- 1500
if (sequence_length < 100) { print("Short sequence.") } else if
(sequence_length < 1000) { print("Medium sequence.") } else { print("Long
sequence.") }
expression_values <- c(2.1, 8.5, 4.9, 12.3, 1.5); threshold <- 5.0
classification <- ifelse(expression_values > threshold, "High", "Low");
print(classification)
p_values <- c(0.01, 0.25, 0.04, 0.001, 0.5); significance_level <- 0.05

# dplyr Operations
library(dplyr)
data(iris); iris_tbl <- as_tibble(iris); print(iris_tbl)
setosa_data <- filter(iris_tbl, Species == "setosa"); print(setosa_data)
versicolor_long_sepal <- filter(iris_tbl, Species == "versicolor", Sepal.Length
> 6); print(versicolor_long_sepal)
sorted_by_petal_length <- arrange(iris_tbl, desc(Petal.Length));
print(head(sorted_by_petal_length))
sorted_multi <- arrange(iris_tbl, Species, Sepal.Width);
print(head(sorted_multi))
sepal_species_data <- select(iris_tbl, Species, Sepal.Length);
print(head(sepal_species_data))
no_species_data <- select(iris_tbl, -Species); print(head(no_species_data))
petal_data <- select(iris_tbl, starts_with("Petal")); print(head(petal_data))
iris_with_area <- mutate(iris_tbl, Petal.Area = Petal.Length * Petal.Width)
print(head(select(iris_with_area, Petal.Length, Petal.Width, Petal.Area)))
iris_transformed <- mutate(iris_tbl, Sepal.Length.cm = Sepal.Length / 10,
Petal.Length.cm = Petal.Length / 10)
print(head(iris_transformed))
summary_petal_length <- summarise(iris_tbl, avg_petal_length =
mean(Petal.Length)); print(summary_petal_length)
summary_stats <- summarise(iris_tbl, avg_sepal_width = mean(Sepal.Width),
max_petal_length = max(Petal.Length), n_observations = n())
print(summary_stats)
species_summary <- group_by(iris_tbl, Species); print(species_summary)
species_avg_stats <- summarise(species_summary, avg_sepal_length =
mean(Sepal.Length), avg_sepal_width = mean(Sepal.Width), count = n())
print(species_avg_stats)
species_avg_stats_ungrouped <- ungroup(species_avg_stats);
print(species_avg_stats_ungrouped)
result_nested <- summarise(filter(mutate(iris_tbl, Petal.Area = Petal.Length *
Petal.Width), Species == "virginica", Sepal.Length > 7), avg_petal_area =
mean(Petal.Area))
print(result_nested)

# Chapter 4 - Bioinformatics
BiocManager::install(c("Biostrings", "GenomicRanges", "rtracklayer",
"SummarizedExperiment"))
library(Biostrings)
dna_seq <- DNAString("ATGCAATAGCATCGAT"); print(dna_seq); length(dna_seq)
complement_seq <- complement(dna_seq); print(complement_seq)
reverse_complement_seq <- reverseComplement(dna_seq);
print(reverse_complement_seq)
gc_content_seq <- letterFrequency(dna_seq, "GC", as.prob=TRUE);
print(gc_content_seq)
dna_set <- DNAStringSet(c("ATGCAATAGCATCGAT", "GCTAGCTAGCTA", "AAATTTCCCGGG"))
names(dna_set) <- c("Seq1", "Seq2", "Seq3"); print(dna_set)
writeXStringSet(dna_set, "example_sequences.fasta")
loaded_sequences <- readDNAStringSet("example_sequences.fasta");
print(loaded_sequences)

# GenomicRanges
library(GenomicRanges)
gr <- GRanges(
seqnames = Rle(c("chr1", "chr2", "chr1", "chr3"), c(1, 3, 2, 4)),
ranges = IRanges(start=c(101, 201, 401, 501, 601, 701, 801, 901, 1001, 1101),
end=c(110, 250, 450, 550, 650, 750, 850, 950, 1050, 1150)),
strand = Rle(strand(c("-", "+", "*", "+", "-")), c(1, 2, 2, 3, 2)),
score = 1:10, GC = seq(0.4, 0.58, length.out=10)
)
print(gr); seqnames(gr); ranges(gr); start(gr); end(gr); width(gr); strand(gr)
mcols(gr); gr$`score; gr`$GC; length(gr)
first_three_ranges <- gr[1:3]; print(first_three_ranges)
chr1_ranges <- gr[seqnames(gr) == "chr1"]; print(chr1_ranges)
high_score_ranges <- gr[gr$score > 7]; print(high_score_ranges)
shifted_gr <- shift(gr, shift = 1000); print(shifted_gr)
resized_gr_start <- resize(gr, width = 20, fix = "start");
print(resized_gr_start)
resized_gr_end <- resize(gr, width = 100, fix = "end"); print(resized_gr_end)
gr_query <- GRanges("chr1", IRanges(c(120, 360), c(180, 410)), score=c(50, 80))
print("Query Ranges:"); print(gr_query); print("Subject Ranges (gr):");
print(gr)
overlaps <- findOverlaps(gr_query, gr); print("Overlap results (indices):");
print(overlaps)
query_hits <- gr_query[queryHits(overlaps)]; subject_hits <-
gr[subjectHits(overlaps)]
print("Query ranges that overlap:"); print(query_hits); print("Subject ranges
that overlap:"); print(subject_hits)
subset_by_overlap <- subsetByOverlaps(gr, gr_query); print("Ranges in gr that
overlap with gr_query:"); print(subset_by_overlap)
nearest_indices <- nearest(gr_query, gr); print("Index of nearest range in gr
for each range in gr_query:"); print(nearest_indices);
print(gr[nearest_indices])
distances <- distanceToNearest(gr_query, gr); print("Distances to nearest
ranges:"); print(distances)
gr_a <- GRanges("chr1", IRanges(c(100, 200), c(150, 250))); gr_b <-
GRanges("chr1", IRanges(c(130, 260), c(180, 300)))
union_gr <- union(gr_a, gr_b); intersect_gr <- intersect(gr_a, gr_b);
setdiff_gr <- setdiff(gr_a, gr_b)
print("Union:"); print(union_gr); print("Intersect:"); print(intersect_gr);
print("Set Difference (A-B):"); print(setdiff_gr)
coverage_chr1 <- coverage(gr[seqnames(gr) == "chr1"]); print(coverage_chr1)

# rtracklayer
library(rtracklayer)
bed_content <- c("chr1\t100\t150\tGeneA\t10\t+", "chr1\t300\t380\tGeneB\t25\t-
", "chr2\t200\t250\tGeneC\t15\t+")
writeLines(bed_content, "example.bed"); bed_gr <- import("example.bed", format
= "bed"); print(bed_gr)
gtf_content <- c("chr1\tHAVANA\texon\t11869\t12227\t.\t+\t.\tgene_id
\"ENSG001\"; transcript_id \"ENST001\"; exon_number 1;",
"chr1\tHAVANA\texon\t12613\t12721\t.\t+\t.\tgene_id \"ENSG001\"; transcript_id
\"ENST001\"; exon_number 2;",
"chr1\tHAVANA\tgene\t11869\t14409\t.\t+\t.\tgene_id \"ENSG001\"; gene_name
\"DDX11L1\";", "chr2\tHAVANA\texon\t5000\t5500\t.\t-\t.\tgene_id \"ENSG002\";
transcript_id \"ENST002\"; exon_number 1;")
writeLines(gtf_content, "example.gtf"); gtf_gr <- import("example.gtf", format
= "gtf"); print(gtf_gr); print(mcols(gtf_gr))
exons_gr <- gtf_gr[gtf_gr$type == "exon"]; print(exons_gr)
exons_gr$`score <- sample(1:100, length(exons_gr), replace=TRUE);
exons_gr`$name <- paste0("Exon_", 1:length(exons_gr))
export(exons_gr, "exported_exons.bed", format = "bed")

# SummarizedExperiment
library(SummarizedExperiment)
counts_matrix <- matrix(rpois(100, lambda = 10), nrow = 20, ncol = 5, dimnames
= list(paste0("Gene", 1:20), paste0("Sample", 1:5)))
print(head(counts_matrix))
feature_data <- GRanges(seqnames = sample(c("chr1", "chr2"), 20, replace =
TRUE), ranges = IRanges(start = sample(1:10000, 20), width = sample(50:500,
20)), strand = sample(c("+", "-"), 20, replace = TRUE), gene_id =
paste0("Gene", 1:20), gc_content = rnorm(20, 0.5, 0.05))
names(feature_data) <- rownames(counts_matrix); print(feature_data)
sample_data <- DataFrame(Treatment = rep(c("Control", "Treated"), length.out =
5), Replicate = c(1, 1, 2, 2, 3), row.names = colnames(counts_matrix))
print(sample_data)
se <- SummarizedExperiment(assays = list(counts = counts_matrix), rowData =
feature_data, colData = sample_data); print(se)
assay(se); assays(se); rowData(se); colData(se); dim(se); nrow(se); ncol(se)
se_subset_genes <- se[1:5, ]; print(se_subset_genes)
se_subset_samples <- se[, se$Treatment == "Treated"]; print(se_subset_samples)
se_subset_both <- se[rowData(se)$`gc_content > 0.55, se`$Replicate == 1];
print(se_subset_both)
normalized_counts <- log2(counts_matrix + 1); assay(se, "logcounts") <-
normalized_counts; print(assays(se))

# Chapter 5 - Data Visualization

library(ggplot2); library(dplyr)
data(iris); iris_tbl <- as_tibble(iris)
base_plot <- ggplot(data = iris_tbl, mapping = aes(x = Sepal.Length, y =
Sepal.Width, color = Species))
scatter_plot <- base_plot + geom_point(); print(scatter_plot)
ggplot(data = iris_tbl, aes(x = Sepal.Length, y = Sepal.Width, color =
Species)) + geom_point()
set.seed(123); num_proteins <- 1000
proteomics_data <- tibble(ProteinID = paste0("Prot", 1:num_proteins),
Control_Abundance = rnorm(num_proteins, mean = 10, sd = 2), Treated_Abundance =
Control_Abundance + rnorm(num_proteins, mean = 0.5, sd = 1.5))
proteomics_data$`Treated_Abundance[1:50] <-
proteomics_data`$Treated_Abundance[1:50] + rnorm(50, mean = 3, sd = 1)
proteomics_data$`Treated_Abundance[51:100] <-
proteomics_data`$Treated_Abundance[51:100] - rnorm(50, mean = 3, sd = 1)
scatter_proteomics <- ggplot(proteomics_data, aes(x = Control_Abundance, y =
Treated_Abundance)) + geom_point(alpha = 0.5) + labs(title = "Protein
Abundance: Treated vs. Control", x = "Log2 Abundance (Control)", y = "Log2
Abundance (Treated)") + theme_minimal()
print(scatter_proteomics)
scatter_proteomics_diag <- scatter_proteomics + geom_abline(intercept = 0,
slope = 1, color = "red", linetype = "dashed"); print(scatter_proteomics_diag)
box_plot_iris <- ggplot(iris_tbl, aes(x = Species, y = Sepal.Width, fill =
Species)) + geom_boxplot() + labs(title = "Sepal Width Distribution by
Species", x = "Species", y = "Sepal Width") + theme_light()
print(box_plot_iris)
violin_plot_iris <- ggplot(iris_tbl, aes(x = Species, y = Petal.Length, fill =
Species)) + geom_violin(trim = FALSE, alpha = 0.7) + geom_jitter(width = 0.1,
height = 0, alpha = 0.5, size = 1.5) + labs(title = "Petal Length Distribution
by Species", x = "Species", y = "Petal Length") + theme_bw()
print(violin_plot_iris)
histogram_petal <- ggplot(iris_tbl, aes(x = Petal.Length)) +
geom_histogram(binwidth = 0.2, fill = "skyblue", color = "black") + labs(title
= "Histogram of Petal Lengths", x = "Petal Length", y = "Frequency")
print(histogram_petal)
density_petal_species <- ggplot(iris_tbl, aes(x = Petal.Length, fill = Species,
color = Species)) + geom_density(alpha = 0.5) + labs(title = "Density of Petal
Lengths by Species", x = "Petal Length", y = "Density")
print(density_petal_species)
bar_plot_counts <- ggplot(iris_tbl, aes(x = Species, fill = Species)) +
geom_bar() + labs(title = "Count of Observations per Species", x = "Species", y
= "Count")
print(bar_plot_counts)
iris_summary <- iris_tbl %>% group_by(Species) %>% summarise(Avg.Sepal.Length =
mean(Sepal.Length)); print(iris_summary)
bar_plot_means <- ggplot(iris_summary, aes(x = Species, y = Avg.Sepal.Length,
fill = Species)) + geom_col() + labs(title = "Average Sepal Length per
Species", x = "Species", y = "Average Sepal Length")
print(bar_plot_means)
set.seed(456)
heatmap_data_matrix <- matrix(rnorm(50 * 10, mean = 0, sd = 1.5), nrow = 50,
ncol = 10, dimnames = list(paste0("Prot", 1:50), paste0("Sample", 1:10)))
heatmap_data_matrix[, 1:5] <- heatmap_data_matrix[, 1:5] + 1;
heatmap_data_matrix[1:20, ] <- heatmap_data_matrix[1:20, ] + 1.5
library(tidyr); library(tibble)
heatmap_data_long <- as.data.frame(heatmap_data_matrix) %>%
rownames_to_column(var = "ProteinID") %>% pivot_longer(cols = -ProteinID,
names_to = "SampleID", values_to = "Log2FoldChange")
heatmap_plot <- ggplot(heatmap_data_long, aes(x = SampleID, y = ProteinID, fill
= Log2FoldChange)) + geom_tile(color = "white", size = 0.1) +
scale_fill_gradient2(low = "blue", mid = "white", high = "red", midpoint = 0) +
labs(title = "Protein Expression Heatmap", x = "Sample", y = "Protein", fill =
"Log2 FC") + theme_minimal() + theme(axis.text.y = element_text(size = 6),
axis.text.x = element_text(angle = 45, hjust = 1, size = 8), legend.position =
"bottom")
print(heatmap_plot)
set.seed(789); num_proteins_volcano <- 2000
volcano_data <- tibble(ProteinID = paste0("Prot", 1:num_proteins_volcano),
log2FoldChange = rnorm(num_proteins_volcano, mean = 0, sd = 1.5), p.value =
runif(num_proteins_volcano, min = 0, max = 1))
up_indices <- 1:100; volcano_data$`log2FoldChange[up_indices] <-
rnorm(length(up_indices), mean = 3, sd = 1); volcano_data`$p.value[up_indices]
<- rbeta(length(up_indices), 1, 50)
down_indices <- 101:200; volcano_data$`log2FoldChange[down_indices] <-
rnorm(length(down_indices), mean = -3, sd = 1);
volcano_data`$p.value[down_indices] <- rbeta(length(down_indices), 1, 50)
volcano_data <- volcano_data %>% mutate(negLog10p = -log10(p.value),
Significance = case_when(log2FoldChange > 1.5 & p.value < 0.01 ~ "Upregulated",
log2FoldChange < -1.5 & p.value < 0.01 ~ "Downregulated", TRUE ~ "Not
Significant"), Significance = factor(Significance, levels = c("Upregulated",
"Downregulated", "Not Significant")))
volcano_plot <- ggplot(volcano_data, aes(x = log2FoldChange, y = negLog10p,
color = Significance)) + geom_point(alpha = 0.6, size = 1.5) +
scale_color_manual(values = c("Upregulated" = "red", "Downregulated" = "blue",
"Not Significant" = "grey")) + labs(title = "Volcano Plot of Differential
Protein Abundance", x = "Log2 Fold Change", y = "-Log10 P-value") +
theme_minimal() + theme(legend.position = "bottom") + geom_hline(yintercept = -
log10(0.01), linetype = "dashed", color = "darkgrey") + geom_vline(xintercept =
c(-1.5, 1.5), linetype = "dashed", color = "darkgrey")

# Chapter 6 - Shiny
library(shiny); library(ggplot2)
ui <- fluidPage(titlePanel("Old Faithful Geyser Data Histogram"),
sidebarLayout(sidebarPanel(sliderInput(inputId = "bins", label = "Number of
bins:", min = 1, max = 50, value = 30)), mainPanel(plotOutput(outputId =
"distPlot"))))
server <- function(input, output) { output$`distPlot <- renderPlot({ x <-
faithful[, 2]; bins <- seq(min(x), max(x), length.out = input`$bins + 1);
hist(x, breaks = bins, col = "darkgray", border = "white", xlab = "Waiting time
to next eruption (in mins)", main = "Histogram of waiting times") }) }
shinyApp(ui = ui, server = server)

# Chapter 7 - Performance
slow_function <- function(n) { result <- 0; for (i in 1:n) { result <- result +
log(sqrt(i^2.5)) }; Sys.sleep(0.1); return(result) }
execution_time <- system.time({ result_single <- slow_function(10000) });
print(execution_time)
num_reps <- 10
system.time({ results_sequential <- vector("list", num_reps); for(i in
1:num_reps) { results_sequential[[i]] <- slow_function(5000) } })

Anatomy Physiology Chapter 3 Cell Structure
100% (2)
Anatomy Physiology Chapter 3 Cell Structure
35 pages
R Cheat Sheet Merged
100% (2)
R Cheat Sheet Merged
35 pages
R Cheat Sheet
No ratings yet
R Cheat Sheet
4 pages
R Cheat Sheet: 1. Basics 4. Input and Export of Data
100% (1)
R Cheat Sheet: 1. Basics 4. Input and Export of Data
4 pages
Shaurya Sharma PS LAB WORK
No ratings yet
Shaurya Sharma PS LAB WORK
70 pages
R Program 2025,-1
No ratings yet
R Program 2025,-1
11 pages
R File Code
No ratings yet
R File Code
16 pages
Arunav Da Prac
No ratings yet
Arunav Da Prac
55 pages
Practical 1 - Basics of R
No ratings yet
Practical 1 - Basics of R
8 pages
R Basics PDF
No ratings yet
R Basics PDF
10 pages
An Introduction To R: Biostatistics 615/815
No ratings yet
An Introduction To R: Biostatistics 615/815
59 pages
R Lab Program
No ratings yet
R Lab Program
21 pages
R Programing Bhagu
No ratings yet
R Programing Bhagu
40 pages
Nishant R File
No ratings yet
Nishant R File
49 pages
R Progm
No ratings yet
R Progm
17 pages
5 Sem Lab Manual R Programming BCA-BSC
No ratings yet
5 Sem Lab Manual R Programming BCA-BSC
16 pages
An Introduction To R Language
No ratings yet
An Introduction To R Language
11 pages
A Short List of Some Useful R Commands: Input and Display
No ratings yet
A Short List of Some Useful R Commands: Input and Display
2 pages
R Programmimg Practical Journal All-1
No ratings yet
R Programmimg Practical Journal All-1
25 pages
Data Science
No ratings yet
Data Science
20 pages
Basic R Programming
No ratings yet
Basic R Programming
16 pages
R Programming Lab
No ratings yet
R Programming Lab
14 pages
50 R Exercises
No ratings yet
50 R Exercises
44 pages
R Cheatsheet Base R
No ratings yet
R Cheatsheet Base R
2 pages
R Intro STAT5000
No ratings yet
R Intro STAT5000
17 pages
R Lab Set2
No ratings yet
R Lab Set2
3 pages
R Cheat Sheet: Data Manipulation Files & Objects
No ratings yet
R Cheat Sheet: Data Manipulation Files & Objects
1 page
Program 1a
No ratings yet
Program 1a
5 pages
File 2
No ratings yet
File 2
17 pages
Workshop Activity: X Seq y Length
No ratings yet
Workshop Activity: X Seq y Length
3 pages
Cost Practical
No ratings yet
Cost Practical
13 pages
R Practical File
No ratings yet
R Practical File
17 pages
Statistic and R Programming Lab Exercise
No ratings yet
Statistic and R Programming Lab Exercise
8 pages
File 2
No ratings yet
File 2
17 pages
Bdo Co1 Session 4
No ratings yet
Bdo Co1 Session 4
43 pages
R Manual
No ratings yet
R Manual
10 pages
R Textbook Full
No ratings yet
R Textbook Full
96 pages
Introduction To R For Gene Expression Data Analysis
No ratings yet
Introduction To R For Gene Expression Data Analysis
11 pages
R Lab Manual
No ratings yet
R Lab Manual
19 pages
S24 Stats10 Lab1-1
No ratings yet
S24 Stats10 Lab1-1
8 pages
R Program
No ratings yet
R Program
22 pages
A Short List of The Most Useful R Commands
No ratings yet
A Short List of The Most Useful R Commands
8 pages
Write A R Program For Different Types of Data Structures
No ratings yet
Write A R Program For Different Types of Data Structures
11 pages
BD
No ratings yet
BD
12 pages
Computing Systems DS AI Lab Manual
No ratings yet
Computing Systems DS AI Lab Manual
68 pages
Unit - 3 Learning Notes
No ratings yet
Unit - 3 Learning Notes
8 pages
RP Lab Manual
No ratings yet
RP Lab Manual
24 pages
R Lab
No ratings yet
R Lab
15 pages
Glocal University: Practical File of R Programming
100% (1)
Glocal University: Practical File of R Programming
32 pages
A Short List of The Most Useful R Commands
No ratings yet
A Short List of The Most Useful R Commands
11 pages
R Tools LAB
No ratings yet
R Tools LAB
31 pages
First Machine Problem
No ratings yet
First Machine Problem
6 pages
Cb161 Lab Manual
No ratings yet
Cb161 Lab Manual
25 pages
23DSCP206-Data Mining Using R
No ratings yet
23DSCP206-Data Mining Using R
14 pages
R Commands: Appendix B
No ratings yet
R Commands: Appendix B
5 pages
Purrr
No ratings yet
Purrr
2 pages
R Programming
No ratings yet
R Programming
23 pages
Statistics With R
No ratings yet
Statistics With R
5 pages
DAUR Lab Manual
No ratings yet
DAUR Lab Manual
14 pages
The Essential R Reference
From Everand
The Essential R Reference
Mark Gardener
No ratings yet
Amazing Java: Learn Java Quickly
From Everand
Amazing Java: Learn Java Quickly
Andrei Besedin
No ratings yet
Mechanisms of Drug Absorption-Final-1
No ratings yet
Mechanisms of Drug Absorption-Final-1
67 pages
Vit D
No ratings yet
Vit D
10 pages
Chemistry Aip
No ratings yet
Chemistry Aip
15 pages
Wykład 1 Principles of Antiviral Vaccine Design
No ratings yet
Wykład 1 Principles of Antiviral Vaccine Design
77 pages
Molecular Biology - Exam (2 Course - 4th Semester)
No ratings yet
Molecular Biology - Exam (2 Course - 4th Semester)
19 pages
Annual Report 2013-14
No ratings yet
Annual Report 2013-14
164 pages
Occurrence and Functions in Biochemistry: Dynamic Equilibrium Isoelectric Point
No ratings yet
Occurrence and Functions in Biochemistry: Dynamic Equilibrium Isoelectric Point
4 pages
Abraham (2003) - How Long Will Long-Term Potentiation Last
No ratings yet
Abraham (2003) - How Long Will Long-Term Potentiation Last
10 pages
Synaptic Transmission at The Crayfish Superficial Flexor Muscle and Third Nerve
No ratings yet
Synaptic Transmission at The Crayfish Superficial Flexor Muscle and Third Nerve
14 pages
Class 11 Bio Objective
No ratings yet
Class 11 Bio Objective
2 pages
Crack Gpat 2018
No ratings yet
Crack Gpat 2018
7 pages
Book List
No ratings yet
Book List
3 pages
Jim Bidlack - BIO 1304
No ratings yet
Jim Bidlack - BIO 1304
1 page
Chapter 1 Test Review Ap Bio
100% (3)
Chapter 1 Test Review Ap Bio
5 pages
G9 Science - Lesson 3 Non Mendelian Inheritance
No ratings yet
G9 Science - Lesson 3 Non Mendelian Inheritance
1 page
Shupe Assignment
No ratings yet
Shupe Assignment
7 pages
TE 02J TYM P4 23 10 2024 Soin
No ratings yet
TE 02J TYM P4 23 10 2024 Soin
24 pages
Pham Thi Hong Xuan Biology BSC Thesis Final
No ratings yet
Pham Thi Hong Xuan Biology BSC Thesis Final
40 pages
Kumar-2011-In Vitro Plant Propagation A Review
No ratings yet
Kumar-2011-In Vitro Plant Propagation A Review
13 pages
Genetics Midterm Exam
No ratings yet
Genetics Midterm Exam
13 pages
Neurofisiologi Dan Transmisi Sinyal: Titis Nurmasitoh, DR, M.Sc. Departemen Fisiologi FK UII
No ratings yet
Neurofisiologi Dan Transmisi Sinyal: Titis Nurmasitoh, DR, M.Sc. Departemen Fisiologi FK UII
44 pages
Biotechnology State Standards
No ratings yet
Biotechnology State Standards
5 pages
Cancer Pathophysiology
No ratings yet
Cancer Pathophysiology
10 pages
Questions
No ratings yet
Questions
66 pages
Why Do We Fall Ill
No ratings yet
Why Do We Fall Ill
42 pages
Plant Biology Pteridophytes and Monocots Vs Dicots
No ratings yet
Plant Biology Pteridophytes and Monocots Vs Dicots
6 pages
Anti Biotics & Lactam Antibiotics Visit For More Ppt's
No ratings yet
Anti Biotics & Lactam Antibiotics Visit For More Ppt's
44 pages
The Neuron and Action Potential
No ratings yet
The Neuron and Action Potential
32 pages
Zoology 11th Class Last 10 Year PYQS Paper
No ratings yet
Zoology 11th Class Last 10 Year PYQS Paper
27 pages