Module 3
Lab 3A
Install a package, e.g. GenomicRanges
Load a package
Create simple SummarizedExperiment
# Build a toy SummarizedExperiment: 5 genes (rows) x 4 samples (columns).
# Attaching SummarizedExperiment provides the constructor; DataFrame() comes
# from S4Vectors, which is attached along with it as a dependency.
library(SummarizedExperiment)

# 20 Poisson draws (lambda = 10) arranged into 4 columns, i.e. a 5 x 4 matrix.
counts <- matrix(rpois(20, 10), ncol = 4)
# Sample metadata: one entry per column of `counts`.
colData <- DataFrame(condition = c("A", "A", "B", "B"))
# Gene metadata: one entry per row of `counts`.
rowData <- DataFrame(gene = letters[1:5])
se <- SummarizedExperiment(
  assays = list(counts = counts),
  colData = colData,
  rowData = rowData
)
# Print the object.
se

# Demo 2: ALL dataset ----
Lab 3A Tasks
Extract and preview sample (patient) metadata
# Attach the ALL data package and load the `ALL` object used below.
library(ALL)
data(ALL)

# Sample (patient) metadata table.
meta <- pData(ALL)
head(meta) # first 6 rows
# Gender distribution
table(meta$sex)
# Mean age (ignoring missing values)
mean(meta$age, na.rm = TRUE)

# Visualization in Bioconductor ----
Load the ALL package and data
# Attach the ALL data package and load the `ALL` object.
library(ALL)
data(ALL)

# Subset patients < 20
# `age` has missing values, so `age < 20` is NA for those patients.
# which() keeps only the positions where the test is TRUE, so no NA
# subscript is passed to `[`.
young_patients <- ALL[, which(pData(ALL)$age < 20)]
dim(young_patients)
# Bar chart: number of patients in each Immunophenotype (BT) class
barplot(
  table(pData(ALL)$BT),
  main = "Patients by Immunophenotype (BT)",
  xlab = "Immunophenotype (BT)",
  ylab = "Patients"
)
# PCA restricted to the first 50 genes (rows) of the expression matrix
expr <- exprs(ALL)[seq_len(50), ]
# Transpose so each sample is a row; scale each gene to unit variance
pca <- prcomp(t(expr), scale. = TRUE)
# Scatter plot of the first two components, coloured by BT class
plot(
  pca$x[, c("PC1", "PC2")],
  col = as.factor(pData(ALL)$BT),
  pch = 19,
  main = "PCA of 50 genes"
)
# Boxplot of age, one box per sex
boxplot(
  age ~ sex,
  data = pData(ALL),
  main = "Age Distribution by Sex",
  xlab = "Sex",
  ylab = "Age"
)
# Challenge (Filter missing age & re-run PCA)
# Keep only the samples whose age is recorded.
ALL_clean <- ALL[, !is.na(pData(ALL)$age)]
# Same PCA as before, on the first 50 genes of the filtered data.
expr_clean <- exprs(ALL_clean)[1:50, ]
pca_clean <- prcomp(t(expr_clean), scale. = TRUE)
plot(
  pca_clean$x[, 1:2],
  col = as.factor(pData(ALL_clean)$BT),
  pch = 19,
  main = "PCA after removing NA ages"
)

# Lab 3B ----
Bioconductor Packages and Data sets
Install airway package
# Install airway only when it is missing, so re-running the script does not
# reinstall the package each time.
if (!requireNamespace("airway", quietly = TRUE)) {
  BiocManager::install("airway")
}
# load package and data
library("airway")
data("airway") # loads the dataset into your environment
# Print the object.
airway

# Explore airway package ----
# Take a small slice of each component of `airway` for inspection.
ex <- assay(airway)[1:5, 1:5] # expression counts
cols <- colData(airway)[1:5, ] # sample metadata
rows <- rowData(airway)[1:5, ] # gene metadata

# Hands on tasks ----
Subsetting treated vs untreated
# Split the samples (columns) by the value of the `dex` column.
treated <- airway[, airway$dex == "trt"]
untreated <- airway[, airway$dex == "untrt"]
# Dimensions of each subset.
dim(treated)
dim(untreated)

# Count treated vs untreated ----
Extract samples from a specific cell line
Get number of genes
ExperimentHub Demo
# Attach ExperimentHub
library("ExperimentHub")
# Create the hub object used for searching and retrieval
eh <- ExperimentHub()
# List hub records matching "RNA-seq"
query(eh, "RNA-seq")
# Retrieve a single record by its hub ID (example ID)
eh[["EH1234"]] # Loads dataset into R

# AnnotationHub Demo ----
# Attach AnnotationHub
library("AnnotationHub")
# NOTE(review): rtracklayer is presumably needed to import the GTF
# resource fetched below - confirm.
library("rtracklayer")
# Create the hub object used for searching and retrieval
ah <- AnnotationHub()
# List hub records matching "Homo sapiens"
query(ah, "Homo sapiens")
# Retrieve a single annotation record by its hub ID (example ID)
ah[["AH83281"]] # Loads GRCh38 GTF annotation into R

# org.Hs.eg.db Demo ----
Lab 3B Tasks
Task 1: Take the first 20 genes from airway. Map ENSEMBL IDs → gene symbols.
Retrieve gene descriptions
# org.Hs.eg.db is the annotation database object passed to mapIds() below.
library(org.Hs.eg.db)

# Get first 20 ENSEMBL IDs from airway
ids20 <- rownames(airway)[1:20]
# Map ENSEMBL → Gene Symbol
symbols <- mapIds(
  org.Hs.eg.db,
  keys = ids20,
  keytype = "ENSEMBL",
  column = "SYMBOL"
)
# Map ENSEMBL → Full Gene Name
descriptions <- mapIds(
  org.Hs.eg.db,
  keys = ids20,
  keytype = "ENSEMBL",
  column = "GENENAME"
)
# Combine into a data frame
annotated20 <- data.frame(
  ENSEMBL_ID = ids20,
  Symbol = symbols,
  Description = descriptions
)
head(annotated20)

# Task 2: Subset airway to treated samples only. Select the first 5 genes.
Annotate them with symbols + full names
# org.Hs.eg.db is the annotation database object passed to mapIds() below.
library(org.Hs.eg.db)

# Subset treated samples
treated <- airway[, airway$dex == "trt"]
# Get first 5 ENSEMBL IDs from treated dataset
ids5 <- rownames(treated)[1:5]
# Map ENSEMBL → Symbol
symbols5 <- mapIds(
  org.Hs.eg.db,
  keys = ids5,
  keytype = "ENSEMBL",
  column = "SYMBOL"
)
# Map ENSEMBL → Gene Name
names5 <- mapIds(
  org.Hs.eg.db,
  keys = ids5,
  keytype = "ENSEMBL",
  column = "GENENAME"
)
# Combine results
annotated5 <- data.frame(
  ENSEMBL_ID = ids5,
  Symbol = symbols5,
  Full_Name = names5
)
# Print the annotated table.
annotated5