Module 2

Lecture

2A

2B

Lab 2A

Read data into R

read CSV - base functions

# read.csv2() expects ";" as the field separator and "," as the decimal mark,
# so reading a comma-separated file with its defaults does not split the
# fields into separate columns.
bp <- read.csv2("Desktop/R/data/BloodPressure_Data.csv") # wrong separator
# take a quick look at the data
head(bp)
# read.csv() uses sep = "," and dec = ".". Calling read.csv2(..., sep = ",")
# would split the columns but still treat "," as the decimal mark, so values
# such as 120.5 would be imported as text instead of numbers.
bp <- read.csv("Desktop/R/data/BloodPressure_Data.csv")
# take another look at the data
head(bp)
str(bp)

readr functions

library(readr)

Read the CSV file

# Import the blood pressure CSV with readr's reader.
bp_data <- readr::read_csv(file = "Desktop/R/data/BloodPressure_Data.csv")

Take a quick look at the data

# Show the first rows, then the structure (column names and types).
utils::head(bp_data)
utils::str(bp_data)

Work with dates

library(readr)
library("lubridate")

read ALL data

# read.csv() already uses sep = "," and dec = "."; read.csv2(..., sep = ",")
# keeps dec = "," and would import decimal numbers as text.
bp <- read.csv("Desktop/R/data/BloodPressure_wDates.csv")

Convert date column and extract year

# Parse the Date column with lubridate's year-month-day parser, then store
# the calendar year of each parsed date in a new Year column.
bp[["Date"]] <- ymd(bp[["Date"]])
bp[["Year"]] <- year(bp[["Date"]])

Filtering blood pressure patients by year and gender

# Show the rows where Year is 2003 and Gender is "f". which() drops rows
# whose condition is NA, matching what subset() does.
bp[which(bp$Year == 2003 & bp$Gender == "f"), , drop = FALSE]

Conditions and loops

If {} else {} statement

# Syntax template only: `condition` is a placeholder, not an object defined in
# these notes. Replace it with an expression that evaluates to a single TRUE
# or FALSE before running the code.
if (condition) {
  # code if TRUE
} else {
  # code if FALSE
}
# If/else example: label a single patient by age (older means above 50).
age <- 55
if (age <= 50) {
  print("Younger patient")
} else {
  print("Older patient")
}
# If {} else if {} else statement
# Syntax template only: `condition1` and `condition2` are placeholders, not
# objects defined in these notes. The branches are tested top to bottom and
# only the first one whose condition is TRUE runs.

if (condition1) {
  # code if condition1 is TRUE
} else if (condition2) {
  # code if condition2 is TRUE
} else {
  # code if none are TRUE
}
# If / else if / else example: assign an age group to a single patient.
age <- 35

if (age < 18) {
  print("Child")
} else if (age >= 18 && age < 60) {
  # `&&` is the scalar, short-circuiting AND intended for `if` conditions;
  # `&` is the elementwise operator meant for whole vectors.
  print("Adult")
} else {
  print("Senior")
}
# Basic for loop: walk the integers 1 through 5 and print each one.
for (i in seq_len(5)) {
  print(i)
}
# Looping over a character vector: print one status line per patient label.
patients <- c("P1", "P2", "P3")
for (p in patients) {
  print(paste("Processing:", p, sep = " "))
}
# The apply() family vs. loops
# Using a for loop
m <- matrix(1:9, nrow = 3)
# Preallocate the result instead of growing it from c() on every iteration.
# seq_len() also skips the loop entirely for a zero-row matrix, whereas
# 1:nrow(m) would iterate over c(1, 0).
row_sums <- integer(nrow(m))
for (i in seq_len(nrow(m))) {
  row_sums[i] <- sum(m[i, ])
}
# Using apply(): the second argument (MARGIN = 1) means "once per row"
row_sums2 <- apply(m, 1, sum)
# The apply() family
# apply(): work over the rows (1) or the columns (2) of a matrix
apply(m, 1, sum)   # row sums
apply(m, 2, mean)  # column means
# lapply(): call a function on each list element; the result is a list
lapply(list(1:3, 4:6), mean)
# sapply(): like lapply(), but simplifies the result to a vector when it can
sapply(list(1:3, 4:6), mean)
# tapply(): call a function on groups of values defined by a second vector
ages <- c(21, 25, 30, 40, 35)
gender <- c("M", "M", "F", "F", "M")
tapply(ages, gender, mean)   # mean age by gender
# mapply(): call a function on the parallel elements of several vectors
nums1 <- 1:5
nums2 <- 6:10
mapply(sum, nums1, nums2)   # adds 1+6, 2+7, … 5+10

Lab 2A Tasks

Task 1 – Basic Filtering

# Use a for loop with if/else conditions
Go through each row of the dataset and:
- Print a message if the patient has high BP (> 140). Tip: use the paste() function.
- Print a message if the patient has low BP (< 90).
- Otherwise, print a message marking the patient as normal.
# Read the data. read.csv() uses sep = "," and dec = "."; read.csv2() with
# sep = "," keeps dec = "," and would import decimal values as text.
bp_data <- read.csv("Desktop/R/data/BloodPressure_wDates.csv")

# Use a for loop with if / else if / else to report each patient's status:
# above 140 is HIGH, below 90 is LOW, everything else is NORMAL.
# seq_len() gives an empty sequence for a zero-row data frame, whereas
# 1:nrow(bp_data) would iterate over c(1, 0).
for (i in seq_len(nrow(bp_data))) {
  if (bp_data$BloodPressure[i] > 140) {
    print(paste("Patient", bp_data$ID[i], "has HIGH blood pressure"))
  } else if (bp_data$BloodPressure[i] < 90) {
    print(paste("Patient", bp_data$ID[i], "has LOW blood pressure"))
  } else {
    print(paste("Patient", bp_data$ID[i], "is NORMAL"))
  }
}


# Use the apply() family instead of a loop.
# vapply() walks the BloodPressure column directly and guarantees exactly one
# character value per patient. apply() over the whole data frame would first
# convert every row to text, because a data frame with mixed column types is
# coerced to a character matrix.
# as.numeric() is kept so the comparison stays numeric even if the column was
# imported as text.
bp_data$BP_Status <- vapply(
  as.numeric(bp_data$BloodPressure),
  function(bp) {
    if (bp > 140) {
      "HIGH"
    } else if (bp < 90) {
      "LOW"
    } else {
      "NORMAL"
    }
  },
  character(1)
)

Lab 2B

Basic plotting in R

Plot multiple panels in one plot

# layout() puts plot 1 across the whole top row and plots 2 and 3 side by side
# in the bottom row. It replaces any earlier par(mfrow = ...) setting and the
# two mechanisms cannot be combined, so only one of them is called here.
# For three equally sized stacked panels use par(mfrow = c(3, 1)) instead.
layout(matrix(c(1, 1, 2, 3), nrow = 2, byrow = TRUE))

Histogram of Blood Pressure

# Histogram of the BloodPressure column with a custom title, x-axis label
# and bar colour.
hist(
  x = bp_data$BloodPressure,
  col = "lightblue",
  xlab = "Blood Pressure",
  main = "Blood Pressure Distribution"
)

Boxplot of BP by Gender

# One box per Gender value, showing the spread of BloodPressure in each.
boxplot(
  BloodPressure ~ Gender,
  data = bp_data,
  ylab = "Blood Pressure",
  xlab = "Gender",
  main = "BP by Gender"
)

Scatterplot Age vs BP

# Base-graphics scatterplot: Age on the x-axis, BloodPressure on the y-axis.
plot(
  x = bp_data$Age,
  y = bp_data$BloodPressure,
  ylab = "BP",
  xlab = "Age",
  main = "Age vs Blood Pressure"
)

ggplot2

# example syntax 
#ggplot(data, aes(x, y)) + geom_*()
library(ggplot2)
# Scatterplot: Age (x) against BloodPressure (y), one point per row
ggplot(data = bp_data, mapping = aes(x = Age, y = BloodPressure)) +
  geom_point()

boxplot

# Boxplot of BloodPressure, one box per Group value.
ggplot(data = bp_data, mapping = aes(x = Group, y = BloodPressure)) +
  geom_boxplot()

Customizing ggplot2

# Customized scatterplot: blue points, explicit title and axis labels, and
# the minimal theme.
ggplot(data = bp_data, mapping = aes(x = Age, y = BloodPressure)) +
  geom_point(color = "blue") +
  theme_minimal() +
  ggtitle("Age vs Blood Pressure") +
  xlab("Patient Age") +
  ylab("BP (mmHg)")
# Save last plot as PNG (ggsave() uses the most recent plot when no plot
# object is passed)
ggsave(filename = "Age_BP_Scatter.png", width = 6, height = 4)
# Save a specific plot object: keep the plot in `p`, then hand it to ggsave()
p <- ggplot(data = bp_data, mapping = aes(x = Age, y = BloodPressure)) +
  geom_point()
ggsave(filename = "data/scatter_plot.png", plot = p)

Lab 2B Tasks

ggplot hands on tasks

library(lubridate)
library(ggplot2)
library(patchwork)

Load data

# read.csv() already uses sep = "," and dec = "."; read.csv2(..., sep = ",")
# keeps dec = "," and would import decimal numbers as text.
bp_data <- read.csv("data/BloodPressure_wDates.csv")

Get the year in a new column

# The Date column is still plain text after import. Parse it explicitly as
# year-month-day (as in the Lab 2A date example) before extracting the year,
# rather than relying on year() to coerce the text implicitly.
bp_data$Year <- year(ymd(bp_data$Date))
  1. Bar plot: patient counts per group
# ggplot hands on tasks
# Bar chart: geom_bar() counts the rows in each Group.
ggplot(data = bp_data, mapping = aes(x = Group)) +
  geom_bar(fill = "steelblue") +
  ggtitle("Number of Patients per Group") +
  xlab("Group") +
  ylab("Count")
  2. Histogram: Age distribution
# Histogram of Age in 5-year bins. as.numeric() converts Age to a number
# before binning.
ggplot(data = bp_data, mapping = aes(x = as.numeric(Age))) +
  geom_histogram(color = "black", fill = "lightgreen", binwidth = 5) +
  ggtitle("Age Distribution of Patients") +
  xlab("Age") +
  ylab("Frequency")
  3. Scatterplot: Age vs BloodPressure, colored by Group
# Scatterplot of Age against BloodPressure; point colour is mapped to Group.
ggplot(
  data = bp_data,
  mapping = aes(x = Age, y = BloodPressure, color = Group)
) +
  geom_point() +
  ggtitle("Age vs Blood Pressure by Group") +
  xlab("Age") +
  ylab("Blood Pressure")

Bonus hands on

library(ggplot2)
library(patchwork)   # install.packages("patchwork") if needed
  1. Bar plot
# p1: one bar per Gender whose height is the mean BloodPressure of that
# gender; stat_summary() computes the mean before drawing the bar.
p1 <- ggplot(
  data = bp_data,
  mapping = aes(x = Gender, y = BloodPressure, fill = Gender)
) +
  stat_summary(geom = "bar", fun = "mean") +
  theme_minimal() +
  ggtitle("Average BP by Gender")
  2. Histogram
# p2: histogram of Age in 5-year bins, with Age converted to a number first.
p2 <- ggplot(data = bp_data, mapping = aes(x = as.numeric(Age))) +
  geom_histogram(color = "black", fill = "lightgreen", binwidth = 5) +
  ggtitle("Age Distribution of Patients") +
  xlab("Age") +
  ylab("Frequency")
  3. Boxplot
# p3: boxplot of BloodPressure per Group, each box filled by its Group.
p3 <- ggplot(
  data = bp_data,
  mapping = aes(x = Group, y = BloodPressure, fill = Group)
) +
  geom_boxplot() +
  theme_minimal() +
  ggtitle("BP by Group")

Combine plots into one figure

# horizontal layout: patchwork's `|` places the three plots side by side
(p1 | p2 | p3) 

# vertical layout: patchwork's `/` stacks the three plots on top of each other
(p1 / p2 / p3)  

# two rows: p1 and p2 side by side on top, p3 alone in the row below
(p1 | p2) / p3