library(tidyverse)
library(readxl)
# Import the qPCR Ct values from the Excel workbook.
# The first column arrives under the placeholder name `...1` (it has to be
# quoted with backticks), so rename it to `sample` straight away.
pcr <- read_excel("data/RealTimeData.xlsx") %>%
  rename(sample = `...1`)
RAdelaide 2024
July 9, 2024
tidyr
tidyr
dplyr
tidyverse
package
TidyR.R
rename() is another dplyr function
The column name `...1` is a bit weird: the leading `...` makes it a bit special
C/INF probably mean control or infected cells
sample column
str_replace_all()
" *" \(\implies\) match zero or more spaces
# A tibble: 18 × 3
sample SLAM GAPDH
<chr> <dbl> <dbl>
1 C 24hr 23.9 17.7
2 C 24hr 23.5 17.4
3 C 24hr 23.7 17.8
4 INF 24hr 23.4 15.7
5 INF 24hr 23.3 15.4
6 INF 24hr 23.4 15.8
7 C 72hr 24.7 18.4
8 C 72hr 24.3 18.6
9 C 72hr 24.5 18.7
10 INF 72hr 23.4 15.9
11 INF 72hr 23.3 15.8
12 INF 72hr 23.2 15.4
13 C 120hr 24.4 18.5
14 C 120hr 24.7 18.2
15 C 120hr 25.1 18.3
16 INF 120hr 22.8 16.7
17 INF 120hr 22.8 17.0
18 INF 120hr 22.5 16.5
separate()
to split into two columns
# A tibble: 18 × 4
treatment timepoint SLAM GAPDH
<chr> <chr> <dbl> <dbl>
1 C 24hr 23.9 17.7
2 C 24hr 23.5 17.4
3 C 24hr 23.7 17.8
4 INF 24hr 23.4 15.7
5 INF 24hr 23.3 15.4
6 INF 24hr 23.4 15.8
7 C 72hr 24.7 18.4
8 C 72hr 24.3 18.6
9 C 72hr 24.5 18.7
10 INF 72hr 23.4 15.9
11 INF 72hr 23.3 15.8
12 INF 72hr 23.2 15.4
13 C 120hr 24.4 18.5
14 C 120hr 24.7 18.2
15 C 120hr 25.1 18.3
16 INF 120hr 22.8 16.7
17 INF 120hr 22.8 17.0
18 INF 120hr 22.5 16.5
factor()
The .by argument nests the data invisibly
mutate()
# Tidy the sample labels, split them into treatment/timepoint factors and
# number the replicates within each treatment-by-timepoint combination.
pcr %>%
  # Rewrite each label as "<treatment> <time>hr", e.g. "C 24hr"
  mutate(
    sample = str_replace_all(
      sample, "^([CINF]+) *([0-9]+)[Hh]", "\\1 \\2hr"
    )
  ) %>%
  # One column in, two columns out
  separate(sample, into = c("treatment", "timepoint")) %>%
  # Explicit levels fix the ordering instead of leaving it alphabetical
  mutate(
    treatment = factor(treatment, levels = c("C", "INF")),
    timepoint = factor(timepoint, levels = c("24hr", "72hr", "120hr"))
  ) %>%
  # .by groups only for this one step, so no ungroup() is needed afterwards
  mutate(replicate = row_number(), .by = c(treatment, timepoint))
I would usually do this when I load the data
# Load the qPCR data and tidy it in a single pipeline:
# rename the placeholder column, standardise the sample labels, split them
# into treatment/timepoint factors and number the replicates per group.
pcr <- read_excel("data/RealTimeData.xlsx") %>%
  rename(sample = `...1`) %>%
  # Rewrite each label as "<treatment> <time>hr", e.g. "C 24hr"
  mutate(
    sample = str_replace_all(
      sample, "^([CINF]+) *([0-9]+)[Hh]", "\\1 \\2hr"
    )
  ) %>%
  separate(sample, into = c("treatment", "timepoint")) %>%
  # Explicit levels fix the ordering instead of leaving it alphabetical
  mutate(
    treatment = factor(treatment, levels = c("C", "INF")),
    timepoint = factor(timepoint, levels = c("24hr", "72hr", "120hr"))
  ) %>%
  # .by groups only for this one step, so no ungroup() is needed afterwards
  mutate(replicate = row_number(), .by = c(treatment, timepoint))
The inverse of separate() is unite()
unite()
# remove = FALSE tells unite() not to remove the original columns, so the
# new `merged` column sits alongside treatment, timepoint and replicate.
pcr %>%
  unite(
    col = "merged", all_of(c("treatment", "timepoint", "replicate")),
    sep = ":", remove = FALSE
  ) %>%
  head()
# A tibble: 6 × 6
merged treatment timepoint SLAM GAPDH replicate
<chr> <fct> <fct> <dbl> <dbl> <int>
1 C:24hr:1 C 24hr 23.9 17.7 1
2 C:24hr:2 C 24hr 23.5 17.4 2
3 C:24hr:3 C 24hr 23.7 17.8 3
4 INF:24hr:1 INF 24hr 23.4 15.7 1
5 INF:24hr:2 INF 24hr 23.3 15.4 2
6 INF:24hr:3 INF 24hr 23.4 15.8 3
pivot_longer()
R
# A tibble: 36 × 5
treatment timepoint replicate gene Ct
<fct> <fct> <int> <chr> <dbl>
1 C 24hr 1 SLAM 23.9
2 C 24hr 1 GAPDH 17.7
3 C 24hr 2 SLAM 23.5
4 C 24hr 2 GAPDH 17.4
5 C 24hr 3 SLAM 23.7
6 C 24hr 3 GAPDH 17.8
7 INF 24hr 1 SLAM 23.4
8 INF 24hr 1 GAPDH 15.7
9 INF 24hr 2 SLAM 23.3
10 INF 24hr 2 GAPDH 15.4
# ℹ 26 more rows
pivot_wider()
# Compute dCt for every sample, then spread the replicates across columns
# (rep1, rep2, ...) with one row per treatment/timepoint combination.
pcr %>%
  mutate(dCt = SLAM - GAPDH) %>%
  pivot_wider(
    id_cols = starts_with("t"), # here: treatment and timepoint
    names_from = "replicate",
    names_prefix = "rep",
    values_from = "dCt"
  ) %>%
  arrange(treatment, timepoint)
# A tibble: 6 × 5
treatment timepoint rep1 rep2 rep3
<fct> <fct> <dbl> <dbl> <dbl>
1 C 24hr 6.17 6.04 5.92
2 C 72hr 6.35 5.74 5.83
3 C 120hr 5.88 6.46 6.79
4 INF 24hr 7.72 7.9 7.6
5 INF 72hr 7.5 7.55 7.79
6 INF 120hr 6.08 5.79 6.02
tidyverse
is under constant development
gather()
\(\implies\) pivot_longer()
spread() \(\implies\) pivot_wider()
separate() is now superseded by separate_wider_delim()