Hello everyone, Purpose of the analysis: In there a difference between TOV21G-PacR and TOV21G-cont, and between the PacR and cont? My question is if this analysis makes sense for you? I am analyzing this particular gene set (GSE172016). Within the data there are two cell lines ("OVCAR3" and "TOV21G") and two conditions (Not-treated as "cont" and treated as "PacR").
# Imporing the data
library(tidyverse)
dt <- readr::read_delim(file = "data/GSE172016_annotated.txt",
col_names = T, delim = "\t")
dt <- dt %>% mutate(across(everything(), .fns = ~replace_na(.,0)))
conti <- as.matrix(dt[,c(2:13)])
mode(conti) <- "integer"
row.names(conti) <- dt$gene_id
Now I build the metadata table:
# building metadata
metadata <- colnames(conti)
metadata <- data.frame(names = metadata,
cell_line = factor(sapply(strsplit(metadata, "-"), "[", 1)),
treatment = factor(substring(metadata, 8, last = 11)),
replica = factor(sapply(strsplit(metadata, "_"), "[", 2))
)
rownames(metadata) <- metadata$names
metadata$names <- NULL
cell_line treatment replica OVCAR3-control_replicate2 OVCAR3 cont replicate2 OVCAR3-control_replicate3 OVCAR3 cont replicate3 OVCAR3-control_replicate1 OVCAR3 cont replicate1 OVCAR3-PacR_replicate1 OVCAR3 PacR replicate1 OVCAR3-PacR_replicate2 OVCAR3 PacR replicate2 OVCAR3-PacR_replicate3 OVCAR3 PacR replicate3 TOV21G-control_replicate1 TOV21G cont replicate1 TOV21G-control_replicate2 TOV21G cont replicate2 TOV21G-control_replicate3 TOV21G cont replicate3 TOV21G-PacR_replicate1 TOV21G PacR replicate1 TOV21G-PacR_replicate2 TOV21G PacR replicate2 TOV21G-PacR_replicate3 TOV21G PacR replicate3
Adding the deseq object: Does the design below make sense in answering the following question: is there an effect of the condition used, the cell line used and additional effect from possible interactions between the cell_line and the condition that could influence the gene expression?
deseq <- DESeqDataSetFromMatrix(countData = conti,
colData = metadata,
design = ~1
)
design(deseq) <- ~1 + cell_line + treatment + cell_line : treatment
deseq <- DESeq(deseq)
colData(deseq)
resultsNames(deseq)
mod_mat <- model.matrix(design(deseq), colData(deseq))
This is the mod_mat:
(Intercept) cell_lineTOV21G treatmentPacR cell_lineTOV21G:treatmentPacR OVCAR3-control_replicate2 1 0 0 0 OVCAR3-control_replicate3 1 0 0 0 OVCAR3-control_replicate1 1 0 0 0 OVCAR3-PacR_replicate1 1 0 1 0 OVCAR3-PacR_replicate2 1 0 1 0 OVCAR3-PacR_replicate3 1 0 1 0 TOV21G-control_replicate1 1 1 0 0 TOV21G-control_replicate2 1 1 0 0 TOV21G-control_replicate3 1 1 0 0 TOV21G-PacR_replicate1 1 1 1 1 TOV21G-PacR_replicate2 1 1 1 1 TOV21G-PacR_replicate3 1 1 1 1 attr(,"assign") [1] 0 1 2 3 attr(,"contrasts") attr(,"contrasts")$cell_line [1] "contr.treatment"
attr(,"contrasts")$treatment [1] "contr.treatment"
Then I "personalize" my contrasts
OVCAR3_control <- colMeans(mod_mat[deseq$cell_line == "OVCAR3" & deseq$treatment == "cont", ])
OVCAR3_test <- colMeans(mod_mat[deseq$cell_line == "OVCAR3" & deseq$treatment == "PacR", ])
TOV21G_control <- colMeans(mod_mat[deseq$cell_line == "TOV21G" & deseq$treatment == "cont", ])
TOV21G_test <- colMeans(mod_mat[deseq$cell_line == "TOV21G" & deseq$treatment == "PacR", ])
Finally I get the results:
res1 <- results(deseq,
contrast = OVCAR3_test - OVCAR3_control,
alpha = 0.05,
pAdjustMethod = "BH")
res2 <- results(deseq,
contrast = TOV21G_test - TOV21G_control,
alpha = 0.05,
pAdjustMethod = "BH")
res3 <- results(deseq,
contrast = (OVCAR3_control - TOV21G_control) - (TOV21G_control - TOV21G_test),
alpha = 0.05,
pAdjustMethod = "BH")
I would be grateful if anyone could help me understand well the contrast part, whether it makes sense or if you would do it differently. Moreover, what would you add to this analysis? Would you add any confirmation(s) that the data makes sense?