Question: How many samples are wild-type versus mutant with respect to the most significantly enriched gene?
I'm confused by the question. Am I asked to obtain the total number of mutations (vs WT) across all the samples/patients? Or am I asked to find the number of mutants out of the 50 patients?
Background: I performed clinical enrichment on a group of 50 tumor patients, half of whom respond to the drug (responders) and half of whom do not (non-responders).
# Load every MAF file under ./mafs together with the per-sample clinical
# annotations, then test each gene for enrichment by treatment response.
# merge_mafs() and clinicalEnrichment() are maftools functions, so maftools
# must already be attached (library(maftools)) before this runs.
fpath <- list.files(path = "./mafs", full.names = TRUE)
# NOTE: the variable name `c` shadows base::c(); calls like c(1, 2) still
# resolve to the function, but the name is kept only because later code
# refers to it.
c <- read.table(file = "sample-information.tsv", sep = "\t", header = TRUE)
d <- merge_mafs(fpath, clinicalData = c)
# Clinical enrichment
response.ce <- clinicalEnrichment(maf = d, clinicalFeature = "Response")
# Significant associations p-value < 0.05
# (`groupwise_comparision` is the element name as returned in the result
# list; keep the spelling as-is.)
res.sig <- response.ce$groupwise_comparision[p_value < 0.05]
write.csv(res.sig, "enriched_sig.csv", row.names = FALSE)
If I look for the total number of mutations (vs WT) across all the samples, this is my code and output:
# Mutant vs wild-type counts summed over ALL significantly enriched genes,
# using only the Responder rows of res.sig. Because every gene contributes its
# own "k of n" entry, these are gene-by-sample counts, not unique samples.
#
# n_mutated_group1 holds strings of the form "k of n". They are split on the
# " of " separator rather than by fixed character positions, so multi-digit
# counts (e.g. "12 of 25") and group sizes of any width are parsed correctly.
responder_counts <- res.sig$n_mutated_group1[res.sig$Group1 == "Responder"]
# Text after " of " is the group size n.
total <- sum(as.numeric(sub("^.* of ", "", responder_counts)))
# Text before " of " is the number of mutated samples k.
mut <- sum(as.numeric(sub(" of .*$", "", responder_counts)))
wt <- total - mut
# 129 wt and 46 mutant
However, if I am asked to find the number of mutants out of the 50 patients, how should I write the code?
# Dump of `c`, the clinical annotation table read from sample-information.tsv.
dput(c)
structure(list(Patient_ID = c("Patient-0", "Patient-1", "Patient-2",
"Patient-3", "Patient-4", "Patient-5", "Patient-6", "Patient-7",
"Patient-8", "Patient-9", "Patient-10", "Patient-11", "Patient-12",
"Patient-13", "Patient-14", "Patient-15", "Patient-16", "Patient-17",
"Patient-18", "Patient-19", "Patient-20", "Patient-21", "Patient-22",
"Patient-23", "Patient-24", "Patient-25", "Patient-26", "Patient-27",
"Patient-28", "Patient-29", "Patient-30", "Patient-31", "Patient-32",
"Patient-33", "Patient-34", "Patient-35", "Patient-36", "Patient-37",
"Patient-38", "Patient-39", "Patient-40", "Patient-41", "Patient-42",
"Patient-43", "Patient-44", "Patient-45", "Patient-46", "Patient-47",
"Patient-48", "Patient-49"), Tumor_Sample_Barcode = c("Patient-0-Tumor",
"Patient-1-Tumor", "Patient-2-Tumor", "Patient-3-Tumor", "Patient-4-Tumor",
"Patient-5-Tumor", "Patient-6-Tumor", "Patient-7-Tumor", "Patient-8-Tumor",
"Patient-9-Tumor", "Patient-10-Tumor", "Patient-11-Tumor", "Patient-12-Tumor",
"Patient-13-Tumor", "Patient-14-Tumor", "Patient-15-Tumor", "Patient-16-Tumor",
"Patient-17-Tumor", "Patient-18-Tumor", "Patient-19-Tumor", "Patient-20-Tumor",
"Patient-21-Tumor", "Patient-22-Tumor", "Patient-23-Tumor", "Patient-24-Tumor",
"Patient-25-Tumor", "Patient-26-Tumor", "Patient-27-Tumor", "Patient-28-Tumor",
"Patient-29-Tumor", "Patient-30-Tumor", "Patient-31-Tumor", "Patient-32-Tumor",
"Patient-33-Tumor", "Patient-34-Tumor", "Patient-35-Tumor", "Patient-36-Tumor",
"Patient-37-Tumor", "Patient-38-Tumor", "Patient-39-Tumor", "Patient-40-Tumor",
"Patient-41-Tumor", "Patient-42-Tumor", "Patient-43-Tumor", "Patient-44-Tumor",
"Patient-45-Tumor", "Patient-46-Tumor", "Patient-47-Tumor", "Patient-48-Tumor",
"Patient-49-Tumor"), Matched_Norm_Sample_Barcode = c("Patient-0-Normal",
"Patient-1-Normal", "Patient-2-Normal", "Patient-3-Normal", "Patient-4-Normal",
"Patient-5-Normal", "Patient-6-Normal", "Patient-7-Normal", "Patient-8-Normal",
"Patient-9-Normal", "Patient-10-Normal", "Patient-11-Normal",
"Patient-12-Normal", "Patient-13-Normal", "Patient-14-Normal",
"Patient-15-Normal", "Patient-16-Normal", "Patient-17-Normal",
"Patient-18-Normal", "Patient-19-Normal", "Patient-20-Normal",
"Patient-21-Normal", "Patient-22-Normal", "Patient-23-Normal",
"Patient-24-Normal", "Patient-25-Normal", "Patient-26-Normal",
"Patient-27-Normal", "Patient-28-Normal", "Patient-29-Normal",
"Patient-30-Normal", "Patient-31-Normal", "Patient-32-Normal",
"Patient-33-Normal", "Patient-34-Normal", "Patient-35-Normal",
"Patient-36-Normal", "Patient-37-Normal", "Patient-38-Normal",
"Patient-39-Normal", "Patient-40-Normal", "Patient-41-Normal",
"Patient-42-Normal", "Patient-43-Normal", "Patient-44-Normal",
"Patient-45-Normal", "Patient-46-Normal", "Patient-47-Normal",
"Patient-48-Normal", "Patient-49-Normal"), Response = c("Non-Responder",
"Responder", "Responder", "Non-Responder", "Responder", "Non-Responder",
"Non-Responder", "Responder", "Responder", "Non-Responder", "Non-Responder",
"Responder", "Responder", "Responder", "Responder", "Non-Responder",
"Non-Responder", "Responder", "Non-Responder", "Responder", "Non-Responder",
"Non-Responder", "Non-Responder", "Non-Responder", "Responder",
"Non-Responder", "Non-Responder", "Responder", "Non-Responder",
"Responder", "Responder", "Responder", "Non-Responder", "Responder",
"Responder", "Non-Responder", "Responder", "Responder", "Responder",
"Non-Responder", "Responder", "Non-Responder", "Responder", "Non-Responder",
"Responder", "Non-Responder", "Non-Responder", "Non-Responder",
"Non-Responder", "Responder"), Silent_mutations_per_Mb = c(2.87,
1.92, 1.32, 1.78, 4.93, 3.01, 3.07, 3.67, 1, 1.61, 0.03, 3.73,
2.44, 0.6, 1.03, 1.29, 1, 7.35, 1.53, 1.64, 0.64, 0.75, 2.99,
0.6, 10.6, 0.69, 0.57, 3.73, 1.22, 5.89, 5.22, 2.55, 0.6, 0.44,
2.87, 0.17, 2.47, 9.1, 0.96, 1.23, 0.98, 1.21, 3.67, 1.06, 4.19,
1.61, 0.66, 0.8, 0.55, 2.04), Nonsynonymous_mutations_per_Mb = c(6.77,
6.14, 2.84, 5, 10.5, 8.15, 6.69, 10.3, 2.64, 4.48, 0.12, 9.16,
6.98, 2.1, 3.79, 3.1, 4.39, 19.3, 4.14, 4.25, 1.83, 1.84, 7.44,
2.61, 20.8, 3.34, 2.1, 9.39, 2.87, 14.1, 11.8, 7.18, 2.76, 1.69,
8.47, 0.89, 6, 19.6, 3.34, 4.05, 3.53, 3.88, 11.1, 3.33, 11.3,
4.19, 2.84, 2.33, 2.27, 5.6), Mutations_per_Mb = c(9.64, 8.06,
4.16, 6.78, 15.43, 11.16, 9.76, 13.97, 3.64, 6.09, 0.1525, 12.89,
9.42, 2.703, 4.82, 4.39, 5.39, 26.65, 5.67, 5.89, 2.471, 2.586,
10.43, 3.205, 31.4, 4.027, 2.674, 13.12, 4.09, 19.99, 17.02,
9.73, 3.355, 2.133, 11.34, 1.053, 8.47, 28.7, 4.302, 5.28, 4.507,
5.09, 14.77, 4.39, 15.49, 5.8, 3.5, 3.134, 2.815, 7.64)), class = "data.frame", row.names = c(NA,
-50L))
# Dump of `res.sig`, the groupwise enrichment rows with p_value < 0.05.
dput(res.sig)
structure(list(Hugo_Symbol = c("ERCC2", "ERCC2", "AKAP9", "AKAP9",
"HERC1", "HERC1", "HECTD1", "HECTD1", "MACF1", "MACF1", "MROH2B",
"MROH2B", "KMT2C", "KMT2C"), Group1 = c("Non-Responder", "Responder",
"Non-Responder", "Responder", "Non-Responder", "Responder", "Non-Responder",
"Responder", "Non-Responder", "Responder", "Non-Responder", "Responder",
"Non-Responder", "Responder"), Group2 = c("Rest", "Rest", "Rest",
"Rest", "Rest", "Rest", "Rest", "Rest", "Rest", "Rest", "Rest",
"Rest", "Rest", "Rest"), n_mutated_group1 = c("0 of 25", "9 of 25",
"0 of 25", "6 of 25", "0 of 25", "6 of 25", "0 of 25", "6 of 25",
"0 of 25", "6 of 25", "0 of 25", "6 of 25", "1 of 25", "7 of 25"
), n_mutated_group2 = c("9 of 25", "0 of 25", "6 of 25", "0 of 25",
"6 of 25", "0 of 25", "6 of 25", "0 of 25", "6 of 25", "0 of 25",
"6 of 25", "0 of 25", "7 of 25", "1 of 25"), p_value = c(0.00163083541184905,
0.00163083541184905, 0.022289766970618, 0.022289766970618, 0.022289766970618,
0.022289766970618, 0.022289766970618, 0.022289766970618, 0.022289766970618,
0.022289766970618, 0.022289766970618, 0.022289766970618, 0.0487971536957187,
0.0487971536957187), OR = c(0, Inf, 0, Inf, 0, Inf, 0, Inf, 0,
Inf, 0, Inf, 0.111488645279478, 8.96952328636894), OR_low = c(0,
2.56647319276964, 0, 1.33358819424024, 0, 1.33358819424024, 0,
1.33358819424024, 0, 1.33358819424024, 0, 1.33358819424024, 0.00228988507629356,
1.0079479819766), OR_high = c(0.38963976043749, Inf, 0.749856668137133,
Inf, 0.749856668137133, Inf, 0.749856668137133, Inf, 0.749856668137133,
Inf, 0.749856668137133, Inf, 0.992114690322592, 436.703138665198
), fdr = c(0.109265972593886, 0.109265972593886, 0.248902397838568,
0.248902397838568, 0.248902397838568, 0.248902397838568, 0.248902397838568,
0.248902397838568, 0.248902397838568, 0.248902397838568, 0.248902397838568,
0.248902397838568, 0.467058471087594, 0.467058471087594)), row.names = c(NA,
-14L), class = c("data.table", "data.frame"), .internal.selfref = <pointer: 0x000002adab171ef0>)