How many samples are wild-type versus mutant with respect to the most significantly enriched gene?
0
0
Entering edit mode
2.8 years ago

Question: How many samples are wild-type versus mutant with respect to the most significantly enriched gene?

I'm confused by the question. Am I being asked for the total number of mutations (vs. WT) summed across all the samples/patients? Or am I being asked to find how many of the 50 patients are mutant?

Background: I performed clinical enrichment on a group of 50 tumor patients, half of whom responded to the drug.

# Collect every file under ./mafs (full paths) to be merged into one MAF object.
fpath <- list.files(path = "./mafs", full.names = TRUE)
# Per-sample clinical annotations, tab-separated with a header row.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned. NOTE: the name `c` masks base::c as a variable; it is kept
# because later code refers to it.
c <- read.table(file = "sample-information.tsv", sep = "\t", header = TRUE)
# Merge the MAF files and attach the clinical table to the result.
d <- merge_mafs(fpath, clinicalData = c)


# Clinical enrichment: test each gene for association with the "Response"
# column of the clinical data attached to `d`.
response.ce <- clinicalEnrichment(maf = d, clinicalFeature = "Response")

# Significant associations p-value < 0.05
# The filter is on the raw `p_value` column, not on `fdr`.
# `p_value` is written unquoted inside `[`, which relies on
# `groupwise_comparision` (sic, the element name used by the result) being a
# data.table so that the name resolves to a column.
res.sig <- response.ce$groupwise_comparision[p_value < 0.05]
write.csv(res.sig, "enriched_sig.csv", row.names = FALSE)

If I look for the total number of mutations (vs WT) across all the samples, this is my code and output:

# Number of samples in wild-type versus mutant with respect to the most significantly enriched genes

# Split "<mutated> of <group size>" strings (e.g. "9 of 25") into their two
# numbers. Splitting on " of " instead of slicing fixed character positions
# keeps counts with any number of digits intact ("12 of 25", "3 of 100").
# Returns a list with numeric vectors `mutated` and `total`.
parse_n_of_m <- function(x) {
  parts <- strsplit(as.character(x), " of ", fixed = TRUE)
  stopifnot(all(lengths(parts) == 2L))
  list(
    mutated = as.numeric(vapply(parts, `[[`, character(1), 1L)),
    total = as.numeric(vapply(parts, `[[`, character(1), 2L))
  )
}

# Rows where the tested group is "Responder"; which() drops NA comparisons.
responder_idx <- which(res.sig$Group1 == "Responder")
responder_counts <- parse_n_of_m(res.sig$n_mutated_group1[responder_idx])

# These sums add up one row per gene, so a sample mutated in several of the
# significant genes is counted once for each of those genes.
total <- sum(responder_counts$total)
mut <- sum(responder_counts$mutated)
wt <- total - mut
# 129 wt and 46 mutant

# Counts for the single most significant gene over all samples: take the
# Responder row with the smallest p-value and add group 1 and group 2
# ("Rest") together, so the denominator is every sample in the comparison.
top_idx <- responder_idx[which.min(res.sig$p_value[responder_idx])]
top_group1 <- parse_n_of_m(res.sig$n_mutated_group1[top_idx])
top_group2 <- parse_n_of_m(res.sig$n_mutated_group2[top_idx])
top_gene <- res.sig$Hugo_Symbol[top_idx]
top_gene_mut <- top_group1$mutated + top_group2$mutated
top_gene_wt <- top_group1$total + top_group2$total - top_gene_mut
# With the res.sig shown below: ERCC2, 9 mutant and 41 wt out of 50 samples

However, if I am asked to find the number of mutants out of the 50 patients, how should I write the code?

dput(c)
structure(list(Patient_ID = c("Patient-0", "Patient-1", "Patient-2", 
"Patient-3", "Patient-4", "Patient-5", "Patient-6", "Patient-7", 
"Patient-8", "Patient-9", "Patient-10", "Patient-11", "Patient-12", 
"Patient-13", "Patient-14", "Patient-15", "Patient-16", "Patient-17", 
"Patient-18", "Patient-19", "Patient-20", "Patient-21", "Patient-22", 
"Patient-23", "Patient-24", "Patient-25", "Patient-26", "Patient-27", 
"Patient-28", "Patient-29", "Patient-30", "Patient-31", "Patient-32", 
"Patient-33", "Patient-34", "Patient-35", "Patient-36", "Patient-37", 
"Patient-38", "Patient-39", "Patient-40", "Patient-41", "Patient-42", 
"Patient-43", "Patient-44", "Patient-45", "Patient-46", "Patient-47", 
"Patient-48", "Patient-49"), Tumor_Sample_Barcode = c("Patient-0-Tumor", 
"Patient-1-Tumor", "Patient-2-Tumor", "Patient-3-Tumor", "Patient-4-Tumor", 
"Patient-5-Tumor", "Patient-6-Tumor", "Patient-7-Tumor", "Patient-8-Tumor", 
"Patient-9-Tumor", "Patient-10-Tumor", "Patient-11-Tumor", "Patient-12-Tumor", 
"Patient-13-Tumor", "Patient-14-Tumor", "Patient-15-Tumor", "Patient-16-Tumor", 
"Patient-17-Tumor", "Patient-18-Tumor", "Patient-19-Tumor", "Patient-20-Tumor", 
"Patient-21-Tumor", "Patient-22-Tumor", "Patient-23-Tumor", "Patient-24-Tumor", 
"Patient-25-Tumor", "Patient-26-Tumor", "Patient-27-Tumor", "Patient-28-Tumor", 
"Patient-29-Tumor", "Patient-30-Tumor", "Patient-31-Tumor", "Patient-32-Tumor", 
"Patient-33-Tumor", "Patient-34-Tumor", "Patient-35-Tumor", "Patient-36-Tumor", 
"Patient-37-Tumor", "Patient-38-Tumor", "Patient-39-Tumor", "Patient-40-Tumor", 
"Patient-41-Tumor", "Patient-42-Tumor", "Patient-43-Tumor", "Patient-44-Tumor", 
"Patient-45-Tumor", "Patient-46-Tumor", "Patient-47-Tumor", "Patient-48-Tumor", 
"Patient-49-Tumor"), Matched_Norm_Sample_Barcode = c("Patient-0-Normal", 
"Patient-1-Normal", "Patient-2-Normal", "Patient-3-Normal", "Patient-4-Normal", 
"Patient-5-Normal", "Patient-6-Normal", "Patient-7-Normal", "Patient-8-Normal", 
"Patient-9-Normal", "Patient-10-Normal", "Patient-11-Normal", 
"Patient-12-Normal", "Patient-13-Normal", "Patient-14-Normal", 
"Patient-15-Normal", "Patient-16-Normal", "Patient-17-Normal", 
"Patient-18-Normal", "Patient-19-Normal", "Patient-20-Normal", 
"Patient-21-Normal", "Patient-22-Normal", "Patient-23-Normal", 
"Patient-24-Normal", "Patient-25-Normal", "Patient-26-Normal", 
"Patient-27-Normal", "Patient-28-Normal", "Patient-29-Normal", 
"Patient-30-Normal", "Patient-31-Normal", "Patient-32-Normal", 
"Patient-33-Normal", "Patient-34-Normal", "Patient-35-Normal", 
"Patient-36-Normal", "Patient-37-Normal", "Patient-38-Normal", 
"Patient-39-Normal", "Patient-40-Normal", "Patient-41-Normal", 
"Patient-42-Normal", "Patient-43-Normal", "Patient-44-Normal", 
"Patient-45-Normal", "Patient-46-Normal", "Patient-47-Normal", 
"Patient-48-Normal", "Patient-49-Normal"), Response = c("Non-Responder", 
"Responder", "Responder", "Non-Responder", "Responder", "Non-Responder", 
"Non-Responder", "Responder", "Responder", "Non-Responder", "Non-Responder", 
"Responder", "Responder", "Responder", "Responder", "Non-Responder", 
"Non-Responder", "Responder", "Non-Responder", "Responder", "Non-Responder", 
"Non-Responder", "Non-Responder", "Non-Responder", "Responder", 
"Non-Responder", "Non-Responder", "Responder", "Non-Responder", 
"Responder", "Responder", "Responder", "Non-Responder", "Responder", 
"Responder", "Non-Responder", "Responder", "Responder", "Responder", 
"Non-Responder", "Responder", "Non-Responder", "Responder", "Non-Responder", 
"Responder", "Non-Responder", "Non-Responder", "Non-Responder", 
"Non-Responder", "Responder"), Silent_mutations_per_Mb = c(2.87, 
1.92, 1.32, 1.78, 4.93, 3.01, 3.07, 3.67, 1, 1.61, 0.03, 3.73, 
2.44, 0.6, 1.03, 1.29, 1, 7.35, 1.53, 1.64, 0.64, 0.75, 2.99, 
0.6, 10.6, 0.69, 0.57, 3.73, 1.22, 5.89, 5.22, 2.55, 0.6, 0.44, 
2.87, 0.17, 2.47, 9.1, 0.96, 1.23, 0.98, 1.21, 3.67, 1.06, 4.19, 
1.61, 0.66, 0.8, 0.55, 2.04), Nonsynonymous_mutations_per_Mb = c(6.77, 
6.14, 2.84, 5, 10.5, 8.15, 6.69, 10.3, 2.64, 4.48, 0.12, 9.16, 
6.98, 2.1, 3.79, 3.1, 4.39, 19.3, 4.14, 4.25, 1.83, 1.84, 7.44, 
2.61, 20.8, 3.34, 2.1, 9.39, 2.87, 14.1, 11.8, 7.18, 2.76, 1.69, 
8.47, 0.89, 6, 19.6, 3.34, 4.05, 3.53, 3.88, 11.1, 3.33, 11.3, 
4.19, 2.84, 2.33, 2.27, 5.6), Mutations_per_Mb = c(9.64, 8.06, 
4.16, 6.78, 15.43, 11.16, 9.76, 13.97, 3.64, 6.09, 0.1525, 12.89, 
9.42, 2.703, 4.82, 4.39, 5.39, 26.65, 5.67, 5.89, 2.471, 2.586, 
10.43, 3.205, 31.4, 4.027, 2.674, 13.12, 4.09, 19.99, 17.02, 
9.73, 3.355, 2.133, 11.34, 1.053, 8.47, 28.7, 4.302, 5.28, 4.507, 
5.09, 14.77, 4.39, 15.49, 5.8, 3.5, 3.134, 2.815, 7.64)), class = "data.frame", row.names = c(NA, 
-50L))

dput(res.sig)
structure(list(Hugo_Symbol = c("ERCC2", "ERCC2", "AKAP9", "AKAP9", 
"HERC1", "HERC1", "HECTD1", "HECTD1", "MACF1", "MACF1", "MROH2B", 
"MROH2B", "KMT2C", "KMT2C"), Group1 = c("Non-Responder", "Responder", 
"Non-Responder", "Responder", "Non-Responder", "Responder", "Non-Responder", 
"Responder", "Non-Responder", "Responder", "Non-Responder", "Responder", 
"Non-Responder", "Responder"), Group2 = c("Rest", "Rest", "Rest", 
"Rest", "Rest", "Rest", "Rest", "Rest", "Rest", "Rest", "Rest", 
"Rest", "Rest", "Rest"), n_mutated_group1 = c("0 of 25", "9 of 25", 
"0 of 25", "6 of 25", "0 of 25", "6 of 25", "0 of 25", "6 of 25", 
"0 of 25", "6 of 25", "0 of 25", "6 of 25", "1 of 25", "7 of 25"
), n_mutated_group2 = c("9 of 25", "0 of 25", "6 of 25", "0 of 25", 
"6 of 25", "0 of 25", "6 of 25", "0 of 25", "6 of 25", "0 of 25", 
"6 of 25", "0 of 25", "7 of 25", "1 of 25"), p_value = c(0.00163083541184905, 
0.00163083541184905, 0.022289766970618, 0.022289766970618, 0.022289766970618, 
0.022289766970618, 0.022289766970618, 0.022289766970618, 0.022289766970618, 
0.022289766970618, 0.022289766970618, 0.022289766970618, 0.0487971536957187, 
0.0487971536957187), OR = c(0, Inf, 0, Inf, 0, Inf, 0, Inf, 0, 
Inf, 0, Inf, 0.111488645279478, 8.96952328636894), OR_low = c(0, 
2.56647319276964, 0, 1.33358819424024, 0, 1.33358819424024, 0, 
1.33358819424024, 0, 1.33358819424024, 0, 1.33358819424024, 0.00228988507629356, 
1.0079479819766), OR_high = c(0.38963976043749, Inf, 0.749856668137133, 
Inf, 0.749856668137133, Inf, 0.749856668137133, Inf, 0.749856668137133, 
Inf, 0.749856668137133, Inf, 0.992114690322592, 436.703138665198
), fdr = c(0.109265972593886, 0.109265972593886, 0.248902397838568, 
0.248902397838568, 0.248902397838568, 0.248902397838568, 0.248902397838568, 
0.248902397838568, 0.248902397838568, 0.248902397838568, 0.248902397838568, 
0.248902397838568, 0.467058471087594, 0.467058471087594)), row.names = c(NA, 
-14L), class = c("data.table", "data.frame"), .internal.selfref = <pointer: 0x000002adab171ef0>)
maftools mutation r • 560 views
ADD COMMENT

Login before adding your answer.

Traffic: 1981 users visited in the last hour
Help About
FAQ
Access RSS
API
Stats

Use of this site constitutes acceptance of our User Agreement and Privacy Policy.

Powered by the version 2.3.6