Entering edit mode
3.9 years ago
kamalikaray1792
▴
20
Hi, I want to perform a paired differential expression analysis between BRCA Luminal A tumor samples and their matched normal samples. I have used the following code to download and process the data from TCGA using the TCGAbiolinks package in R. How should I prepare this data to run a paired differential analysis in DESeq2/edgeR?
library(TCGAbiolinks)
# Query, download and load the TCGA-BRCA RNA-Seq HTSeq count files for
# primary tumor samples.
# Fix: data.type was split by a stray quote/comma
# ("Gene", Expression Quantification"), which made the script unparseable;
# it now matches the value used for the normal-tissue query below.
query.BRCA.tumor <- GDCquery(
  project = "TCGA-BRCA",
  data.category = "Transcriptome Profiling",
  data.type = "Gene Expression Quantification",
  workflow.type = "HTSeq - Counts",
  experimental.strategy = "RNA-Seq",
  sample.type = "Primary Tumor"
)
GDCdownload(query.BRCA.tumor)
prep.BRCA.tumor <- GDCprepare(
  query = query.BRCA.tumor,
  summarizedExperiment = TRUE
)

# Same query for the solid tissue normal samples.
query.BRCA.normal <- GDCquery(
  project = "TCGA-BRCA",
  data.category = "Transcriptome Profiling",
  data.type = "Gene Expression Quantification",
  workflow.type = "HTSeq - Counts",
  experimental.strategy = "RNA-Seq",
  sample.type = "Solid Tissue Normal"
)
GDCdownload(query.BRCA.normal)
prep.BRCA.normal <- GDCprepare(
  query = query.BRCA.normal,
  summarizedExperiment = TRUE
)
# Restrict the analysis to patients annotated as PAM50 "LumA" that have both a
# normal and a tumor sample.
dataSubt_BRCA <- TCGAquery_subtype(tumor = "BRCA")
samplePam50_BRCA.LumA <-
  dataSubt_BRCA[dataSubt_BRCA$BRCA_Subtype_PAM50 %in% "LumA", ]

# Normal samples whose patient is in the LumA subtype table.
is_luma_normal <-
  colData(prep.BRCA.normal)$patient %in% samplePam50_BRCA.LumA$patient
samples.normal <- prep.BRCA.normal[, is_luma_normal]

# Tumor samples from patients that also have a retained normal sample.
has_normal <-
  colData(prep.BRCA.tumor)$patient %in% colData(samples.normal)$patient
samples.tumor <- prep.BRCA.tumor[, has_normal]

# Normal samples whose patient also has a retained tumor sample.
normal_has_tumor <-
  colData(samples.normal)$patient %in% colData(samples.tumor)$patient
Matched.Samples.Normal <- samples.normal[, normal_has_tumor]

# Fix: the original applied a mask built from `samples.tumor` to
# `prep.BRCA.tumor`, so the mask length did not match the object being
# subset. The mask and the object it indexes now both refer to `samples.tumor`.
tumor_has_normal <-
  colData(samples.tumor)$patient %in% colData(Matched.Samples.Normal)$patient
Matched.Samples.LumA <- samples.tumor[, tumor_has_normal]

# NOTE(review): a patient may have more than one tumor or normal aliquot; for
# a strictly paired design confirm there is exactly one of each per patient.
# Run TCGAbiolinks preprocessing on each matched set (cor.cut = 0.6).
pre_Matched.normal <- TCGAanalyze_Preprocessing(
  object = Matched.Samples.Normal,
  cor.cut = 0.6,
  datatype = "HTSeq - Counts"
)
pre_Matched.LumA <- TCGAanalyze_Preprocessing(
  object = Matched.Samples.LumA,
  cor.cut = 0.6,
  datatype = "HTSeq - Counts"
)

# Bind normal and tumor columns side by side, then normalize in two passes:
# first with method "gcContent", then with method "geneLength".
# NOTE(review): DESeq2/edgeR are documented to expect un-normalized counts;
# confirm whether the normalized matrix is the right input for them.
matched_data.norm <- TCGAanalyze_Normalization(
  tabDF = cbind(pre_Matched.normal, pre_Matched.LumA),
  geneInfo = geneInfoHT,
  method = "gcContent"
)
matched_data.norm <- TCGAanalyze_Normalization(
  tabDF = matched_data.norm,
  geneInfo = geneInfoHT,
  method = "geneLength"
)

# Quantile-based filtering with a 0.25 cut-off.
matched_data_filt <- TCGAanalyze_Filtering(
  tabDF = matched_data.norm,
  method = "quantile",
  qnt.cut = 0.25
)
# Split the filtered matrix into normal ("NT") and tumor ("TP") columns using
# the sample-type code in the column barcodes.
samplesNT <- TCGAquery_SampleTypes(
  barcode = colnames(matched_data_filt),
  typesample = c("NT")
)
samplesTP <- TCGAquery_SampleTypes(
  barcode = colnames(matched_data_filt),
  typesample = c("TP")
)

# drop = FALSE keeps a matrix even if only one sample of a type remains.
matched_NT_filt <- matched_data_filt[, samplesNT, drop = FALSE]
matched_LumA_filt <- matched_data_filt[, samplesTP, drop = FALSE]

# Fix: the original built MatchedNTdataFilt from `AllMatcheddataFilt` and
# `samplesMatchedNT`, neither of which is defined in this script. It is now
# derived from the normal-sample matrix above, with columns ordered by barcode.
MatchedNTdataFilt <-
  matched_NT_filt[, order(colnames(matched_NT_filt)), drop = FALSE]
You can use the Xena browser to download the raw counts, which can be used as input for DESeq2.
I already have the above data from TCGA. I would like to prepare this for paired analysis in DESeq2.
You can find it here. Make sure that your data are raw counts.