This gets asked a lot. Should probably make a tutorial..
Switch TCGA-PRAD
to TCGA-BRCA
and don't blindly run the metadata section.
---
title: "TCGA_analysis"
author: "Barry"
date: "07/07/2020"
output: html_document
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
knitr::opts_chunk$set(root.dir = "~/Desktop/TCGA/")
```
```{R, messages = F}
library(knitr)
library(DESeq2)
library(IHW)
library(apeglm)
library(TCGAbiolinks)
```
## miRNA analysis
```{R}
mirna_query <- GDCquery(project = "TCGA-PRAD",
data.category = "Transcriptome Profiling",
data.type = "miRNA Expression Quantification",
#workflow.type = "BCGSC miRNA Profiling",
experimental.strategy = "miRNA-Seq")
GDCdownload(mirna_query, method = "api", files.per.chunk = 100,
directory = "~/Desktop/TCGA/miRNA/")
miR_df <- GDCprepare(mirna_query, directory = "~/Desktop/TCGA/miRNA/")
## remove columns we dont need, keep counts
rownames(miR_df) <- miR_df$miRNA_ID
miR_df <- miR_df[,-1]
number_cols <- ncol(miR_df)
subset <- seq(from = 1, to = number_cols, by = 3)
miR_df <- miR_df[, subset]
## Strip read_count, just want the 'cases' ID
colnames(miR_df) <- gsub(".*_","",colnames(miR_df))
## Match to metadata
miR_meta <- mirna_query[[1]][[1]]
miR_meta <- miR_meta[,c("cases", "tissue.definition")]
rownames(miR_meta) <- colnames(miR_df)
table(rownames(miR_meta) == miR_meta$cases)
## fix the levels that R thinks are there but are not
miR_meta$tissue.definition <- as.character(miR_meta$tissue.definition)
table(miR_meta$tissue.definition)
## Remove metastatic sample
metastatic_key <- miR_meta[which(miR_meta$tissue.definition == "Metastatic"),]
miR_meta <- miR_meta[!miR_meta$tissue.definition == metastatic_key$tissue.definition,]
miR_df <- miR_df[, -grep(paste0(metastatic_key$cases), colnames(miR_df))]
## Rename conditions
miR_meta$tissue.definition <- gsub("Primary solid Tumor", "Tumor", miR_meta$tissue.definition)
miR_meta$tissue.definition <- gsub("Solid Tissue Normal", "Normal", miR_meta$tissue.definition)
miR_meta$tissue.definition <- as.factor(miR_meta$tissue.definition)
levels(miR_meta$tissue.definition)
colnames(miR_meta) <- c("cases", "Condition")
## tidy vars
rm(mirna_query)
rm(subset)
rm(number_cols)
rm(metastatic_key)
## DESeq2 Analysis
miR_dds <- DESeqDataSetFromMatrix(miR_df, colData = miR_meta, design = ~ Condition)
miR_dds$Condition <- relevel(miR_dds$Condition, ref = "Normal")
miR_dds <- DESeq(miR_dds)
resultsNames(miR_dds)
## DESeq2 results
miR_res <- results(miR_dds, filterFun = ihw, alpha = 0.05, name = "Condition_Tumor_vs_Normal")
summary(miR_res)
miR_res_df <- as.data.frame(miR_res)
Thanks a lot for your reply. Can I use this for gene expression quantification data as well?
If you edit it accordingly, yes.
Thank you, that was very helpful.