How can I calculate the OS of each patient?
0
0
Entering edit mode
7 months ago
Pedro • 0

Hello!

I need to calculate the OS of two types of cancer: CESC and HNSC. I will search the relation between the OS and gene expression.

But I didn't find the OS column on TCGA data. Can anyone help me ?

overall-survival • 474 views
ADD COMMENT
0
Entering edit mode

Please be specific. Which TCGA subset, what did you try? Show code.

ADD REPLY
0
Entering edit mode

I have already tried this code. listSamples with HNSC cancer and smokers. I need at least one supporting material to find out the survival of each of the HNSC and CESC cancer individuals

library(DESeq2) library(TCGAbiolinks) library(survminer) library(survival) library(SummarizedExperiment) library(tidyverse)

listSamples <- c("TCGA-BA-5152", "TCGA-CN-6013", "TCGA-CN-A49A", "TCGA-CQ-7069", "TCGA-P3-A5QF", "TCGA-P3-A6T6", "TCGA-QK-A6IH", "TCGA-CR-6472", "TCGA-DQ-7594", "TCGA-HD-8224", "TCGA-HD-8314", "TCGA-P3-A5QE", "TCGA-CQ-6220", "TCGA-CQ-7064", "TCGA-F7-A624", "TCGA-HD-A4C1", "TCGA-P3-A6T2", "TCGA-CQ-7068", "TCGA-CV-6953", "TCGA-CV-7407", "TCGA-CV-A463", "TCGA-KU-A66T", "TCGA-MT-A7BN", "TCGA-UF-A71E", "TCGA-UF-A7JO", "TCGA-UF-A7JT", "TCGA-BA-5558", "TCGA-QK-A64Z", "TCGA-CV-A6JM", "TCGA-UF-A7JV", "TCGA-CV-7247", "TCGA-CV-7437", "TCGA-D6-6826", "TCGA-D6-A6EQ", "TCGA-F7-A622", "TCGA-TN-A7HJ", "TCGA-BB-A5HU", "TCGA-BB-A5HZ", "TCGA-CN-6018", "TCGA-CQ-7071", "TCGA-CQ-A4CD", "TCGA-CR-6484", "TCGA-CR-7380", "TCGA-CV-6942", "TCGA-CV-6955", "TCGA-CV-7252", "TCGA-CV-7416", "TCGA-CV-7425", "TCGA-CV-A45V", "TCGA-HD-A633", "TCGA-MT-A67F", "TCGA-P3-A6T3", "TCGA-RS-A6TO", "TCGA-BA-5557", "TCGA-BA-A6DB", "TCGA-BB-4224", "TCGA-BB-7863", "TCGA-BB-7872", "TCGA-C9-A47Z", "TCGA-C9-A480", "TCGA-CN-4725", "TCGA-CN-4733", "TCGA-CN-4737", "TCGA-CN-6996", "TCGA-CN-A640", "TCGA-CQ-5327", "TCGA-CQ-5329", "TCGA-CQ-6229", "TCGA-CQ-7065", "TCGA-CQ-A4CE", "TCGA-CQ-A4CH", "TCGA-CR-6488", "TCGA-CR-7372", "TCGA-CR-7382", "TCGA-CR-7393", "TCGA-CV-5973", "TCGA-CV-5979", "TCGA-CV-6003", "TCGA-CV-6939", "TCGA-CV-6959", "TCGA-CV-7104", "TCGA-CV-7238", "TCGA-CV-7243", "TCGA-CV-7255", "TCGA-CV-7438", "TCGA-CV-A45P", "TCGA-CV-A465", "TCGA-CV-A6JT", "TCGA-CV-A6K0", "TCGA-D6-6515", "TCGA-D6-A6EM", "TCGA-DQ-5624", "TCGA-HD-7831", "TCGA-HD-A6HZ", "TCGA-IQ-A61J", "TCGA-IQ-A61L", "TCGA-IQ-A6SG", "TCGA-MT-A67A", "TCGA-P3-A5QA", "TCGA-QK-A652", "TCGA-T2-A6WX", "TCGA-UP-A6WW", "TCGA-BA-5153", "TCGA-BA-5559", "TCGA-BB-4223", "TCGA-CN-A499", "TCGA-CN-A63Y", "TCGA-CN-A6V1", "TCGA-CN-A6V7", "TCGA-CR-5243", "TCGA-CR-5249", "TCGA-CR-6470", "TCGA-CR-6480", "TCGA-CR-6481", "TCGA-CR-7404", "TCGA-DQ-7596", "TCGA-MT-A67G", "TCGA-QK-A6IF", "TCGA-QK-A6V9" )

clinical_HNSC <- GDCquery_clinic("TCGA-HNSC") any(colnames(clinical_HNSC) %in% c("vital_status", "days_to_last_follow_up", "days_to_death")) which(colnames(clinical_HNSC) %in% c("vital_status", "days_to_last_follow_up", "days_to_death")) clinical_HNSC[,c(9,43,48)]

table (clinical_HNSC$vital_status)

clinical_HNSC$deceased <- ifelse(clinical_HNSC$vital_status =="Alive",FALSE,TRUE)

clinical_HNSC$overall_survival <- ifelse(clinical_HNSC$vital_status == "Alive", clinical_HNSC$days_to_last_follow_up, clinical_HNSC$days_to_death) write.csv(clinical_HNSC, "tabela.csv", row.names = FALSE)

query_HNSC_all <- GDCquery( project = "TCGA-HNSC", data.category = "Transcriptome Profiling", experimental.strategy = "RNA-Seq", workflow.type = "STAR - Counts", data.type = "Gene Expression Quantification", barcode = listSamples, sample.type = c("Primary Tumor","Solid Tissue Normal"), access="open" )

output_HNSC <- getResults(query_HNSC_all) tumor <- output_HNSC$cases

query_HNSC_all <- GDCquery( project = "TCGA-HNSC", data.category = "Transcriptome Profiling", experimental.strategy = "RNA-Seq", workflow.type = "STAR - Counts", data.type = "Gene Expression Quantification", sample.type = "Primary Tumor", access="open", barcode=tumor )

GDCdownload (query_HNSC_all) tcga_hnsc_data <- GDCprepare(query_HNSC_all,summarizedExperiment = TRUE) hnsc_matrix <- assay(tcga_hnsc_data, "unstranded") hnsc_matrix[1:10,1:10]

gene_metadata <- as.data.frame(rowData(tcga_hnsc_data)) coldata <- as.data.frame (colData(tcga_hnsc_data))

dds <- DESeqDataSetFromMatrix(countData=hnsc_matrix, colData = coldata, design = ~1)

keep <- rowSums(counts(dds)) >=10 dds <- dds[keep,]

vsd <- vst(dds,blind=FALSE) hnsc_matrix_vst <- assay (vsd)

hnsc_tp53 <- hnsc_matrix_vst %>% as.data.frame() %>% rownames_to_column(var = 'gene_id') %>% gather(key = 'case_id', value = 'counts', -gene_id) %>% left_join(., gene_metadata, by = "gene_id") %>% filter(gene_name == "TP53")

median_value <- median(hnsc_tp53$counts)

hnsc_tp53$strata <- ifelse(hnsc_tp53$counts >= median_value, "HIGH", "LOW")

hnsc_tp53$case_id <- gsub('-01.*', '', hnsc_tp53$case_id) hnsc_tp53 <- merge(hnsc_tp53, clinical_HNSC, by.x = 'case_id', by.y = 'submitter_id')

fit <- survfit(Surv(overall_survival, deceased) ~ strata, data = hnsc_tp53) fit ggsurvplot(fit, data = hnsc_tp53, pval = T, risk.table = T)

fit2 <- survdiff(Surv(overall_survival, deceased) ~ strata, data = hnsc_tp53)

ADD REPLY

Login before adding your answer.

Traffic: 1901 users visited in the last hour
Help About
FAQ
Access RSS
API
Stats

Use of this site constitutes acceptance of our User Agreement and Privacy Policy.

Powered by the version 2.3.6