Entering edit mode
11 weeks ago
Pumla
•
0
Can you help me understand what is wrong with my codes I am doing a transcript analysis of my dataset as demomstarted but i keep getting stuck and error on Rstudio when i get to the last part of this script:
# Setting up environment
# Clean environment
rm(list = ls(all.names = TRUE)) # Clears all objects including hidden ones
gc() # Free up memory and report the memory usage
# Install and load required packages
if (!requireNamespace("BiocManager", quietly = TRUE)) {
install.packages("BiocManager")
}
BiocManager::install(c("GenomicFeatures", "tximport", "rhdf5", "txdbmaker"))
if (!requireNamespace("devtools", quietly = TRUE)) {
install.packages("devtools")
}
devtools::install_github("pachterlab/sleuth")
library(GenomicFeatures)
library(tximport)
library(rhdf5)
library(txdbmaker)
library(sleuth)
# Checking R version
print(R.version.string)
# Set working directory
setwd("~/New folder")
# Gene-Level Aggregation with tximport
# Paths to your abundance.tsv files for healthy and treated samples
healthy_files <- c("output_directory_48/abundance_48.tsv",
"output_directory_49/abundance_49.tsv",
"output_directory_50/abundance_50.tsv",
"output_directory_51/abundance_51.tsv",
"output_directory_52/abundance_52.tsv",
"output_directory_53/abundance_53.tsv",
"output_directory_54/abundance_54.tsv",
"output_directory_55/abundance_55.tsv",
"output_directory_56/abundance_56.tsv",
"output_directory_57/abundance_57.tsv",
"output_directory_58/abundance_58.tsv",
"output_directory_59/abundance_59.tsv",
"output_directory_60/abundance_60.tsv",
"output_directory_61/abundance_61.tsv",
"output_directory_62/abundance_62.tsv",
"output_directory_63/abundance_63.tsv",
"output_directory_64/abundance_64.tsv",
"output_directory_65/abundance_65.tsv",
"output_directory_66/abundance_66.tsv",
"output_directory_67/abundance_67.tsv",
"output_directory_68/abundance_68.tsv",
"output_directory_69/abundance_69.tsv",
"output_directory_70/abundance_70.tsv",
"output_directory_71/abundance_71.tsv")
treated_files <- c("output_directory_24/abundance_24.tsv",
"output_directory_25/abundance_25.tsv",
"output_directory_26/abundance_26.tsv",
"output_directory_27/abundance_27.tsv",
"output_directory_28/abundance_28.tsv",
"output_directory_29/abundance_29.tsv",
"output_directory_30/abundance_30.tsv",
"output_directory_31/abundance_31.tsv",
"output_directory_32/abundance_32.tsv",
"output_directory_33/abundance_33.tsv",
"output_directory_34/abundance_34.tsv",
"output_directory_35/abundance_35.tsv",
"output_directory_36/abundance_36.tsv",
"output_directory_37/abundance_37.tsv",
"output_directory_38/abundance_38.tsv",
"output_directory_39/abundance_39.tsv",
"output_directory_40/abundance_40.tsv",
"output_directory_41/abundance_41.tsv",
"output_directory_42/abundance_42.tsv",
"output_directory_43/abundance_43.tsv",
"output_directory_44/abundance_44.tsv",
"output_directory_45/abundance_45.tsv",
"output_directory_46/abundance_46.tsv",
"output_directory_47/abundance_47.tsv")
# Combine all file paths
all_files <- c(healthy_files, treated_files)
# Check if files exist
if (!all(file.exists(all_files))) {
stop("One or more files are missing. Please check file paths.")
}
# Load the transcript-to-gene mapping using a GTF file
gunzip("gencode.vM35.annotation.gtf.gz", remove = FALSE) # Unzip the annotation file
txdb <- txdbmaker::makeTxDbFromGFF("gencode.vM35.annotation.gtf")
tx2gene <- select(txdb, keys(txdb, keytype = "TXNAME"), "GENEID", "TXNAME")
# Clean transcript IDs in tx2gene
tx2gene$TXNAME <- sub("\\|.*", "", tx2gene$TXNAME)
colnames(tx2gene) <- c("target_id", "gene_id")
# Import Kallisto results and aggregate to gene-level
txi <- tximport(all_files, type = "kallisto", tx2gene = tx2gene)
# Prepare the sample-to-condition mapping
sample_id <- basename(all_files)
condition <- rep(c("healthy", "treated"), times = c(length(healthy_files), length(treated_files)))
# Create sample-to-condition mapping
s2c <- data.frame(sample = sample_id, condition = condition)
s2c$path <- all_files
This is the part where my scripts stops working
# Prepare Sleuth
so <- sleuth_prep(s2c, ~condition, target_mapping = tx2gene)
# Fit the model and perform differential expression analysis
so <- sleuth_fit(so)
so <- sleuth_wt(so, 'conditiontreated')
# View the results
results <- sleuth_results(so, 'conditiontreated', 'wt')
head(results)
I don't think anyone can help you until you tell us the error code.
Not sure what you're doing. You're using tximport (loading your data into txi) but are never using txi again. So you're basically running your differential gene expression on nothing. Also, why use tximport? Use the HDF5 files generated by kallisto (running kallisto with bootstraps) and just load those in sleuth. You want transcript-level analysis so why are you aggregating everything to the gene level?
So many things don't really make sense here...