Entering edit mode
2.5 years ago
melissachua90
▴
70
I want to annotate a CNV file derived from FireBrowse using Gaia, as inspired by Kevin Blighe .
As reported by several others, I'm encountering Error in start_index:end_index : argument of length 0
, even when I use "SNP6 GRCh38 Remapped Probeset File" (suggested here - https://support.bioconductor.org/p/111990/).
Does anyone else have a solution?
R code:
# CNV file
cnv <- read.table("cnv.txt", sep="\t", stringsAsFactors=FALSE, header=TRUE)
###########################
# Create a markers object #
###########################
library(gaia)
library(GenomicDataCommons)
# Retrieve probes meta file
temp <- "snp6.na35.remap.hg38.subset.txt.gz"
markersMatrix <- read.csv(gzfile(temp), sep = "\t",as.is = TRUE)
markersMatrix = markersMatrix[markersMatrix[,"freqcnv"]==FALSE,]
colnames(markersMatrix)[1:3] <- c("Probe.Name", "Chromosome", "Start")
# Change sex chr names
unique(markersMatrix$Chromosome)
markersMatrix[which(markersMatrix$Chromosome=="X"),"Chromosome"] <- 23
markersMatrix[which(markersMatrix$Chromosome=="Y"),"Chromosome"] <- 24
markersMatrix$Chromosome <- sapply(markersMatrix$Chromosome, as.integer)
# Create a marker ID
markerID <- apply(markersMatrix, 1, function(x) paste0(x[2], ":", x[3]))
#Remove duplicates
markersMatrix <- markersMatrix[-which(duplicated(markerID)),]
# Filter markersMatrix for common CNV
markerID <- paste(markersMatrix$Chromosome,markersMatrix$Start, sep =":")
# Create the markers object
markers_obj <- load_markers(markersMatrix)
############################################
# Determine recurrent aberrations in tumor #
############################################
# Prepare CNV matrix
cnvMatrix <- cnv[1:100000,]
#Add label (0 for loss, 1 for gain)
#A segment mean of 0.3 is defined as the cut-off
cnvMatrix <- cbind(cnvMatrix, Label=NA)
cnvMatrix[cnvMatrix$Segment_Mean < -0.3,"Label"] <- 0
cnvMatrix[cnvMatrix$Segment_Mean > 0.3,"Label"] <- 1
cnvMatrix <- cnvMatrix[!is.na(cnvMatrix$Label),]
#Remove segment mean as we now go by the binary classification of gain or loss
cnvMatrix <- cnvMatrix[,-6]
colnames(cnvMatrix) <- c("Sample.Name", "Chromosome", "Start", "End", "Num.of.Markers", "Aberration")
#Substitute Chromosomes "X" and "Y" with "23" and "24"
xidx <- which(cnvMatrix$Chromosome=="X")
yidx <- which(cnvMatrix$Chromosome=="Y")
cnvMatrix[xidx,"Chromosome"] <- 23
cnvMatrix[yidx,"Chromosome"] <- 24
cnvMatrix$Chromosome <- sapply(cnvMatrix$Chromosome, as.integer)
#Run GAIA, which looks for recurrent aberrations in your input file
n <- length(unique(cnvMatrix[,1]))
cnv_obj <- load_cnv(cnvMatrix, markers_obj, n)
results.all <- runGAIA(cnv_obj,markers_obj,output_file_name = 'Tumor.all.txt',aberrations = -1,chromosomes = -1,num_iterations = 10,threshold = 0.15)
##################################################
# plot recurrent sCNA gains and losses from GAIA #
##################################################
# Run GAIA
results <- runGAIA(cnv_obj, markers_obj, output_file_name="Tumor.asian.txt", aberrations=-1, chromosomes=-1, num_iterations=10, threshold=0.15)
#Set qvalue threshold for plotting and annotation
threshold <- 0.15
#Convert the recurrent aberrations to numeric (GAIA saves it as text)
RecCNV <- t(apply(results, 1, as.numeric))
colnames(RecCNV) <- colnames(results)
#Add a new column for 'score'
RecCNV <- cbind(RecCNV, score=0)
#Determine the minimum Q value that's not equal to 0
minval <- format(min(RecCNV[RecCNV[,"q-value"]!=0, "q-value"]), scientific=FALSE)
minval <- substring(minval,1, nchar(minval)-1)
#Replace Q values of 0 with the minimum, non-zero value
RecCNV[RecCNV[,"q-value"]==0, "q-value"] <- as.numeric(minval)
#Set the score to equal -log base 10 of the Q value
RecCNV[,"score"] <- sapply(RecCNV[,"q-value"], function(x) -log10(as.numeric(x)))
#Create a function for plotting the recurrent copy number variants
gaiaCNVplot <- function (calls, threshold=0.01, main="main") {
Calls <- calls[order(calls[,"Region Start [bp]"]),]
Calls <- Calls[order(Calls[,"Chromosome"]),]
rownames(Calls) <- NULL
Chromo <- Calls[,"Chromosome"]
Gains <- apply(Calls,1,function(x) ifelse(x["Aberration Kind"]==1, x["score"], 0))
Losses <- apply(Calls, 1,function(x) ifelse(x["Aberration Kind"]==0, x["score"], 0))
plot(Gains, ylim=c(-max(Calls [,"score"]+2), max(Calls[,"score"]+2)), type="h", col="red2", xlab="Chromosome", ylab=expression("-log"[10]~italic(Q)~"value"), main=main, cex.main=4, xaxt="n", font=2, font.axis=2, font.lab=2, font.axis=2)
points(-(Losses), type="h", col="forestgreen")
abline(h= 0, cex=4)
abline(h=-log10(threshold), col="black", cex=4, main="test", lty=6, lwd=2)
abline(h=log10(threshold), col="black", cex=4, main="test", lty=6, lwd=2)
uni.chr <- unique(Chromo)
temp <- rep(0, length(uni.chr))
for (i in 1:length(uni.chr)) {
temp[i] <- max(which(uni.chr[i] == Chromo))
}
for (i in 1:length(temp)) {
abline(v = temp[i], col = "black", lty = "dashed", )
}
nChroms <- length(uni.chr)
begin <- c()
for (d in 1:nChroms) {
chrom <- sum(Chromo == uni.chr[d])
begin <- append(begin, chrom)
}
temp2 <- rep(0, nChroms)
for (i in 1:nChroms) {
if (i == 1) {
temp2[1] <- (begin[1] * 0.5)
}
else if (i > 1) {
temp2[i] <- temp[i - 1] + (begin[i] * 0.5)
}
}
uni.chr[uni.chr==23] <- "X"
uni.chr[uni.chr==24] <- "Y"
for (i in 1:length(temp)) {
axis(1, at = temp2[i], labels = uni.chr[i], cex.axis = 1)
}
#legend("topright", y.intersp=0.8, c("Amplification"), pch=15, col=c("red2"), text.font=2)
#legend("bottomright", y.intersp=0.8, c("Deletion"), pch=15, col=c("forestgreen"), text.font=2)
}
# Plot data
gaiaCNVplot(RecCNV, threshold, "A")
#######################################
# annotate the recurrent sCNA regions #
#######################################
# Create a reference dataset of all genes and save it as a GenomicRanges object
library(biomaRt)
library(GenomicRanges)
mart <- useMart(biomart="ensembl", dataset="hsapiens_gene_ensembl")
mart <- useMart(biomart="ENSEMBL_MART_ENSEMBL", host="grch37.ensembl.org", path="/biomart/martservice", dataset="hsapiens_gene_ensembl")
genes <- getBM(attributes=c("hgnc_symbol","chromosome_name","start_position","end_position"), mart=mart)
genes <- genes[genes[,1]!="" & genes[,2] %in% c(1:22,"X","Y"),]
xidx <- which(genes[,2]=="X")
yidx <- which(genes[,2]=="Y")
genes[xidx, 2] <- 23
genes[yidx, 2] <- 24
genes[,2] <- sapply(genes[,2],as.integer)
genes <- genes[order(genes[,3]),]
genes <- genes[order(genes[,2]),]
colnames(genes) <- c("GeneSymbol","Chr","Start","End")
genes_GR <- makeGRangesFromDataFrame(genes,keep.extra.columns = TRUE)
# Store your own data as a GenomicRanges object:
colnames(df) <- c("chr", "start", "end", "extra1", "extra2")
# Overlap your regions with the reference dataset that you created
hits <- findOverlaps(genes_GR, df_GR, type="within")
df_ann <- cbind(df[subjectHits(hits),],genes[queryHits(hits),])
head(df_ann)
# show the precise co-ordinates of each gene and the region co-ordinates in which it was found to have CNA / CNV
AberrantRegion <- paste0(df_ann[,1],":", df_ann[,3],"-", df_ann[,4])
GeneRegion <- paste0(df_ann[,7],":", df_ann[,8],"-", df_ann[,9])
Final_regions <- cbind(df_ann[,c(6,2,5)], AberrantRegion, GeneRegion)
Final_regions[seq(1, nrow(Final_regions), 20),]
Traceback:
> cnv_obj <- load_cnv(cnvMatrix, markers_obj, n)
Loading Copy Number Data
.Error in start_index:end_index : argument of length 0