CNV annotation using GAIA
0
0
Entering edit mode
2.5 years ago

I want to annotate a CNV file derived from FireBrowse using Gaia, as inspired by Kevin Blighe .

As reported by several others, I'm encountering Error in start_index:end_index : argument of length 0, even when I use "SNP6 GRCh38 Remapped Probeset File" (suggested here - https://support.bioconductor.org/p/111990/).

Does anyone else have a solution?

R code:

# CNV file
cnv <- read.table("cnv.txt", sep="\t", stringsAsFactors=FALSE, header=TRUE)



###########################
# Create a markers object #
###########################
library(gaia)
library(GenomicDataCommons)

# Retrieve probes meta file 
temp <- "snp6.na35.remap.hg38.subset.txt.gz"
markersMatrix <- read.csv(gzfile(temp), sep = "\t",as.is = TRUE) 
markersMatrix = markersMatrix[markersMatrix[,"freqcnv"]==FALSE,] 
colnames(markersMatrix)[1:3] <- c("Probe.Name", "Chromosome", "Start")


# Change sex chr names
unique(markersMatrix$Chromosome)
markersMatrix[which(markersMatrix$Chromosome=="X"),"Chromosome"] <- 23
markersMatrix[which(markersMatrix$Chromosome=="Y"),"Chromosome"] <- 24
markersMatrix$Chromosome <- sapply(markersMatrix$Chromosome, as.integer)

# Create a marker ID
markerID <- apply(markersMatrix, 1, function(x) paste0(x[2], ":", x[3]))

#Remove duplicates
markersMatrix <- markersMatrix[-which(duplicated(markerID)),]

# Filter markersMatrix for common CNV
markerID <- paste(markersMatrix$Chromosome,markersMatrix$Start, sep =":")

# Create the markers object
markers_obj <- load_markers(markersMatrix)


############################################
# Determine recurrent aberrations in tumor #
############################################

# Prepare CNV matrix
cnvMatrix <- cnv[1:100000,]

#Add label (0 for loss, 1 for gain)
#A segment mean of 0.3 is defined as the cut-off
cnvMatrix <- cbind(cnvMatrix, Label=NA)
cnvMatrix[cnvMatrix$Segment_Mean < -0.3,"Label"] <- 0
cnvMatrix[cnvMatrix$Segment_Mean > 0.3,"Label"] <- 1
cnvMatrix <- cnvMatrix[!is.na(cnvMatrix$Label),]

#Remove segment mean as we now go by the binary classification of gain or loss
cnvMatrix <- cnvMatrix[,-6]
colnames(cnvMatrix) <- c("Sample.Name", "Chromosome", "Start", "End", "Num.of.Markers", "Aberration")

#Substitute Chromosomes "X" and "Y" with "23" and "24"
xidx <- which(cnvMatrix$Chromosome=="X")
yidx <- which(cnvMatrix$Chromosome=="Y")
cnvMatrix[xidx,"Chromosome"] <- 23
cnvMatrix[yidx,"Chromosome"] <- 24
cnvMatrix$Chromosome <- sapply(cnvMatrix$Chromosome, as.integer)


#Run GAIA, which looks for recurrent aberrations in your input file
n <- length(unique(cnvMatrix[,1]))
cnv_obj <- load_cnv(cnvMatrix, markers_obj, n)
results.all <- runGAIA(cnv_obj,markers_obj,output_file_name = 'Tumor.all.txt',aberrations = -1,chromosomes = -1,num_iterations = 10,threshold = 0.15)

##################################################
# plot recurrent sCNA gains and losses from GAIA #
##################################################
# Run GAIA
results <- runGAIA(cnv_obj, markers_obj, output_file_name="Tumor.asian.txt", aberrations=-1, chromosomes=-1, num_iterations=10, threshold=0.15)

#Set qvalue threshold for plotting and annotation
threshold <- 0.15

#Convert the recurrent aberrations to numeric (GAIA saves it as text)
RecCNV <- t(apply(results, 1, as.numeric))
colnames(RecCNV) <- colnames(results)

#Add a new column for 'score'
RecCNV <- cbind(RecCNV, score=0)

#Determine the minimum Q value that's not equal to 0
minval <- format(min(RecCNV[RecCNV[,"q-value"]!=0, "q-value"]), scientific=FALSE)
minval <- substring(minval,1, nchar(minval)-1)

#Replace Q values of 0 with the minimum, non-zero value
RecCNV[RecCNV[,"q-value"]==0, "q-value"] <- as.numeric(minval)

#Set the score to equal -log base 10 of the Q value
RecCNV[,"score"] <- sapply(RecCNV[,"q-value"], function(x) -log10(as.numeric(x)))

#Create a function for plotting the recurrent copy number variants
gaiaCNVplot <- function (calls, threshold=0.01, main="main") {
  Calls <- calls[order(calls[,"Region Start [bp]"]),]
  Calls <- Calls[order(Calls[,"Chromosome"]),]
  rownames(Calls) <- NULL
  Chromo <- Calls[,"Chromosome"]
  Gains <- apply(Calls,1,function(x) ifelse(x["Aberration Kind"]==1, x["score"], 0))
  Losses <- apply(Calls, 1,function(x) ifelse(x["Aberration Kind"]==0, x["score"], 0))
  plot(Gains, ylim=c(-max(Calls [,"score"]+2), max(Calls[,"score"]+2)), type="h", col="red2", xlab="Chromosome", ylab=expression("-log"[10]~italic(Q)~"value"), main=main, cex.main=4, xaxt="n", font=2, font.axis=2, font.lab=2, font.axis=2)
  points(-(Losses), type="h", col="forestgreen")
  abline(h= 0, cex=4)
  abline(h=-log10(threshold), col="black", cex=4, main="test", lty=6, lwd=2)
  abline(h=log10(threshold), col="black", cex=4, main="test", lty=6, lwd=2)
  uni.chr <- unique(Chromo)
  temp <- rep(0, length(uni.chr))

  for (i in 1:length(uni.chr)) {
    temp[i] <- max(which(uni.chr[i] == Chromo))
  }

  for (i in 1:length(temp)) {
    abline(v = temp[i], col = "black", lty = "dashed", )
  }

  nChroms <- length(uni.chr)

  begin <- c()

  for (d in 1:nChroms) {
    chrom <- sum(Chromo == uni.chr[d])
    begin <- append(begin, chrom)
  }

  temp2 <- rep(0, nChroms)

  for (i in 1:nChroms) {
    if (i == 1) {
      temp2[1] <- (begin[1] * 0.5)
    }
    else if (i > 1) {
      temp2[i] <- temp[i - 1] + (begin[i] * 0.5)
    }
  }

  uni.chr[uni.chr==23] <- "X"
  uni.chr[uni.chr==24] <- "Y"

  for (i in 1:length(temp)) {
    axis(1, at = temp2[i], labels = uni.chr[i], cex.axis = 1)
  }

  #legend("topright", y.intersp=0.8, c("Amplification"), pch=15, col=c("red2"), text.font=2)
  #legend("bottomright", y.intersp=0.8, c("Deletion"), pch=15, col=c("forestgreen"), text.font=2)
}

# Plot data
gaiaCNVplot(RecCNV, threshold, "A")



#######################################
# annotate the recurrent sCNA regions #
#######################################

# Create a reference dataset of all genes and save it as a GenomicRanges object
library(biomaRt)
library(GenomicRanges)
mart <- useMart(biomart="ensembl", dataset="hsapiens_gene_ensembl")
mart <- useMart(biomart="ENSEMBL_MART_ENSEMBL", host="grch37.ensembl.org", path="/biomart/martservice", dataset="hsapiens_gene_ensembl")
genes <- getBM(attributes=c("hgnc_symbol","chromosome_name","start_position","end_position"), mart=mart)
genes <- genes[genes[,1]!="" & genes[,2] %in% c(1:22,"X","Y"),]
xidx <- which(genes[,2]=="X")
yidx <- which(genes[,2]=="Y")
genes[xidx, 2] <- 23
genes[yidx, 2] <- 24
genes[,2] <- sapply(genes[,2],as.integer)
genes <- genes[order(genes[,3]),]
genes <- genes[order(genes[,2]),]
colnames(genes) <- c("GeneSymbol","Chr","Start","End")
genes_GR <- makeGRangesFromDataFrame(genes,keep.extra.columns = TRUE)

# Store your own data as a GenomicRanges object:
colnames(df) <- c("chr", "start", "end", "extra1", "extra2")

# Overlap your regions with the reference dataset that you created
hits <- findOverlaps(genes_GR, df_GR, type="within")
df_ann <- cbind(df[subjectHits(hits),],genes[queryHits(hits),])
head(df_ann)

# show the precise co-ordinates of each gene and the region co-ordinates in which it was found to have CNA / CNV
AberrantRegion <- paste0(df_ann[,1],":", df_ann[,3],"-", df_ann[,4])
GeneRegion <- paste0(df_ann[,7],":", df_ann[,8],"-", df_ann[,9])
Final_regions <- cbind(df_ann[,c(6,2,5)], AberrantRegion, GeneRegion)
Final_regions[seq(1, nrow(Final_regions), 20),]

Traceback:

> cnv_obj <- load_cnv(cnvMatrix, markers_obj, n)
Loading Copy Number Data
.Error in start_index:end_index : argument of length 0
cnv gaia • 790 views
ADD COMMENT

Login before adding your answer.

Traffic: 2018 users visited in the last hour
Help About
FAQ
Access RSS
API
Stats

Use of this site constitutes acceptance of our User Agreement and Privacy Policy.

Powered by the version 2.3.6