Hello All,
I have the following code, which uses getBM from biomaRt to look up gene names for a list of refseq_mrna IDs. The query returns 3877 rows, whereas the original data has 3883 IDs. I understand this is because Ensembl does not return a row (not even one containing NA) for IDs it cannot match. Consequently, I tried the merge function (with a variety of arguments) to combine the original data with the extracted data so that unmatched IDs would appear with NA values. Unfortunately, the best I can do is get 3895 rows, which is more than the original data contains. I would appreciate any advice you may have on this topic!
Sierra
# Look up gene names for a list of RefSeq mRNA IDs through biomaRt and
# build a table that keeps every input ID, with NA where no gene name
# was returned.

# call biomaRt library
library("biomaRt")
HumanEnsembl <- useMart("ensembl", dataset = "hsapiens_gene_ensembl")

# Setup parameters for the query
ID_QueryAttributes <- c("refseq_mrna", "external_gene_name")
ID_QueryFilters <- "refseq_mrna"

# Load transcript IDs from a headerless, single-column file.
m6AmIds <- read.csv(
  "/Users/sierraniemiec/statdata/m6AmGeneList.txt",
  stringsAsFactors = FALSE,
  header = FALSE
)
colnames(m6AmIds) <- "refseq_mrna"
m6AmIdsList <- m6AmIds[[1]]

# Split the IDs into chunks of 10 so each getBM request stays small.
ID_queryChunks <- split(m6AmIdsList, ceiling(seq_along(m6AmIdsList) / 10.0))

# Query each chunk exactly once and collect the results in a preallocated
# list. (Seeding the result with the first chunk and then appending that
# same chunk again inside the loop would duplicate its rows.)
numberOfChunks <- length(ID_queryChunks)
ID_query_resultChunks <- vector("list", numberOfChunks)
for (i in seq_along(ID_queryChunks)) {
  # Report the fraction of chunks completed every 10 chunks.
  if ((i - 1) %% 10 == 0) {
    print((i - 1) / numberOfChunks)
  }
  ID_query_resultChunks[[i]] <- getBM(
    attributes = ID_QueryAttributes,
    filters = ID_QueryFilters,
    values = ID_queryChunks[[i]],
    mart = HumanEnsembl
  )
}

# Bind all chunk results in one step instead of growing a data frame in the
# loop, then drop identical rows (these can arise if the same ID appears in
# more than one chunk).
combinedResult <- unique(do.call(rbind, ID_query_resultChunks))

# Left join: all.x = TRUE keeps every input ID and fills external_gene_name
# with NA for IDs that the query returned nothing for.
# NOTE(review): the row count may still exceed the input if one RefSeq ID
# maps to several gene names or the input file itself contains repeated IDs.
datframe <- as.data.frame(m6AmIds)
idmap <- merge(x = datframe, y = combinedResult, by = "refseq_mrna", all.x = TRUE)
For a sample of the data, I would use:
# Sample of 20 RefSeq mRNA IDs for trying out the lookup.
data <- c(
  "NM_000019", "NM_000026", "NM_000030", "NM_000033", "NM_000034",
  "NM_000046", "NM_000067", "NM_000071", "NM_000075", "NM_000097",
  "NM_000098", "NM_000100", "NM_000101", "NM_000117", "NM_000122",
  "NM_000146", "NM_000158", "NM_000169", "NM_000175", "NM_001001561"
)