ChEMBL ID to Ensembl ID
1
Okay, so I just worked something out here. You will need a directory named Biostars on your Desktop. This script builds a data frame of matched ChEMBL, UniProt, and Ensembl gene IDs, and you can then do the rest of what you need from that data frame.
# Build a data frame of matched UniProt, ChEMBL and Ensembl IDs.
#
# Steps:
#   1. Download the ChEMBL mapping file and keep its first two columns,
#      labelled "uniprot" and "chembl".
#   2. Download the human UniProt id-mapping table and keep columns 1 and 19,
#      labelled "uniprot" and "ensembl".
#   3. Merge the two tables on the shared "uniprot" column.
#
# Result: `ensembl.uniprot.chembl`, with columns uniprot, chembl, ensembl.
# Requires the `curl` package and network access.

# Make sure the download directory exists so the downloads do not fail.
dir.create("~/Desktop/Biostars", recursive = TRUE, showWarnings = FALSE)

# --- ChEMBL mapping ---------------------------------------------------------
curl::curl_download(
  url = "ftp.ebi.ac.uk/pub/databases/chembl/ChEMBLdb/latest/chembl_uniprot_mapping.txt",
  "~/Desktop/Biostars/chembl_uniprot_mapping.txt",
  quiet = FALSE
)
# quote = "" stops apostrophes or double quotes inside a tab-separated field
# from being treated as string delimiters, which would merge fields or rows.
chembl <- read.table(
  "~/Desktop/Biostars/chembl_uniprot_mapping.txt",
  sep = "\t",
  quote = ""
)
# Only the first two columns are needed for the ID mapping.
chembl$V3 <- NULL
chembl$V4 <- NULL
colnames(chembl) <- c("uniprot", "chembl")

# --- UniProt id-mapping table -----------------------------------------------
curl::curl_download(
  url = "ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/HUMAN_9606_idmapping_selected.tab.gz",
  "~/Desktop/Biostars/HUMAN_9606_idmapping_selected.tab.gz",
  quiet = FALSE
)
# Read the gzip-compressed file directly instead of shelling out to `gunzip`:
# this works on systems without gunzip and does not fail on a re-run when an
# uncompressed copy already exists.
uniprot.db <- read.table(
  gzfile("~/Desktop/Biostars/HUMAN_9606_idmapping_selected.tab.gz"),
  sep = "\t",
  quote = ""
)
# Column 19 is used as the Ensembl ID and column 1 as the UniProt ID.
# NOTE(review): column positions follow the UniProt idmapping_selected
# layout -- confirm against the README shipped alongside the file.
ensembl <- as.data.frame(uniprot.db$V19)
uniprot <- as.data.frame(uniprot.db$V1)
ensembl.uniprot <- cbind(ensembl, uniprot)
colnames(ensembl.uniprot) <- c("ensembl", "uniprot")

# --- Join -------------------------------------------------------------------
# merge() keeps only the UniProt IDs present in both tables.
ensembl.uniprot.chembl <- merge(chembl, ensembl.uniprot, by = "uniprot")
Here is a sample of the data frame:
Login before adding your answer.
Traffic: 3396 users visited in the last hour