This kind of comparative analysis is not my field, but a "naive" approach that relies on existing resources and simple filtering is to use biomaRt to query Ensembl. Essentially, Ensembl provides homology tables between mouse and human. Hence, if you retrieve all human genes and the human-to-mouse homology table, then the human-only genes are those human genes that do not appear in the homology table, right? Again, this is a naive solution and not my field, so I cannot guarantee the integrity of the results, but it might be a start.
library(biomaRt)
# Connect to the Ensembl "genes" marts (version 100) for human and mouse.
human_mart <- biomaRt::useEnsembl(
  biomart = "genes",
  dataset = "hsapiens_gene_ensembl",
  version = 100
)
mouse_mart <- biomaRt::useEnsembl(
  biomart = "genes",
  dataset = "mmusculus_gene_ensembl",
  version = 100
)

# Every human gene: Ensembl gene ID, HGNC symbol and biotype.
all_human <- biomaRt::getBM(
  mart = human_mart,
  attributes = c("ensembl_gene_id", "hgnc_symbol", "gene_biotype")
)

# Human genes linked to a mouse gene across the two marts (the homolog table).
human2mouse_homologs <- biomaRt::getLDS(
  mart = human_mart,
  attributes = c("ensembl_gene_id", "hgnc_symbol"),
  martL = mouse_mart,
  attributesL = c("ensembl_gene_id", "mgi_symbol"),
  uniqueRows = TRUE
)
# Replace the returned headers with short, unambiguous column names.
names(human2mouse_homologs) <- c(
  "human_id", "human_name", "mouse_id", "mouse_name"
)

# Keep the human genes whose ID never appears in the homolog table; under this
# naive definition those are the genes present only in human.
only_human <- all_human[
  is.na(match(all_human$ensembl_gene_id, human2mouse_homologs$human_id)),
]
head(only_human)
#> ensembl_gene_id hgnc_symbol gene_biotype
#> 1 ENSG00000210049 MT-TF Mt_tRNA
#> 2 ENSG00000211459 MT-RNR1 Mt_rRNA
#> 3 ENSG00000210077 MT-TV Mt_tRNA
#> 4 ENSG00000210082 MT-RNR2 Mt_rRNA
#> 5 ENSG00000209082 MT-TL1 Mt_tRNA
#> 7 ENSG00000210100 MT-TI Mt_tRNA
# Count the human-only genes per gene biotype.
table(only_human[["gene_biotype"]])
#>
#> IG_C_gene IG_C_pseudogene
#> 18 11
#> IG_D_gene IG_J_gene
#> 64 24
#> IG_J_pseudogene IG_pseudogene
#> 6 1
#> IG_V_gene IG_V_pseudogene
#> 153 290
#> lncRNA miRNA
#> 17957 1699
#> misc_RNA Mt_rRNA
#> 2186 2
#> Mt_tRNA polymorphic_pseudogene
#> 22 42
#> processed_pseudogene protein_coding
#> 10830 4843
#> pseudogene ribozyme
#> 40 5
#> rRNA rRNA_pseudogene
#> 55 517
#> scaRNA scRNA
#> 31 1
#> snoRNA snRNA
#> 561 1453
#> sRNA TEC
#> 6 1118
#> TR_C_gene TR_D_gene
#> 5 5
#> TR_J_gene TR_J_pseudogene
#> 93 4
#> TR_V_gene TR_V_pseudogene
#> 110 46
#> transcribed_processed_pseudogene transcribed_unitary_pseudogene
#> 562 142
#> transcribed_unprocessed_pseudogene translated_processed_pseudogene
#> 1097 2
#> translated_unprocessed_pseudogene unitary_pseudogene
#> 1 104
#> unprocessed_pseudogene vaultRNA
#> 3362 1
Created on 2022-11-29 with reprex v2.0.2