Hi all,
I'm trying to get the main transcript for all orthologous human Mx1 genes. Essentially, I want the first transcript of all genes listed here: http://www.ensembl.org/Homo_sapiens/Gene/Compara_Ortholog?db=core;g=ENSG00000157601;r=21:42792442-42831141;t=ENST00000398600
Ideally, I'd like to exclude any non-vertebrates and write a fasta file with the format: Species/Ensembl ID/Name. I'm pretty unfamiliar with Perl but here's my code.
#!/usr/bin/perl
use warnings;
use strict;
use Bio::EnsEMBL::Registry;
use Bio::SeqIO ;
use Data::Dumper;
my $registry = 'Bio::EnsEMBL::Registry';
print "Connecting to Ensembl..." ; print "\n" ;
$registry->load_registry_from_db(
-host => 'useastdb.ensembl.org',
-user => 'anonymous'
);
print 'Succesfully connected to Ensembl Database' ; print "\n" ;
my $gene_member_adaptor = Bio::EnsEMBL::Registry->get_adaptor('Multi', 'compara', 'GeneMember');
my $gene_member = $gene_member_adaptor->fetch_by_source_stable_id('ENSEMBLGENE','ENSG00000157601');
my $homology_adaptor = Bio::EnsEMBL::Registry->get_adaptor('Multi', 'compara', 'Homology');
my $homologies = $homology_adaptor->fetch_all_by_Member($gene_member); #homologies = hash array
my $outseq = Bio::SeqIO->new( -fh => \*STDOUT,
-format => 'FASTA');
#open (my $fh, ">", "homology_output_2") ;
foreach my $homology (@{$homologies}) {
foreach my $member (@{$homology->get_all_Members}) {
#print "DESCRIPTION: ", $member -> description(), "\n" ;
#print "Stable ID = ", $member ->get_Transcript()->stable_id() ,"\n" ;
#print "Name = ", $member -> get_Transcript() ->external_name() ,"\n" ;
#print $member -> get_Transcript() -> seq()->seq() , "\n" ;
print $outseq -> write_seq($member -> get_Transcript() -> seq()) . "\n" ;
} }
#close $fh;
The biggest problem is isolating the transcripts of just the orthologs (don't need paralogues) I need. Any suggestions would be much appreciated!
Perfect! Thanks Emily, that works great.