#!/usr/bin/perl

# Command-line client for the GOrilla web service: fills in the site's
# submission form with WWW::Mechanize and prints the downloaded result files.

use strict;
use warnings;

use WWW::Mechanize;
use Getopt::Long;

# Entry page of the GOrilla service; the submission form is fetched from here.
my $GOrillaURL = "http://cbl-gorilla.cs.technion.ac.il/";

# Accepted --organism values.  The hash is a membership lookup used when the
# command line is validated.
my @organisms = qw(
    ARABIDOPSIS_THALIANA
    SACCHAROMYCES_CEREVISIAE
    CAENORHABDITIS_ELEGANS
    DROSOPHILA_MELANOGASTER
    DANIO_RERIO
    HOMO_SAPIENS
    MUS_MUSCULUS
    RATTUS_NORVEGICUS
);
my %organisms = map { $_ => 1 } @organisms;
my $organism  = "HOMO_SAPIENS";

# Accepted --runmode values and the default.
my @runmodes = qw(mhg hg);
my %runmodes = map { $_ => 1 } @runmodes;
my $runmode  = "mhg";

# Accepted --ontology values and the default.
my @ontologies = qw(proc func comp all);
my %ontologies = map { $_ => 1 } @ontologies;
my $ontology   = "all";

# Defaults for the remaining form fields.  $pvalue is kept as a string
# because it is later used to pick an entry of the form's p-value selector.
my $pvalue      = "0.001";
my $name        = "";
my $email       = "";
my $includedups = 0;
my $revigo      = 1;
my $fast        = 1;

# Paths of the input files; they have no default and stay undef until set.
my ($targets, $background);

# Parse the command line into the option variables declared earlier in the
# script.  Getopt::Long stores each parsed value through a reference, so every
# destination has to be \$variable rather than the variable's current value.
my $result = GetOptions(
    "organism=s"   => \$organism,
    "runmode=s"    => \$runmode,
    "targets=s"    => \$targets,
    "background=s" => \$background,
    "ontology=s"   => \$ontology,
    "pvalue=f"     => \$pvalue,
    "name=s"       => \$name,
    "email=s"      => \$email,
    "includedups!" => \$includedups,
    "revigo!"      => \$revigo,
    "fast!"        => \$fast,
);

# GetOptions returns false after it has reported an unknown or malformed
# switch; stop here instead of submitting a job with unintended settings.
die "Usage: $0 --targets FILE [--background FILE] [--runmode mhg|hg]\n"
  . "       [--organism NAME] [--ontology proc|func|comp|all] [--pvalue P]\n"
  . "       [--name TEXT] [--email ADDR]\n"
  . "       [--[no]includedups] [--[no]revigo] [--[no]fast]\n"
    unless $result;

# Reject option values that are not in the accepted lists.
die "No such organism $organism\n" unless $organisms{$organism};
die "No such runmode $runmode\n"   unless $runmodes{$runmode};
die "No such ontology $ontology\n" unless $ontologies{$ontology};

# Run mode "hg" needs a target and a background file; "mhg" needs only the
# target file.  $runmode is one of those two values after the check above.
my @input_files;
if ($runmode eq "hg") {
    die "Must supply both target and background files with runmode hg\n"
        unless $targets && $background;
    @input_files = ($targets, $background);
}
else {
    die "Must supply target file with runmode mhg\n" unless $targets;
    @input_files = ($targets);
}

# Fail early on a mistyped path rather than after contacting the server.
for my $input_file (@input_files) {
    die "Cannot read input file $input_file\n" unless -r $input_file;
}

# Fetch the GOrilla entry page and select its submission form.
my $mech = WWW::Mechanize->new();
$mech->get($GOrillaURL);
$mech->form_name("gorilla")
    or die "Could not find the form named gorilla at $GOrillaURL\n";

# Fill the form in from the command-line settings.
$mech->select("species" => $organism);
$mech->set_fields("run_mode"         => $runmode);
$mech->set_fields("target_file_name" => $targets);
if ($runmode eq "hg") {
    # The background file is set the same way as the target file;
    # WWW::Mechanize does not provide a set_file method.
    $mech->set_fields("background_file_name" => $background);
}
$mech->set_fields("db" => $ontology);
$mech->select("pvalue_thresh" => $pvalue);
$mech->set_fields(
    "analysis_name"     => $name,
    "user_email"        => $email,
    "output_excel"      => 1,
    "output_unresolved" => $includedups,
    "output_revigo"     => $revigo,
    "fast_mode"         => $fast,
);

# Submit the job.
$mech->click("run_gogo_button");

# The URL reached after submitting the form carries the job id as an
# "id=" query parameter.
my $res  = $mech->response();
my $base = $res->base();
my ($id) = $base =~ m/id=([^&#]+)/;
die "Could not find a job id in $base\n" unless defined $id;

# Build the results address from the configured service URL; the trailing
# slash is trimmed so the joined path has exactly one separator.
(my $results_root = $GOrillaURL) =~ s{/+\z}{};
my $results_url = "$results_root/GOrilla/${id}/GOResults.html";

# Announce the location on STDERR and, as a comment line, on STDOUT.
warn "Results can be found at:\n$results_url\n";
print "# Results can be found at:\n# $results_url\n";

# Re-request the job URL until the response comes from a different address.
# This must be a real loop: "do EXPR until ..." is the do-FILE operator,
# not a do-BLOCK loop.  The pause keeps the polling from flooding the server.
my $poll_seconds = 2;
until ($mech->response->base() ne $base) {
    sleep $poll_seconds;
    $mech->get($base);
}

# Map each ontology option to the name used in the result file GO<NAME>.xls.
my %pages = (
    proc => "PROCESS",
    func => "FUNCTION",
    comp => "COMPONENT",
);

# "all" fetches every file in a fixed order; values(%pages) would return them
# in an unpredictable order and make the combined output vary between runs.
my @pages = $ontology eq "all"
    ? @pages{qw(proc func comp)}
    : $pages{$ontology};

# Trim the trailing slash of the service URL so the path is joined with
# exactly one separator.
(my $site = $GOrillaURL) =~ s{/+\z}{};

# Download each result file and write its content to STDOUT.
for my $page (@pages) {
    my $excel = "$site/GOrilla/${id}/GO${page}.xls";
    $mech->get($excel);
    print $mech->content();
}
Thanks, this script worked very well.
After retrieving the `${id}` value, `curl` or `wget` can be used to retrieve these eight files into their own sub-folder:
These results can be viewed in a local web browser by opening `GOResults.html`, and they can be kept indefinitely.
Here are my modifications to this script, which put the GO analysis results in the folder specified with `--outputDir`:
Thanks for the script! It saved me a lot of manual work. Here are a few modifications to a couple of lines which I believe are not correct. I am not a Perl guy, but it seems to work better after the changes.
Line 80 should be
instead of
Lines 110-113 should be
instead of
This does not capture the Excel output. How can this be modified so that the three Excel files can be saved?
Perhaps I can help modify this to add a few more GETs, assuming I understand your question correctly. What are their filenames? I just haven't looked at this in some years, so filenames would be useful.
Link broken for Gist! link