git.donarmstrong.com Git - function2gene.git/blobdiff - bin/get_ncbi_results
* Handle when NCBI has remote server errors "gracefully"
[function2gene.git] / bin / get_ncbi_results
index 3e1a6259ce1d2e717727ad7a9ed4a1d1a3bef118..313ecad351f8b6eb62acb0d921402e3f9b6376cd 100755 (executable)
@@ -87,9 +87,9 @@ my %options = (debug    => 0,
               name     => 'ncbi_${search}_results.$format',
               terms    => '-',
               orgn     => 'homo',
-              pubmed_site => 'http://www.ncbi.nlm.nih.gov',
-              pubmed_search_url  => '/entrez/query.fcgi?cmd=search&doptcmdl=Brief&dispmax=1000',
-              pubmed_get_url     => '/entrez/query.fcgi?cmd=Text',
+              ncbi_site => 'http://www.ncbi.nlm.nih.gov',
+              ncbi_search_url  => '/entrez/query.fcgi?cmd=search&doptcmdl=Brief&dispmax=1000',
+              ncbi_get_url     => '/entrez/query.fcgi?cmd=Text',
              );
 
 GetOptions(\%options,'format|f=s','database|b=s','name|n=s',
@@ -121,7 +121,7 @@ while (<$terms>) {
      chomp;
      my $search = $_;
      my $format = $options{format};
-     my $uri = URI->new($options{pubmed_site}.$options{pubmed_search_url});
+     my $uri = URI->new($options{ncbi_site}.$options{ncbi_search_url});
      $uri->query_form($uri->query_form(),
                      term => $search.' AND '.$options{orgn}.'[Orgn]',
                      db   => $options{database},
@@ -145,7 +145,7 @@ while (<$terms>) {
      my @current_ids;
      print {$xml_file} "<opt>\n";
      while (@current_ids = splice(@gene_ids,0,5)) {
-         $uri = URI->new($options{pubmed_site}.$options{pubmed_get_url});
+         $uri = URI->new($options{ncbi_site}.$options{ncbi_get_url});
          $uri->query_form($uri->query_form(),
                           dopt => uc($options{format}),
                           db   => $options{database},
@@ -155,6 +155,15 @@ while (<$terms>) {
          print STDERR "url: $url\n";
          $mech->get($url);
          my $response = $mech->content;
+         my $retry_count=5; # extra attempts allowed after the initial request
+         while ($response =~ /Error reading from remote server/ and $retry_count > 0) {
+              $mech->get($url);
+              $response = $mech->content;
+              $retry_count--;
+         }
+         if ($response =~ /Error reading from remote server/) { # still failing once retries are used up
+              die 'Unable to retreive ids ['.join(',',@current_ids).'] because of a remote server error';
+         }
          # For some dumb reason, they send us xml with html
          # entities. Ditch them.
          #$response = decode_entities($response);