X-Git-Url: https://git.donarmstrong.com/?p=function2gene.git;a=blobdiff_plain;f=bin%2Fget_ncbi_results;h=e126d5f687fc984107aafcb1263986158ddd2086;hp=7a869014bc15525ab80f36c0fc02ad50ac94f4eb;hb=50e9739109c91b53ab620b462255f71a0b870f95;hpb=8e1fe04cb712a2231b9a7112b7a701a67f9ddd31 diff --git a/bin/get_ncbi_results b/bin/get_ncbi_results index 7a86901..e126d5f 100755 --- a/bin/get_ncbi_results +++ b/bin/get_ncbi_results @@ -87,9 +87,9 @@ my %options = (debug => 0, name => 'ncbi_${search}_results.$format', terms => '-', orgn => 'homo', - pubmed_site => 'http://www.ncbi.nlm.nih.gov', - pubmed_search_url => '/entrez/query.fcgi?cmd=search&doptcmdl=Brief&dispmax=1000', - pubmed_get_url => '/entrez/query.fcgi?cmd=Text', + ncbi_site => 'http://www.ncbi.nlm.nih.gov', + ncbi_search_url => '/entrez/query.fcgi?cmd=search&doptcmdl=Brief&dispmax=1000', + ncbi_get_url => '/entrez/query.fcgi?cmd=Text', ); GetOptions(\%options,'format|f=s','database|b=s','name|n=s', @@ -121,7 +121,7 @@ while (<$terms>) { chomp; my $search = $_; my $format = $options{format}; - my $uri = URI->new($options{pubmed_site}.$options{pubmed_search_url}); + my $uri = URI->new($options{ncbi_site}.$options{ncbi_search_url}); $uri->query_form($uri->query_form(), term => $search.' AND '.$options{orgn}.'[Orgn]', db => $options{database}, @@ -143,8 +143,9 @@ while (<$terms>) { # Get XML file my @current_ids; + print {$xml_file} "\n"; while (@current_ids = splice(@gene_ids,0,5)) { - $uri = URI->new($options{pubmed_site}.$options{pubmed_get_url}); + $uri = URI->new($options{ncbi_site}.$options{ncbi_get_url}); $uri->query_form($uri->query_form(), dopt => uc($options{format}), db => $options{database}, @@ -154,6 +155,15 @@ while (<$terms>) { print STDERR "url: $url\n"; $mech->get($url); my $response = $mech->content; + my $retry_count=5; + while ($response =~ /Error reading from remote server/ and $retry_count > 0) { + $mech->get($url); + $response = $mech->content; + $retry_count--; + } + if ($retry_count <= 0) { + die 'Unable to retreive ids ['.join(',',@current_ids).'] because of a remote server error'; + } # For some dumb reason, they send us xml with html # entities. Ditch them. #$response = decode_entities($response); @@ -167,10 +177,10 @@ while (<$terms>) { $response =~ s/^\s*
//gso;
 	  $response =~ s#
\s*$##gso; - print {$xml_file} $response; sleep 10; } + print {$xml_file} "
\n"; undef $xml_file; }