git.donarmstrong.com Git - function2gene.git/blobdiff - bin/get_genecard_results
* Stop requiring wget
[function2gene.git] / bin / get_genecard_results
index 017f1e1b2b76957fbb7c67e90fd7f0d075811ede..ac38e29d6767e8cb13040eb2b591258f07edf5b2 100755 (executable)
@@ -70,6 +70,7 @@ BEGIN{
 use IO::File;
 use URI;
 use WWW::Mechanize;
+use Time::HiRes qw(usleep);
 
 # XXX parse config file
 
@@ -127,7 +128,22 @@ while (<$terms>) {
      # Get XML file
      my @current_urls;
      while (@current_urls = map{$options{genecard_site}.$_} splice(@result_urls,0,30)) {
-         system(q(wget),'-nd','-nH','-w','2','--random-wait','-P',qq($options{dir}/$dir_name),@current_urls) == 0 or warn "$!";
+         for my $url (@current_urls) {
+              # Throttle requests: pause a random 1-3 seconds (about 2 on average).
+              usleep((0.5+rand)*2*1000000);
+              # Flat file name: the URL without its scheme or any non-word characters.
+              (my $cleaned_url = $url) =~ s{http://}{}g;
+              $cleaned_url =~ s/[^\w]//g;
+              # Fetch inside the eval as well: get() dies on a failed request
+              # when autocheck is on, which would otherwise end the whole run.
+              eval {
+                   $mech->get($url);
+                   $mech->save_content($options{dir}.'/'.$dir_name.'/'.$cleaned_url);
+                   print "retreived $url\n";
+              };
+              warn $@ if $@;
+         }
+         #system(q(wget),'-nd','-nH','-w','2','--random-wait','-P',qq($options{dir}/$dir_name),@current_urls) == 0 or warn "$!";
      }
 }