]> git.donarmstrong.com Git - function2gene.git/blobdiff - bin/do_it_all
update search program with options for do_it_all; implement calls to subsidiary scripts
[function2gene.git] / bin / do_it_all
index b85356d94d17c7b3bec1258852135ebc9a47194b..788319a03226fab077cda0ca67543ac5702340ee 100755 (executable)
@@ -3,7 +3,6 @@
 # under the terms of the GPL version 2, or any later version, at your
 # option. See the file README and COPYING for more information.
 # Copyright 2007 by Don Armstrong <don@donarmstrong.com>.
-# $Id: perl_script 495 2006-08-10 08:02:01Z don $
 
 
 use warnings;
@@ -16,8 +15,8 @@ use Storable;
 
 =head1 NAME
 
-do_it_all - Call out to each of the search modules to search for each
-of the terms
+  do_it_all - Call out to each of the search modules to search for
+  each of the terms
 
 =head1 SYNOPSIS
 
@@ -79,22 +78,28 @@ Display this manual.
 
 
 use vars qw($DEBUG);
+use Cwd qw(abs_path);
+use IO::File;
+use Storable qw(thaw freeze);
+use File::Basename qw(basename);
 
 my %options = (databases       => [],
               keywords        => [],
               debug           => 0,
               help            => 0,
               man             => 0,
-              directory       => '',
+              results         => '',
               );
 
 GetOptions(\%options,'keywords=s@','databases=s@',
-          'restart_at|restart-at=s',
+          'restart_at|restart-at=s','results=s',
           'debug|d+','help|h|?','man|m');
 
 pod2usage() if $options{help};
 pod2usage({verbose=>2}) if $options{man};
 
+# Absolute directory containing this script; used to locate helper
+# programs such as combine_results.  It has to be absolute because the
+# script later changes into the results directory.  dirname is called
+# by its full name because only basename is imported.
+my $base_dir = abs_path(File::Basename::dirname($0));
+
 my $ERRORS='';
 
 $ERRORS.="restart-at must be one of get, parse or combine\n" if
@@ -104,11 +109,11 @@ $ERRORS.="unknown database(s)" if
      @{$options{databases}} and
      grep {$_ !~ /^(?:ncbi|genecards|harvester)$/i} @{$options{databases}};
 
-if (not length $options{directory}) {
-     $ERRORS.="directory not specified";
+if (not length $options{results}) {
+     $ERRORS.="results directory not specified";
 }
-elsif (not -d $options{directory} or not -w $options{directory}) {
-     $ERRORS.="directory $options{directory} does not exist or is not writeable";
+elsif (not -d $options{results} or not -w $options{results}) {
+     $ERRORS.="results directory $options{results} does not exist or is not writeable";
 }
 
 pod2usage($ERRORS) if length $ERRORS;
@@ -128,21 +133,27 @@ $DEBUG = $options{debug};
 
 my %state;
 
-if (-e "$options{directory}/do_it_all_state") {
+$options{keywords} = [map {abs_path($_)} @{$options{keywords}}];
+
+chdir $options{results} or die "Unable to chdir to $options{results}";
+
+if (-e "do_it_all_state") {
      ADVISE("Using existing state information");
-     my $state_fh = IO::File->new("$options{directory}/do_it_all_state",'r') or die
+     my $state_fh = IO::File->new("do_it_all_state",'r') or die
          "Unable to open state file for reading: $!";
      local $/;
-     my $state_file = <$state_fh> or die "Unabel to read state file $!";
+     my $state_file = <$state_fh>;
      %state = %{thaw($state_file)} or die "Unable to thaw state file";
 }
 else {
      ADVISE("Starting new run");
      %state = (keywords => [],
               databases => [map {lc($_)} @{$options{databases}}],
-              gotten_keywords => {},
-              parsed_keywords => {},
-              combined_keywords => {},
+              done_keywords => {
+                                get => {},
+                                parse => {},
+                                combine => {},
+                               },
              );
 }
 
@@ -171,16 +182,16 @@ if (@{$options{keywords}}) {
 
 if (exists $options{restart_at} and length $options{restart_at}) {
      if (lc($options{restart_at}) eq 'get') {
-         delete $state{gotten_keywords};
-         delete $state{parsed_keywords};
-         delete $state{combined_keywords};
+         delete $state{done_keywords}{get};
+         delete $state{done_keywords}{parse};
+         delete $state{done_keywords}{combine};
      }
      elsif (lc($options{restart_at}) eq 'parse') {
-         delete $state{parsed_keywords};
-         delete $state{combined_keywords};
+         delete $state{done_keywords}{parse};
+         delete $state{done_keywords}{combine};
      }
      elsif (lc($options{restart_at}) eq 'combine') {
-         delete $state{combined_keywords};
+         delete $state{done_keywords}{combine};
      }
 }
 
@@ -188,40 +199,151 @@ if (exists $options{restart_at} and length $options{restart_at}) {
 # for each keyword, we check to see if we've got results, parsed
 # results, and combined it. If not, we queue up those actions.
 
-my @get_needed = ();
-my @parse_needed = ();
-my $combine_needed = 0;
+my %actions = (combine => 0,
+              get     => {},
+              parse   => {},
+             );
+
+if (not @{$state{keywords}}) {
+     ADVISE("There are no keywords specified");
+}
 
 for my $keyword (@{$state{keywords}}) {
      for my $database (@{$state{databases}}) {
-         if (not exists $state{gotten_keywords}{$database}{$keyword}) {
-              push @get_needed,[$database,$keyword];
-              delete $state{parsed_keywords}{$database}{$keyword} if
-                   exists $state{gotten_keywords}{$database}{$keyword};
-              delete $state{combined_keywords}{$database}{$keyword} if
-                   exists $state{gotten_keywords}{$database}{$keyword};
+         if (not exists $state{done_keywords}{get}{$database}{$keyword}) {
+              push @{$actions{get}{$database}}, $keyword;
+              delete $state{done_keywords}{parse}{$database}{$keyword} if
+                   exists $state{done_keywords}{parse}{$database}{$keyword};
+              delete $state{done_keywords}{combine}{$database}{$keyword} if
+                   exists $state{done_keywords}{combine}{$database}{$keyword};
          }
-         if (not exists $state{parsed_keywords}{$database}{$keyword}) {
-              push @parse_needed,[$database,$keyword];
-              delete $state{combined_keywords}{$database}{$keyword} if
-                   exists $state{gotten_keywords}{$database}{$keyword};
+         if (not exists $state{done_keywords}{parse}{$database}{$keyword}) {
+              push @{$actions{parse}{$database}},$keyword;
+              delete $state{done_keywords}{combine}{$database}{$keyword} if
+                   exists $state{done_keywords}{combine}{$database}{$keyword};
          }
-         if (not exists $state{combined_keywords}{$database}{$keyword}) {
-             $combine_needed = 1;
+         if (not exists $state{done_keywords}{combine}{$database}{$keyword}) {
+             $actions{combine} = 1;
          }
      }
 }
 
-# handle getting needed results
-for my $action (@get_needed) {
-     
+use threads;
+use Thread::Queue;
+
+# Run the 'get' stage and then the 'parse' stage.  Within a stage every
+# database with pending keywords gets its own worker thread running
+# handle_action, fed through a Thread::Queue that is terminated by an
+# undef sentinel.  State is saved after each stage, and the program
+# exits with status 1 if any keyword failed.
+for my $state (qw(get parse)) {
+     my %databases;
+     for my $database (keys %{$actions{$state}}) {
+         next unless @{$actions{$state}{$database}};
+         my $queue = Thread::Queue->new;
+         $queue->enqueue(@{$actions{$state}{$database}});
+         $queue->enqueue(undef);
+         # The code reference and its arguments are passed separately so
+         # that handle_action runs inside the new thread.  Creating the
+         # thread in list context makes join() return both array
+         # references rather than a single scalar.
+         my ($thread) = threads->create(\&handle_action,$state,$database,$queue);
+         die "Unable to create thread for '$state' of '$database'"
+              if not defined $thread;
+         $databases{$database}{queue} = $queue;
+         $databases{$database}{thread} = $thread;
+     }
+     my $had_errors = 0;
+     for my $database (keys %databases) {
+         my ($actioned_keywords,$failed_keywords) = $databases{$database}{thread}->join;
+         # A worker that died returns nothing useful from join(); treat
+         # that as a failure instead of dereferencing undef.
+         if (not defined $actioned_keywords or not defined $failed_keywords) {
+              WARN("The thread handling '$state' of '$database' died");
+              $had_errors = 1;
+              next;
+         }
+         if (@{$failed_keywords}) {
+              ADVISE("These keywords failed during '$state' of '$database':",@{$failed_keywords});
+              $had_errors = 1;
+         }
+         @{$state{done_keywords}{$state}{$database}}{@{$actioned_keywords}} = (1) x @{$actioned_keywords};
+         delete @{$state{done_keywords}{$state}{$database}}{@{$failed_keywords}};
+     }
+     save_state(\%state);
+     if ($had_errors) {
+         WARN("Stoping, as there are errors");
+         exit 1;
+     }
+}
+
+# Combine every parsed result file into combined_results.txt and record
+# the (database, keyword) pairs as combined, so that a later run does
+# not repeat the work.
+if ($actions{combine}) {
+     save_state(\%state);
+     # [database, keyword] pairs for everything that has been parsed
+     my @parsed = map { my $db = $_;
+                        map {
+                             [$db,$_]
+                        } keys %{$state{done_keywords}{parse}{$db}}
+                   } keys %{$state{done_keywords}{parse}};
+     my @parsed_results = map { my ($db,$keyword) = @{$_};
+                                "parsed_results_${db}_${keyword}.txt"
+                           } @parsed;
+
+     write_command_to_file('combined_results.txt',
+                          "$base_dir/combine_results",
+                          @parsed_results,
+                         );
+     # Record completion under the 'combine' key, which is the key the
+     # queueing loop and --restart-at handling look at.
+     for my $pair (@parsed) {
+         my ($db,$keyword) = @{$pair};
+         $state{done_keywords}{combine}{$db}{$keyword} = 1;
+     }
+     save_state(\%state);
+     ADVISE("Finished; results in $options{results}/combined_results.txt");
+}
+else {
+     ADVISE('Nothing to do. [Perhaps you wanted --restart-at?]');
+}
-# handle parsing needed results
-for my $action (@parse_needed) {
+
+# handle_action($state,$database,$queue)
+#
+# Worker routine: takes keywords off $queue until it dequeues undef and
+# performs $state ('get' or 'parse') for each keyword against $database.
+#
+#  get   - runs get_${database}_results and writes the keyword to its
+#          standard input
+#  parse - runs parse_${database}_results --keywords $keyword and stores
+#          its output in parsed_results_${database}_${keyword}.txt
+#
+# Returns two array references: the keywords that were handled
+# successfully and the keywords that failed.  Dies on an unknown $state.
+sub handle_action{
+     my ($state,$database,$queue) = @_;
+     my $actioned_keywords = [];
+     my $failed_keywords = [];
+     # test definedness, not truth, so that a keyword such as '0' does
+     # not look like the end-of-queue sentinel
+     while (defined(my $keyword = $queue->dequeue)) {
+         # handle the action, baybee
+         if ($state eq 'get') {
+              my $command_fh;
+              if (not open($command_fh,'|-',
+                           "get_${database}_results",
+                          )) {
+                   WARN("Unable to execute get_${database}_results for keyword $keyword: $!");
+                   push @{$failed_keywords},$keyword;
+                   next;
+              }
+              print {$command_fh} "$keyword\n";
+              # closing a pipe waits for the child and sets $?
+              close($command_fh);
+              if ($? != 0) {
+                   WARN("get_${database}_results with keyword $keyword failed with error code ".($?>>8));
+                   push @{$failed_keywords},$keyword;
+                   next;
+              }
+         }
+         elsif ($state eq 'parse') {
+              eval {
+                   write_command_to_file("parsed_results_${database}_${keyword}.txt",
+                                         "parse_${database}_results",
+                                         '--keywords',
+                                         $keyword,
+                                        );
+              };
+              if ($@) {
+                   WARN("parse_${database}_results failed with $@");
+                   push @{$failed_keywords},$keyword;
+                   next;
+              }
+         }
+         else {
+              die "I don't know how to handle state $state";
+         }
+         ADVISE("$state results from '$database' for '$keyword'");
+         push @{$actioned_keywords},$keyword;
+     }
+     return ($actioned_keywords,$failed_keywords);
 }
 
-# handle combining results
+# save_state($state)
+#
+# Serialises the hash reference $state with Storable's freeze and
+# writes it to the file do_it_all_state in the current directory,
+# replacing any previous contents.  Dies if the file cannot be opened,
+# written or closed.
+sub save_state{
+     my ($state_ref) = @_;
+     my $out = IO::File->new("do_it_all_state",'w');
+     if (not $out) {
+         die "Unable to open state file for writing: $!";
+     }
+     $out->print(freeze($state_ref))
+         or die "Unable to freeze state file";
+     $out->close
+         or die "Unable to close state file: $!";
+}
 
+# write_command_to_file($file,@command)
+#
+# Runs @command (program followed by its arguments) and writes
+# everything it prints on standard output to $file, replacing any
+# previous contents.  Dies if $file cannot be opened, written or closed,
+# if the command cannot be started, or if closing the pipe to the
+# command fails (which includes a non-zero exit status).
+sub write_command_to_file{
+     my ($file,@command) = @_;
+     my $fh = IO::File->new($file,'w') or
+         die "Unable to open $file for writing: $!";
+     my $command_fh;
+     open($command_fh,'-|',
+         @command,
+        ) or die "Unable to execute $command[0] $!";
+     # copy line by line instead of reading all of the output at once
+     while (defined(my $line = <$command_fh>)) {
+         print {$fh} $line or die "Unable to write to $file: $!";
+     }
+     # buffered write errors only show up when the file is closed
+     close $fh or die "Unable to close $file: $!";
+     close $command_fh or die "$command[0] failed with ".($?>>8);
+}
 
 
 sub ADVISE{