X-Git-Url: https://git.donarmstrong.com/?p=function2gene.git;a=blobdiff_plain;f=bin%2Fdo_it_all;h=788319a03226fab077cda0ca67543ac5702340ee;hp=b85356d94d17c7b3bec1258852135ebc9a47194b;hb=79c0f28d89ba108bcfca68a8b5d4f0e3855455dc;hpb=15e77fec798b027d7708003da1c099c694c6a45e diff --git a/bin/do_it_all b/bin/do_it_all index b85356d..788319a 100755 --- a/bin/do_it_all +++ b/bin/do_it_all @@ -3,7 +3,6 @@ # under the terms of the GPL version 2, or any later version, at your # option. See the file README and COPYING for more information. # Copyright 2007 by Don Armstrong . -# $Id: perl_script 495 2006-08-10 08:02:01Z don $ use warnings; @@ -16,8 +15,8 @@ use Storable; =head1 NAME -do_it_all - Call out to each of the search modules to search for each -of the terms + do_it_all - Call out to each of the search modules to search for + each of the terms =head1 SYNOPSIS @@ -79,22 +78,28 @@ Display this manual. use vars qw($DEBUG); +use Cwd qw(abs_path); +use IO::File; +use Storable qw(thaw freeze); +use File::Basename qw(basename); my %options = (databases => [], keywords => [], debug => 0, help => 0, man => 0, - directory => '', + results => '', ); GetOptions(\%options,'keywords=s@','databases=s@', - 'restart_at|restart-at=s', + 'restart_at|restart-at=s','results=s', 'debug|d+','help|h|?','man|m'); pod2usage() if $options{help}; pod2usage({verbose=>2}) if $options{man}; +my $base_dir = basename($0); + my $ERRORS=''; $ERRORS.="restart-at must be one of get, parse or combine\n" if @@ -104,11 +109,11 @@ $ERRORS.="unknown database(s)" if @{$options{databases}} and grep {$_ !~ /^(?:ncbi|genecards|harvester)$/i} @{$options{databases}}; -if (not length $options{directory}) { - $ERRORS.="directory not specified"; +if (not length $options{results}) { + $ERRORS.="results directory not specified"; } -elsif (not -d $options{directory} or not -w $options{directory}) { - $ERRORS.="directory $options{directory} does not exist or is not writeable"; +elsif (not -d $options{results} or not -w $options{results}) { + $ERRORS.="results directory $options{results} does not exist or is not writeable"; } pod2usage($ERRORS) if length $ERRORS; @@ -128,21 +133,27 @@ $DEBUG = $options{debug}; my %state; -if (-e "$options{directory}/do_it_all_state") { +$options{keywords} = [map {abs_path($_)} @{$options{keywords}}]; + +chdir $options{results} or die "Unable to chdir to $options{results}"; + +if (-e "do_it_all_state") { ADVISE("Using existing state information"); - my $state_fh = IO::File->new("$options{directory}/do_it_all_state",'r') or die + my $state_fh = IO::File->new("do_it_all_state",'r') or die "Unable to open state file for reading: $!"; local $/; - my $state_file = <$state_fh> or die "Unabel to read state file $!"; + my $state_file = <$state_fh>; %state = %{thaw($state_file)} or die "Unable to thaw state file"; } else { ADVISE("Starting new run"); %state = (keywords => [], databases => [map {lc($_)} @{$options{databases}}], - gotten_keywords => {}, - parsed_keywords => {}, - combined_keywords => {}, + done_keywords => { + get => {}, + parse => {}, + combine => {}, + }, ); } @@ -171,16 +182,16 @@ if (@{$options{keywords}}) { if (exists $options{restart_at} and length $options{restart_at}) { if (lc($options{restart_at}) eq 'get') { - delete $state{gotten_keywords}; - delete $state{parsed_keywords}; - delete $state{combined_keywords}; + delete $state{done_keywords}{get}; + delete $state{done_keywords}{parse}; + delete $state{done_keywords}{combine}; } elsif (lc($options{restart_at}) eq 'parse') { - delete $state{parsed_keywords}; - delete $state{combined_keywords}; + delete $state{done_keywords}{parse}; + delete $state{done_keywords}{combine}; } elsif (lc($options{restart_at}) eq 'combine') { - delete $state{combined_keywords}; + delete $state{done_keywords}{combine}; } } @@ -188,40 +199,151 @@ if (exists $options{restart_at} and length $options{restart_at}) { # for each keyword, we check to see if we've got results, parsed # results, and combined it. If not, we queue up those actions. -my @get_needed = (); -my @parse_needed = (); -my $combine_needed = 0; +my %actions = (combine => 0, + get => {}, + parse => {}, + ); + +if (not @{$state{keywords}}) { + ADVISE("There are no keywords specified"); +} for my $keyword (@{$state{keywords}}) { for my $database (@{$state{databases}}) { - if (not exists $state{gotten_keywords}{$database}{$keyword}) { - push @get_needed,[$database,$keyword]; - delete $state{parsed_keywords}{$database}{$keyword} if - exists $state{gotten_keywords}{$database}{$keyword}; - delete $state{combined_keywords}{$database}{$keyword} if - exists $state{gotten_keywords}{$database}{$keyword}; + if (not exists $state{done_keywords}{get}{$database}{$keyword}) { + push @{$actions{get}{$database}}, $keyword; + delete $state{done_keywords}{parse}{$database}{$keyword} if + exists $state{done_keywords}{parse}{$database}{$keyword}; + delete $state{done_keywords}{combine}{$database}{$keyword} if + exists $state{done_keywords}{combine}{$database}{$keyword}; } - if (not exists $state{parsed_keywords}{$database}{$keyword}) { - push @parse_needed,[$database,$keyword]; - delete $state{combined_keywords}{$database}{$keyword} if - exists $state{gotten_keywords}{$database}{$keyword}; + if (not exists $state{done_keywords}{parse}{$database}{$keyword}) { + push @{$actions{parse}{$database}},$keyword; + delete $state{done_keywords}{combine}{$database}{$keyword} if + exists $state{done_keywords}{combine}{$database}{$keyword}; } - if (not exists $state{combined_keywords}{$database}{$keyword}) { - $combine_needed = 1; + if (not exists $state{done_keywords}{combine}{$database}{$keyword}) { + $actions{combine} = 1; } } } -# handle getting needed results -for my $action (@get_needed) { - +use threads; +use Thread::Queue; + +for my $state (qw(get parse)) { + my %databases; + for my $database (keys %{$actions{$state}}) { + next unless @{$actions{$state}{$database}}; + $databases{$database}{queue} = Thread::Queue->new; + $databases{$database}{thread} = threads->new(\&handle_action($state,$database,$databases{database}{queue})); + $databases{$database}{queue}->enqueue(@{$actions{$state}{$database}}); + $databases{$database}{queue}->enqueue(undef); + } + my $ERRORS=0; + for my $database (keys %databases) { + my ($actioned_keywords,$failed_keywords) = $databases{$database}{thread}->join; + if (@{$failed_keywords}) { + ADVISE("These keywords failed during '$state' of '$database':",@{$failed_keywords}); + $ERRORS=1; + } + @{$state{done_keywords}{$state}{$database}}{@{$actioned_keywords}} = (1) x @{$actioned_keywords}; + delete @{$state{done_keywords}{$state}{$database}}{@{$failed_keywords}}; + } + save_state(\%state); + if ($ERRORS) { + WARN("Stoping, as there are errors"); + exit 1; + } +} + +if ($actions{combine}) { + save_state(\%state); + # deal with combining results + my @parsed_results = map { my $db = $_; + map { + "parsed_results_${db}_${_}.txt" + } keys %{$state{done_keywords}{parse}{$db}} + } keys %{$state{done_keywords}{parse}}; + + write_command_to_file('combined_results.txt', + "$base_dir/combine_results", + @parsed_results, + ); + for my $result (@parsed_results) { + s/^parsed_results_//; + s/\.txt$//; + my ($db,$keyword) = split /_/, $_, 2; + $state{done_keywords}{combined}{$db}{$keyword} = 1; + } + save_state(\%state); + ADVISE("Finished; results in $options{results}/combined_results"); +} +else { + ADVISE('Nothing to do. [Perhaps you wanted --restart-at?]'); } -# handle parsing needed results -for my $action (@parse_needed) { + +sub handle_action{ + my ($state,$database,$queue) = @_; + my $keyword; + my $actioned_keywords = (); + my $failed_keywords = (); + while ($keyword = $queue->dequeue) { + # handle the action, baybee + if ($state eq 'get') { + my $command_fh; + open($command_fh,'|-', + "get_${database}_results", + ); + print {$command_fh} "$keyword\n"; + close($command_fh); + if ($? != 0) { + WARN("get_${database}_results with keyword $keyword failed with error code ".($?>>8)); + next; + } + } + elsif ($state eq 'parse') { + eval { + write_command_to_file("parsed_results_${database}_${keyword}.txt", + "parse_${database}_results", + '--keywords', + $keyword, + ); + }; + if ($@) { + WARN("parse_${database}_results failed with $@"); + next; + } + } + else { + die "I don't know how to handle state $state"; + } + ADVISE("$state results from '$database' for '$keyword'"); + push @{$actioned_keywords},$keyword; + } + return ($actioned_keywords,$failed_keywords); } -# handle combining results +sub save_state{ + my ($state) = @_; + my $state_fh = IO::File->new("do_it_all_state",'w') or die + "Unable to open state file for writing: $!"; + print {$state_fh} freeze($state) or die "Unable to freeze state file"; + close $state_fh or die "Unable to close state file: $!"; +} +sub write_command_to_file{ + my ($file,@command); + my $fh = IO::File->new($file,'w') or + die "Unable to open $file for writing: $!"; + my $command_fh; + open($command_fh,'-|', + @command, + ) or die "Unable to execute $command[0] $!"; + print {$fh} <$command_fh>; + close $fh; + close $command_fh or die "$command[0] failed with ".($?>>8); +} sub ADVISE{