X-Git-Url: https://git.donarmstrong.com/?p=function2gene.git;a=blobdiff_plain;f=bin%2Fdo_it_all;h=9a681ecd6047aac77cccf84d37e62894b88abd6e;hp=b85356d94d17c7b3bec1258852135ebc9a47194b;hb=4ec55f9073ff175b7f591c66eed2ef91a274268c;hpb=15e77fec798b027d7708003da1c099c694c6a45e diff --git a/bin/do_it_all b/bin/do_it_all index b85356d..9a681ec 100755 --- a/bin/do_it_all +++ b/bin/do_it_all @@ -3,9 +3,9 @@ # under the terms of the GPL version 2, or any later version, at your # option. See the file README and COPYING for more information. # Copyright 2007 by Don Armstrong . -# $Id: perl_script 495 2006-08-10 08:02:01Z don $ +use threads; use warnings; use strict; @@ -16,8 +16,8 @@ use Storable; =head1 NAME -do_it_all - Call out to each of the search modules to search for each -of the terms + do_it_all - Call out to each of the search modules to search for + each of the terms =head1 SYNOPSIS @@ -79,22 +79,29 @@ Display this manual. use vars qw($DEBUG); +use Cwd qw(abs_path); +use IO::File; +use Storable qw(thaw freeze); +use File::Basename qw(basename dirname); +use Thread::Queue; my %options = (databases => [], keywords => [], debug => 0, help => 0, man => 0, - directory => '', + results => '', ); GetOptions(\%options,'keywords=s@','databases=s@', - 'restart_at|restart-at=s', + 'restart_at|restart-at=s','results=s', 'debug|d+','help|h|?','man|m'); pod2usage() if $options{help}; pod2usage({verbose=>2}) if $options{man}; +my $base_dir = dirname(abs_path($0)); + my $ERRORS=''; $ERRORS.="restart-at must be one of get, parse or combine\n" if @@ -102,19 +109,19 @@ $ERRORS.="restart-at must be one of get, parse or combine\n" if $ERRORS.="unknown database(s)" if @{$options{databases}} and - grep {$_ !~ /^(?:ncbi|genecards|harvester)$/i} @{$options{databases}}; + grep {$_ !~ /^(?:ncbi|genecard|harvester)$/i} @{$options{databases}}; -if (not length $options{directory}) { - $ERRORS.="directory not specified"; +if (not length $options{results}) { + $ERRORS.="results directory not specified"; } -elsif (not -d $options{directory} or not -w $options{directory}) { - $ERRORS.="directory $options{directory} does not exist or is not writeable"; +elsif (not -d $options{results} or not -w $options{results}) { + $ERRORS.="results directory $options{results} does not exist or is not writeable"; } pod2usage($ERRORS) if length $ERRORS; if (not @{$options{databases}}) { - $options{databases} = [qw(ncbi genecards harvester)] + $options{databases} = [qw(ncbi genecard harvester)] } $DEBUG = $options{debug}; @@ -128,21 +135,27 @@ $DEBUG = $options{debug}; my %state; -if (-e "$options{directory}/do_it_all_state") { +$options{keywords} = [map {abs_path($_)} @{$options{keywords}}]; + +chdir $options{results} or die "Unable to chdir to $options{results}"; + +if (-e "do_it_all_state") { ADVISE("Using existing state information"); - my $state_fh = IO::File->new("$options{directory}/do_it_all_state",'r') or die + my $state_fh = IO::File->new("do_it_all_state",'r') or die "Unable to open state file for reading: $!"; local $/; - my $state_file = <$state_fh> or die "Unabel to read state file $!"; + my $state_file = <$state_fh>; %state = %{thaw($state_file)} or die "Unable to thaw state file"; } else { ADVISE("Starting new run"); %state = (keywords => [], databases => [map {lc($_)} @{$options{databases}}], - gotten_keywords => {}, - parsed_keywords => {}, - combined_keywords => {}, + done_keywords => { + get => {}, + parse => {}, + combine => {}, + }, ); } @@ -154,9 +167,9 @@ if (@{$options{keywords}}) { for my $keyword_file (@{$options{keywords}}) { my $keyword_fh = IO::File->new($keyword_file,'r') or die "Unable to open $keyword_file for reading: $!"; - local $/; while (<$keyword_fh>) { next if /^\s*[#;]/; + next unless /\w+/; chomp; if (not $old_keywords{$_}) { DEBUG("Adding new keyword '$_'"); @@ -167,20 +180,21 @@ if (@{$options{keywords}}) { } } } + push @{$state{keywords}},@new_keywords; } if (exists $options{restart_at} and length $options{restart_at}) { if (lc($options{restart_at}) eq 'get') { - delete $state{gotten_keywords}; - delete $state{parsed_keywords}; - delete $state{combined_keywords}; + delete $state{done_keywords}{get}; + delete $state{done_keywords}{parse}; + delete $state{done_keywords}{combine}; } elsif (lc($options{restart_at}) eq 'parse') { - delete $state{parsed_keywords}; - delete $state{combined_keywords}; + delete $state{done_keywords}{parse}; + delete $state{done_keywords}{combine}; } elsif (lc($options{restart_at}) eq 'combine') { - delete $state{combined_keywords}; + delete $state{done_keywords}{combine}; } } @@ -188,40 +202,164 @@ if (exists $options{restart_at} and length $options{restart_at}) { # for each keyword, we check to see if we've got results, parsed # results, and combined it. If not, we queue up those actions. -my @get_needed = (); -my @parse_needed = (); -my $combine_needed = 0; +my %actions = (combine => 0, + get => {}, + parse => {}, + ); + +if (not @{$state{keywords}}) { + ADVISE("There are no keywords specified"); +} for my $keyword (@{$state{keywords}}) { for my $database (@{$state{databases}}) { - if (not exists $state{gotten_keywords}{$database}{$keyword}) { - push @get_needed,[$database,$keyword]; - delete $state{parsed_keywords}{$database}{$keyword} if - exists $state{gotten_keywords}{$database}{$keyword}; - delete $state{combined_keywords}{$database}{$keyword} if - exists $state{gotten_keywords}{$database}{$keyword}; + if (not exists $state{done_keywords}{get}{$database}{$keyword}) { + push @{$actions{get}{$database}}, $keyword; + delete $state{done_keywords}{parse}{$database}{$keyword} if + exists $state{done_keywords}{parse}{$database}{$keyword}; + delete $state{done_keywords}{combine}{$database}{$keyword} if + exists $state{done_keywords}{combine}{$database}{$keyword}; + } + if (not exists $state{done_keywords}{parse}{$database}{$keyword}) { + push @{$actions{parse}{$database}},$keyword; + delete $state{done_keywords}{combine}{$database}{$keyword} if + exists $state{done_keywords}{combine}{$database}{$keyword}; } - if (not exists $state{parsed_keywords}{$database}{$keyword}) { - push @parse_needed,[$database,$keyword]; - delete $state{combined_keywords}{$database}{$keyword} if - exists $state{gotten_keywords}{$database}{$keyword}; + if (not exists $state{done_keywords}{combine}{$database}{$keyword}) { + $actions{combine} = 1; + } + } +} + + +for my $state (qw(get parse)) { + my %databases; + for my $database (keys %{$actions{$state}}) { + next unless @{$actions{$state}{$database}}; + $databases{$database}{queue} = Thread::Queue->new + or die "Unable to create new thread queue"; + $databases{$database}{thread} = threads->create(\&handle_action,$state,$database,$databases{$database}{queue}) + or die "Unable to create new thread"; + $databases{$database}{queue}->enqueue(@{$actions{$state}{$database}}); + $databases{$database}{queue}->enqueue(undef); + } + my $ERRORS=0; + for my $database (keys %databases) { + my ($actioned_keywords,$failed_keywords) = @{$databases{$database}{thread}->join||[]}; + if (not defined $failed_keywords) { + ADVISE("Something bad happened during '$state' of '$database'"); + $ERRORS = 1; } - if (not exists $state{combined_keywords}{$database}{$keyword}) { - $combine_needed = 1; + elsif (@{$failed_keywords}) { + ADVISE("These keywords failed during '$state' of '$database':",@{$failed_keywords}); + $ERRORS=1; } + @{$state{done_keywords}{$state}{$database}}{@{$actioned_keywords}} = (1) x @{$actioned_keywords}; + delete @{$state{done_keywords}{$state}{$database}}{@{$failed_keywords}}; + } + save_state(\%state); + if ($ERRORS) { + WARN("Stoping, as there are errors"); + exit 1; } } -# handle getting needed results -for my $action (@get_needed) { - +if ($actions{combine}) { + save_state(\%state); + # deal with combining results + my @parsed_results = map { my $db = $_; + map { + "parsed_results_${db}_${_}.txt" + } keys %{$state{done_keywords}{parse}{$db}} + } keys %{$state{done_keywords}{parse}}; + + write_command_to_file('combined_results.txt', + "$base_dir/combine_results", + @parsed_results, + ); + for my $result (@parsed_results) { + s/^parsed_results_//; + s/\.txt$//; + my ($db,$keyword) = split /_/, $_, 2; + $state{done_keywords}{combined}{$db}{$keyword} = 1; + } + save_state(\%state); + ADVISE("Finished; results in $options{results}/combined_results"); } -# handle parsing needed results -for my $action (@parse_needed) { +else { + ADVISE('Nothing to do. [Perhaps you wanted --restart-at?]'); } -# handle combining results +sub handle_action{ + my ($state,$database,$queue) = @_; + my $keyword; + my $actioned_keywords = []; + my $failed_keywords = []; + DEBUG("Beginning to handle actions for state '$state' database '$database'"); + while ($keyword = $queue->dequeue) { + DEBUG("Handling state '$state' database '$database' keyword '$keyword'"); + # handle the action, baybee + if ($state eq 'get') { + my $command_fh; + eval { + open($command_fh,'|-', + "$base_dir/get_${database}_results", + ) or die "unable to execute '$base_dir/get_${database}_results'"; + print {$command_fh} "$keyword\n" or die "unable to print $keyword to 'get_${database}_results'"; + close($command_fh) or die "Unable to close filehandle"; + if ($? != 0) { + die "get_${database}_results with keyword $keyword failed with error code ".($?>>8); + } + }; + if ($@) { + WARN($@); + push @{$failed_keywords}, $keyword; + next; + } + } + elsif ($state eq 'parse') { + eval { + write_command_to_file("parsed_results_${database}_${keyword}.txt", + "$base_dir/parse_${database}_results", + '--keywords', + $keyword, + ); + }; + if ($@) { + WARN("parse_${database}_results failed with $@"); + push @{$failed_keywords}, $keyword; + next; + } + } + else { + die "I don't know how to handle state $state"; + } + ADVISE("$state results from '$database' for '$keyword'"); + push @{$actioned_keywords},$keyword; + } + return [$actioned_keywords,$failed_keywords]; +} +sub save_state{ + my ($state) = @_; + my $state_fh = IO::File->new("do_it_all_state",'w') or die + "Unable to open state file for writing: $!"; + print {$state_fh} freeze($state) or die "Unable to freeze state file"; + close $state_fh or die "Unable to close state file: $!"; +} + +sub write_command_to_file{ + my ($file,@command) = @_; + my $fh = IO::File->new($file,'w') or + die "Unable to open $file for writing: $!"; + my $command_fh; + open($command_fh,'-|', + @command, + ) or die "Unable to execute $command[0] $!"; + print {$fh} <$command_fh>; + close $fh; + close $command_fh or die "$command[0] failed with ".($?>>8); +} sub ADVISE{