X-Git-Url: https://git.donarmstrong.com/?p=function2gene.git;a=blobdiff_plain;f=bin%2Fdo_it_all;h=9a681ecd6047aac77cccf84d37e62894b88abd6e;hp=3d3cfa3effb42640cbe917f9ebb41da3fbcad279;hb=59bb41e2cadc989c299822191d8e8ee7d348779c;hpb=672a430873e686b884bccb1b027321658c541b25 diff --git a/bin/do_it_all b/bin/do_it_all index 3d3cfa3..9a681ec 100755 --- a/bin/do_it_all +++ b/bin/do_it_all @@ -3,9 +3,9 @@ # under the terms of the GPL version 2, or any later version, at your # option. See the file README and COPYING for more information. # Copyright 2007 by Don Armstrong . -# $Id: perl_script 495 2006-08-10 08:02:01Z don $ +use threads; use warnings; use strict; @@ -16,8 +16,8 @@ use Storable; =head1 NAME -do_it_all - Call out to each of the search modules to search for each -of the terms + do_it_all - Call out to each of the search modules to search for + each of the terms =head1 SYNOPSIS @@ -82,6 +82,8 @@ use vars qw($DEBUG); use Cwd qw(abs_path); use IO::File; use Storable qw(thaw freeze); +use File::Basename qw(basename dirname); +use Thread::Queue; my %options = (databases => [], keywords => [], @@ -98,6 +100,8 @@ GetOptions(\%options,'keywords=s@','databases=s@', pod2usage() if $options{help}; pod2usage({verbose=>2}) if $options{man}; +my $base_dir = dirname(abs_path($0)); + my $ERRORS=''; $ERRORS.="restart-at must be one of get, parse or combine\n" if @@ -105,7 +109,7 @@ $ERRORS.="restart-at must be one of get, parse or combine\n" if $ERRORS.="unknown database(s)" if @{$options{databases}} and - grep {$_ !~ /^(?:ncbi|genecards|harvester)$/i} @{$options{databases}}; + grep {$_ !~ /^(?:ncbi|genecard|harvester)$/i} @{$options{databases}}; if (not length $options{results}) { $ERRORS.="results directory not specified"; @@ -117,7 +121,7 @@ elsif (not -d $options{results} or not -w $options{results}) { pod2usage($ERRORS) if length $ERRORS; if (not @{$options{databases}}) { - $options{databases} = [qw(ncbi genecards harvester)] + $options{databases} = [qw(ncbi genecard harvester)] } $DEBUG = $options{debug}; @@ -163,9 +167,9 @@ if (@{$options{keywords}}) { for my $keyword_file (@{$options{keywords}}) { my $keyword_fh = IO::File->new($keyword_file,'r') or die "Unable to open $keyword_file for reading: $!"; - local $/; while (<$keyword_fh>) { next if /^\s*[#;]/; + next unless /\w+/; chomp; if (not $old_keywords{$_}) { DEBUG("Adding new keyword '$_'"); @@ -176,20 +180,21 @@ if (@{$options{keywords}}) { } } } + push @{$state{keywords}},@new_keywords; } if (exists $options{restart_at} and length $options{restart_at}) { if (lc($options{restart_at}) eq 'get') { - delete $state{gotten_keywords}; - delete $state{parsed_keywords}; - delete $state{combined_keywords}; + delete $state{done_keywords}{get}; + delete $state{done_keywords}{parse}; + delete $state{done_keywords}{combine}; } elsif (lc($options{restart_at}) eq 'parse') { - delete $state{parsed_keywords}; - delete $state{combined_keywords}; + delete $state{done_keywords}{parse}; + delete $state{done_keywords}{combine}; } elsif (lc($options{restart_at}) eq 'combine') { - delete $state{combined_keywords}; + delete $state{done_keywords}{combine}; } } @@ -217,7 +222,7 @@ for my $keyword (@{$state{keywords}}) { } if (not exists $state{done_keywords}{parse}{$database}{$keyword}) { push @{$actions{parse}{$database}},$keyword; - delete $state{done_keywords}{combine}{$database}{$keyword} if + delete $state{done_keywords}{combine}{$database}{$keyword} if exists $state{done_keywords}{combine}{$database}{$keyword}; } if (not exists $state{done_keywords}{combine}{$database}{$keyword}) { @@ -226,22 +231,26 @@ for my $keyword (@{$state{keywords}}) { } } -use threads; -use Thread::Queue; for my $state (qw(get parse)) { my %databases; for my $database (keys %{$actions{$state}}) { next unless @{$actions{$state}{$database}}; - $databases{$database}{queue} = Thread::Queue->new; - $databases{$database}{thread} = threads->new(\&handle_action($state,$database,$databases{database}{queue})); + $databases{$database}{queue} = Thread::Queue->new + or die "Unable to create new thread queue"; + $databases{$database}{thread} = threads->create(\&handle_action,$state,$database,$databases{$database}{queue}) + or die "Unable to create new thread"; $databases{$database}{queue}->enqueue(@{$actions{$state}{$database}}); $databases{$database}{queue}->enqueue(undef); } my $ERRORS=0; for my $database (keys %databases) { - my ($actioned_keywords,$failed_keywords) = $databases{$database}{thread}->join; - if (@{$failed_keywords}) { + my ($actioned_keywords,$failed_keywords) = @{$databases{$database}{thread}->join||[]}; + if (not defined $failed_keywords) { + ADVISE("Something bad happened during '$state' of '$database'"); + $ERRORS = 1; + } + elsif (@{$failed_keywords}) { ADVISE("These keywords failed during '$state' of '$database':",@{$failed_keywords}); $ERRORS=1; } @@ -255,17 +264,80 @@ for my $state (qw(get parse)) { } } +if ($actions{combine}) { + save_state(\%state); + # deal with combining results + my @parsed_results = map { my $db = $_; + map { + "parsed_results_${db}_${_}.txt" + } keys %{$state{done_keywords}{parse}{$db}} + } keys %{$state{done_keywords}{parse}}; + + write_command_to_file('combined_results.txt', + "$base_dir/combine_results", + @parsed_results, + ); + for my $result (@parsed_results) { + s/^parsed_results_//; + s/\.txt$//; + my ($db,$keyword) = split /_/, $_, 2; + $state{done_keywords}{combined}{$db}{$keyword} = 1; + } + save_state(\%state); + ADVISE("Finished; results in $options{results}/combined_results"); +} +else { + ADVISE('Nothing to do. [Perhaps you wanted --restart-at?]'); +} + sub handle_action{ my ($state,$database,$queue) = @_; my $keyword; - my $actioned_keywords = (); - my $failed_keywords = (); + my $actioned_keywords = []; + my $failed_keywords = []; + DEBUG("Beginning to handle actions for state '$state' database '$database'"); while ($keyword = $queue->dequeue) { + DEBUG("Handling state '$state' database '$database' keyword '$keyword'"); # handle the action, baybee + if ($state eq 'get') { + my $command_fh; + eval { + open($command_fh,'|-', + "$base_dir/get_${database}_results", + ) or die "unable to execute '$base_dir/get_${database}_results'"; + print {$command_fh} "$keyword\n" or die "unable to print $keyword to 'get_${database}_results'"; + close($command_fh) or die "Unable to close filehandle"; + if ($? != 0) { + die "get_${database}_results with keyword $keyword failed with error code ".($?>>8); + } + }; + if ($@) { + WARN($@); + push @{$failed_keywords}, $keyword; + next; + } + } + elsif ($state eq 'parse') { + eval { + write_command_to_file("parsed_results_${database}_${keyword}.txt", + "$base_dir/parse_${database}_results", + '--keywords', + $keyword, + ); + }; + if ($@) { + WARN("parse_${database}_results failed with $@"); + push @{$failed_keywords}, $keyword; + next; + } + } + else { + die "I don't know how to handle state $state"; + } ADVISE("$state results from '$database' for '$keyword'"); push @{$actioned_keywords},$keyword; } - return ($actioned_keywords,$failed_keywords); + return [$actioned_keywords,$failed_keywords]; } sub save_state{ @@ -276,6 +348,19 @@ sub save_state{ close $state_fh or die "Unable to close state file: $!"; } +sub write_command_to_file{ + my ($file,@command) = @_; + my $fh = IO::File->new($file,'w') or + die "Unable to open $file for writing: $!"; + my $command_fh; + open($command_fh,'-|', + @command, + ) or die "Unable to execute $command[0] $!"; + print {$fh} <$command_fh>; + close $fh; + close $command_fh or die "$command[0] failed with ".($?>>8); +} + sub ADVISE{ print STDOUT map {($_,qq(\n))} @_;