#! /usr/bin/perl
# do_it_all, is part of the gene search suite and is released
# under the terms of the GPL version 2, or any later version, at your
# option. See the file README and COPYING for more information.
# Copyright 2007 by Don Armstrong.
#
# Recovered from the function2gene.git history (bin/do_it_all).


use threads;
use warnings;
use strict;

use Getopt::Long;
use Pod::Usage;

use Storable;

=head1 NAME

do_it_all - Call out to each of the search modules to search for
each of the terms

=head1 SYNOPSIS

 do_it_all --keywords keywords.txt --results gene_search_results

 Options:
  --keywords     newline delimited list of keywords to search for
  --results      directory to store results in
  --database     databases to search
  --restart-at   mode to start searching at
  --debug, -d    debugging level (Default 0)
  --help, -h     display this help
  --man, -m      display manual

=head1 OPTIONS

=over

=item B<--keywords>

A file which contains a newline delimited list of keywords to search
for. Can be specified multiple times. Lines starting with # or ; are
ignored.

=item B<--results>

Directory in which to store results; also stores the current state of
the system

=item B<--database>

Databases to search, can be specified multiple times. [Defaults to
NCBI, GeneCards and Harvester, the only currently supported
databases.]

=item B<--restart-at>

If you need to restart the process at a particular state (which has
already been completed) specify this option. Must be one of get, parse
or combine.

=item B<--debug, -d>

Debug verbosity. (Default 0)

=item B<--help, -h>

Display brief usage information.

=item B<--man, -m>

Display this manual.

=back

=head1 EXAMPLES


=cut


our $DEBUG = 0;
use Cwd qw(abs_path);
use IO::File;
use Storable qw(thaw freeze);
use File::Basename qw(basename dirname);
use Thread::Queue;

# Name of the file (inside the results directory) which holds the
# frozen state of the run.
my $STATE_FILE = 'do_it_all_state';

my %options = (databases => [],
               keywords  => [],
               debug     => 0,
               help      => 0,
               man       => 0,
               results   => '',
              );

# --database is what the manual documents; --databases is the
# historical spelling. Accept both explicitly.
GetOptions(\%options,'keywords=s@','databases|database=s@',
           'restart_at|restart-at=s','results=s',
           'debug|d+','help|h|?','man|m') or pod2usage(2);

pod2usage() if $options{help};
pod2usage({verbose=>2}) if $options{man};

# The helper programs (get_*_results, parse_*_results, combine_results)
# are expected to live next to this script.
my $base_dir = dirname(abs_path($0));

# Collect every usage problem so that they can be reported together,
# one per line.
my @errors;

push @errors, "restart-at must be one of get, parse or combine"
    if exists $options{restart_at}
        and $options{restart_at} !~ /\A(?:get|parse|combine)\z/i;

my @unknown_databases =
    grep {$_ !~ /\A(?:ncbi|genecard|harvester)\z/i} @{$options{databases}};
push @errors, "unknown database(s): @unknown_databases"
    if @unknown_databases;

if (not length $options{results}) {
    push @errors, "results directory not specified";
}
elsif (not -d $options{results} or not -w $options{results}) {
    push @errors, "results directory $options{results} does not exist or is not writeable";
}

pod2usage(join('', map {"$_\n"} @errors)) if @errors;

if (not @{$options{databases}}) {
    $options{databases} = [qw(ncbi genecard harvester)];
}

$DEBUG = $options{debug};

# There are three states for our engine
# Getting results
# Parsing them
# Combining results

# Keyword files are given relative to the starting directory, so they
# have to be made absolute before we chdir into the results directory.
my @keyword_files;
for my $keyword_file (@{$options{keywords}}) {
    my $absolute = abs_path($keyword_file);
    die "Unable to resolve keyword file '$keyword_file'\n"
        if not defined $absolute;
    push @keyword_files, $absolute;
}
$options{keywords} = [@keyword_files];

chdir $options{results} or die "Unable to chdir to $options{results}: $!";

# first, check to see if the state in the result directory exists

my %state;

if (-e $STATE_FILE) {
    ADVISE("Using existing state information");
    my $state_fh = IO::File->new($STATE_FILE,'r') or die
        "Unable to open state file for reading: $!";
    binmode $state_fh;
    my $frozen_state = do { local $/; <$state_fh> };
    # thaw may either return undef or croak on damaged input; treat
    # both the same way.
    my $thawed = eval { thaw($frozen_state) };
    die "Unable to thaw state file" if ref($thawed) ne 'HASH';
    %state = %{$thawed};
}
else {
    ADVISE("Starting new run");
    %state = (keywords      => [],
              databases     => [map {lc($_)} @{$options{databases}}],
              done_keywords => {get     => {},
                                parse   => {},
                                combine => {},
                               },
             );
}

# Make sure that the parts of the state we dereference below exist,
# even if the state file was written without them.
$state{keywords}      ||= [];
$state{databases}     ||= [];
$state{done_keywords} ||= {};
# Earlier versions recorded finished combinations under the misspelt
# key 'combined', which was never consulted; drop it.
delete $state{done_keywords}{combined};

my @new_keywords;
if (@keyword_files) {
    # uniqify keywords, both against the saved state and against the
    # keywords read during this invocation
    my %seen_keywords;
    @seen_keywords{@{$state{keywords}}} = (1) x @{$state{keywords}};
    for my $keyword_file (@keyword_files) {
        my $keyword_fh = IO::File->new($keyword_file,'r') or die
            "Unable to open $keyword_file for reading: $!";
        while (my $keyword = <$keyword_fh>) {
            next if $keyword =~ /^\s*[#;]/;
            next unless $keyword =~ /\w+/;
            chomp $keyword;
            if (not $seen_keywords{$keyword}) {
                DEBUG("Adding new keyword '$keyword'");
                push @new_keywords, $keyword;
                $seen_keywords{$keyword} = 1;
            }
            else {
                DEBUG("Not adding duplicate keyword '$keyword'");
            }
        }
    }
    push @{$state{keywords}},@new_keywords;
}

# Restarting at a stage forgets the results of that stage and of every
# stage which follows it.
if (exists $options{restart_at} and length $options{restart_at}) {
    my $restart_at = lc($options{restart_at});
    my $forget = 0;
    for my $stage (qw(get parse combine)) {
        $forget ||= $stage eq $restart_at;
        delete $state{done_keywords}{$stage} if $forget;
    }
}

# now we need to figure out what has to happen
# for each keyword, we check to see if we've got results, parsed
# results, and combined it. If not, we queue up those actions.

my %actions = (combine => 0,
               get     => {},
               parse   => {},
              );

if (not @{$state{keywords}}) {
    ADVISE("There are no keywords specified");
}

my $done = $state{done_keywords};
for my $keyword (@{$state{keywords}}) {
    for my $database (@{$state{databases}}) {
        if (not exists $done->{get}{$database}{$keyword}) {
            push @{$actions{get}{$database}}, $keyword;
            # anything derived from the old results is stale
            delete $done->{parse}{$database}{$keyword};
            delete $done->{combine}{$database}{$keyword};
        }
        if (not exists $done->{parse}{$database}{$keyword}) {
            push @{$actions{parse}{$database}}, $keyword;
            delete $done->{combine}{$database}{$keyword};
        }
        if (not exists $done->{combine}{$database}{$keyword}) {
            $actions{combine} = 1;
        }
    }
}


# Run the get stage and then the parse stage; within a stage every
# database is handled by its own thread, fed through a queue which is
# terminated by undef.
for my $stage (qw(get parse)) {
    my %workers;
    for my $database (keys %{$actions{$stage}}) {
        my @queued_keywords = @{$actions{$stage}{$database}};
        next unless @queued_keywords;
        my $queue = Thread::Queue->new
            or die "Unable to create new thread queue";
        # created in scalar context, so that join returns the single
        # array reference handle_action returns
        my $thread = threads->create(\&handle_action,$stage,$database,$queue)
            or die "Unable to create new thread";
        $queue->enqueue(@queued_keywords);
        $queue->enqueue(undef);
        $workers{$database} = {queue => $queue, thread => $thread};
    }
    my $stage_failed = 0;
    for my $database (keys %workers) {
        my $result = $workers{$database}{thread}->join;
        if (ref($result) ne 'ARRAY') {
            # the thread died; none of its keywords can be trusted,
            # but the other databases' results still get recorded
            ADVISE("Something bad happened during '$stage' of '$database'");
            $stage_failed = 1;
            next;
        }
        my ($actioned_keywords,$failed_keywords) = @{$result};
        $actioned_keywords ||= [];
        $failed_keywords   ||= [];
        if (@{$failed_keywords}) {
            ADVISE("These keywords failed during '$stage' of '$database':",@{$failed_keywords});
            $stage_failed = 1;
        }
        @{$done->{$stage}{$database}}{@{$actioned_keywords}} = (1) x @{$actioned_keywords};
        delete @{$done->{$stage}{$database}}{@{$failed_keywords}};
    }
    save_state(\%state);
    if ($stage_failed) {
        WARN("Stoping, as there are errors");
        exit 1;
    }
}

if ($actions{combine}) {
    save_state(\%state);
    # deal with combining results; every parsed (database, keyword)
    # pair contributes one input file
    my @parsed_results;
    my @combined_pairs;
    for my $db (sort keys %{$done->{parse} || {}}) {
        for my $keyword (sort keys %{$done->{parse}{$db}}) {
            push @parsed_results, "parsed_results_${db}_${keyword}.txt";
            push @combined_pairs, [$db,$keyword];
        }
    }

    write_command_to_file('combined_results.txt',
                          "$base_dir/combine_results",
                          @parsed_results,
                         );
    # record the pairs under 'combine', the key which the planning
    # loop above checks
    for my $pair (@combined_pairs) {
        my ($db,$keyword) = @{$pair};
        $done->{combine}{$db}{$keyword} = 1;
    }
    save_state(\%state);
    ADVISE("Finished; results in $options{results}/combined_results.txt");
}
else {
    ADVISE('Nothing to do. [Perhaps you wanted --restart-at?]');
}

# handle_action($state,$database,$queue)
#
# Thread body. Takes keywords from $queue until an undef is dequeued
# and performs the action for $state ('get' or 'parse') on $database
# for each of them.
#
# 'get' pipes the keyword into $base_dir/get_${database}_results;
# 'parse' stores the output of $base_dir/parse_${database}_results
# --keywords $keyword in parsed_results_${database}_${keyword}.txt.
#
# Returns an array reference [$actioned_keywords,$failed_keywords],
# each of which is an array reference of keywords. Dies on an unknown
# $state.
sub handle_action{
    my ($state,$database,$queue) = @_;
    my $actioned_keywords = [];
    my $failed_keywords = [];
    DEBUG("Beginning to handle actions for state '$state' database '$database'");
    # test definedness, not truth, so that a keyword of '0' does not
    # look like the end-of-queue marker
    while (defined(my $keyword = $queue->dequeue)) {
        DEBUG("Handling state '$state' database '$database' keyword '$keyword'");
        # handle the action, baybee
        if ($state eq 'get') {
            my $command = "$base_dir/get_${database}_results";
            eval {
                open(my $command_fh,'|-',$command)
                    or die "unable to execute '$command': $!";
                print {$command_fh} "$keyword\n"
                    or die "unable to print $keyword to 'get_${database}_results'";
                if (not close($command_fh)) {
                    # closing a pipe fails for I/O errors ($! set) and
                    # for a non-zero exit of the command ($! is 0)
                    die "Unable to close filehandle: $!" if $!;
                    die "get_${database}_results with keyword $keyword failed with error code ".($?>>8);
                }
            };
            if ($@) {
                WARN($@);
                push @{$failed_keywords}, $keyword;
                next;
            }
        }
        elsif ($state eq 'parse') {
            eval {
                write_command_to_file("parsed_results_${database}_${keyword}.txt",
                                      "$base_dir/parse_${database}_results",
                                      '--keywords',
                                      $keyword,
                                     );
            };
            if ($@) {
                WARN("parse_${database}_results failed with $@");
                push @{$failed_keywords}, $keyword;
                next;
            }
        }
        else {
            die "I don't know how to handle state $state";
        }
        ADVISE("$state results from '$database' for '$keyword'");
        push @{$actioned_keywords},$keyword;
    }
    return [$actioned_keywords,$failed_keywords];
}

# save_state($state)
#
# Freezes the hash reference $state into the state file in the current
# directory. The data is written to a temporary file which is then
# renamed into place, so an interrupted write cannot leave a truncated
# state file behind. Dies if any step fails.
sub save_state{
    my ($state) = @_;
    my $temporary_file = "$STATE_FILE.new";
    my $state_fh = IO::File->new($temporary_file,'w') or die
        "Unable to open state file for writing: $!";
    binmode $state_fh;
    print {$state_fh} freeze($state) or die "Unable to freeze state file";
    close $state_fh or die "Unable to close state file: $!";
    rename $temporary_file, $STATE_FILE
        or die "Unable to rename state file into place: $!";
}

# write_command_to_file($file,@command)
#
# Runs @command (list form, so no shell is involved once arguments are
# given) and writes everything it prints on standard output to $file.
# Dies if the file cannot be written, or if the command cannot be
# started or exits with a non-zero status.
sub write_command_to_file{
    my ($file,@command) = @_;
    my $fh = IO::File->new($file,'w') or
        die "Unable to open $file for writing: $!";
    open(my $command_fh,'-|',
         @command,
        ) or die "Unable to execute $command[0] $!";
    while (my $line = <$command_fh>) {
        print {$fh} $line or die "Unable to write to $file: $!";
    }
    close $fh or die "Unable to close $file: $!";
    close $command_fh or die "$command[0] failed with ".($?>>8);
}


# Print each argument on its own line on standard output.
sub ADVISE{
    print STDOUT map {($_,qq(\n))} @_;
}

# Print each argument on its own line on standard error, but only if
# debugging was requested with --debug.
sub DEBUG{
    return unless $DEBUG;
    print STDERR map {($_,qq(\n))} @_;
}


# Print each argument on its own line on standard error.
sub WARN {
    print STDERR map {($_,qq(\n))} @_;
}

__END__