+++ /dev/null
-#! /usr/bin/perl
-# do_it_all, is part of the gene search suite and is released
-# under the terms of the GPL version 2, or any later version, at your
-# option. See the file README and COPYING for more information.
-# Copyright 2007 by Don Armstrong <don@donarmstrong.com>.
-
-
-use threads;
-use warnings;
-use strict;
-
-use Getopt::Long;
-use Pod::Usage;
-
-use Storable;
-
-=head1 NAME
-
- do_it_all - Call out to each of the search modules to search for
- each of the terms
-
-=head1 SYNOPSIS
-
- do_it_all --keywords keywords.txt --results gene_search_results
-
- Options:
- --keywords newline-delimited list of keywords to search for
- --results directory to store results in
- --database databases to search
- --restart-at mode to start searching at
- --debug, -d debugging level (Default 0)
- --help, -h display this help
- --man, -m display manual
-
-=head1 OPTIONS
-
-=over
-
-=item B<--keywords>
-
-A file which contains a newline-delimited list of keywords to search
-for. Can be specified multiple times. Lines starting with # or ; are
-ignored.
-
-=item B<--results>
-
-Directory in which to store results; also stores the current state of
-the system
-
-=item B<--database>
-
-Databases to search, can be specified multiple times. [Defaults to
-NCBI, GeneCards and Harvester, the only currently supported
-databases.]
-
-=item B<--restart-at>
-
-If you need to restart the process at a particular state (which has
-already been completed) specify this option.
-
-=item B<--debug, -d>
-
-Debug verbosity. (Default 0)
-
-=item B<--help, -h>
-
-Display brief usage information.
-
-=item B<--man, -m>
-
-Display this manual.
-
-=back
-
-=head1 EXAMPLES
-
-
-=cut
-
-
-use vars qw($DEBUG);
-use Cwd qw(abs_path);
-use IO::File;
-use Storable qw(thaw freeze);
-use File::Basename qw(basename dirname);
-use Thread::Queue;
-
-# Option defaults; GetOptions fills this hash in place.
-my %options = (databases => [],
-               keywords  => [],
-               debug     => 0,
-               help      => 0,
-               man       => 0,
-               results   => '',
-              );
-
-# Stop with the usage message on an unparseable command line instead of
-# carrying on with whatever happened to be parsed.
-GetOptions(\%options,'keywords=s@','databases=s@',
-           'restart_at|restart-at=s','results=s',
-           'debug|d+','help|h|?','man|m') or pod2usage(2);
-
-pod2usage() if $options{help};
-pod2usage({verbose=>2}) if $options{man};
-
-# Directory containing this script; the helper programs are run from it.
-my $base_dir = dirname(abs_path($0));
-
-# Collect every validation failure, one per line, so that they can all
-# be reported in a single usage message.
-my $ERRORS='';
-
-# The value is lower-cased before it is acted upon, so accept any
-# capitalisation here (the database names are treated the same way).
-$ERRORS.="restart-at must be one of get, parse or combine\n" if
-    exists $options{restart_at} and $options{restart_at} !~ /^(?:get|parse|combine)$/i;
-
-$ERRORS.="unknown database(s)\n" if
-    grep {$_ !~ /^(?:ncbi|genecard|harvester)$/i} @{$options{databases}};
-
-if (not length $options{results}) {
-    $ERRORS.="results directory not specified\n";
-}
-elsif (not -d $options{results} or not -w $options{results}) {
-    $ERRORS.="results directory $options{results} does not exist or is not writeable\n";
-}
-
-pod2usage($ERRORS) if length $ERRORS;
-
-# No database requested: search all the supported ones.
-if (not @{$options{databases}}) {
-    $options{databases} = [qw(ncbi genecard harvester)]
-}
-
-$DEBUG = $options{debug};
-
-# There are three states for our engine
-# Getting results
-# Parsing them
-# Combining results
-
-# first, check to see if the state in the result directory exists
-
-my %state;
-
-# Resolve the keyword files before the chdir below, so that relative
-# paths given on the command line keep pointing at the same files.
-$options{keywords} = [map {abs_path($_)} @{$options{keywords}}];
-
-chdir $options{results} or die "Unable to chdir to $options{results}: $!";
-
-if (-e "do_it_all_state") {
-    ADVISE("Using existing state information");
-    my $state_fh = IO::File->new("do_it_all_state",'r') or die
-        "Unable to open state file for reading: $!";
-    # The frozen state is binary data.
-    binmode $state_fh;
-    # Slurp the whole file; the record separator is only undefined for
-    # the duration of the read.
-    my $state_file = do { local $/; <$state_fh> };
-    # Check the thawed value before dereferencing it, so that a damaged
-    # state file is reported with this message rather than a
-    # dereference error.
-    my $thawed = eval { thaw($state_file) };
-    die "Unable to thaw state file" if ref($thawed) ne 'HASH';
-    %state = %{$thawed};
-}
-else {
-    ADVISE("Starting new run");
-    %state = (keywords => [],
-              databases => [map {lc($_)} @{$options{databases}}],
-              done_keywords => {
-                                get => {},
-                                parse => {},
-                                combine => {},
-                               },
-             );
-}
-
-# Keywords read from the keyword files that are not yet part of the state.
-my @new_keywords;
-if (@{$options{keywords}}) {
-    # uniqify keywords: start from the ones already recorded in the
-    # state and remember each newly accepted keyword as it is seen, so
-    # that a keyword repeated in the input is only queued once
-    my %seen_keywords;
-    @seen_keywords{@{$state{keywords}}} = (1) x @{$state{keywords}};
-    for my $keyword_file (@{$options{keywords}}) {
-        my $keyword_fh = IO::File->new($keyword_file,'r') or die
-            "Unable to open $keyword_file for reading: $!";
-        while (my $keyword = <$keyword_fh>) {
-            # skip comment lines and lines without any word character
-            next if $keyword =~ /^\s*[#;]/;
-            next unless $keyword =~ /\w+/;
-            chomp $keyword;
-            if (not $seen_keywords{$keyword}) {
-                DEBUG("Adding new keyword '$keyword'");
-                push @new_keywords, $keyword;
-                $seen_keywords{$keyword} = 1;
-            }
-            else {
-                DEBUG("Not adding duplicate keyword '$keyword'");
-            }
-        }
-    }
-    push @{$state{keywords}},@new_keywords;
-}
-
-# Restarting at a stage forgets the completion records of that stage
-# and of every stage that follows it.
-if (exists $options{restart_at} and length $options{restart_at}) {
-    my $restart_stage = lc($options{restart_at});
-    my @stages = qw(get parse combine);
-    # drop the stages that come before the requested one
-    shift @stages while @stages and $stages[0] ne $restart_stage;
-    delete $state{done_keywords}{$_} for @stages;
-}
-
-# now we need to figure out what has to happen
-# for each keyword, we check to see if we've got results, parsed
-# results, and combined it. If not, we queue up those actions.
-
-# get and parse map a database to the keywords still to be handled;
-# combine is a plain flag.
-my %actions = (get     => {},
-               parse   => {},
-               combine => 0,
-              );
-
-ADVISE("There are no keywords specified") unless @{$state{keywords}};
-
-for my $keyword (@{$state{keywords}}) {
-    for my $database (@{$state{databases}}) {
-        my $done = $state{done_keywords} ||= {};
-        # a keyword which still has to be fetched must also be parsed
-        # and combined again
-        unless (exists $done->{get}{$database}{$keyword}) {
-            push @{$actions{get}{$database}}, $keyword;
-            delete $done->{$_}{$database}{$keyword} for qw(parse combine);
-        }
-        # likewise, a keyword which still has to be parsed must be
-        # combined again
-        unless (exists $done->{parse}{$database}{$keyword}) {
-            push @{$actions{parse}{$database}}, $keyword;
-            delete $done->{combine}{$database}{$keyword};
-        }
-        $actions{combine} = 1
-            unless exists $done->{combine}{$database}{$keyword};
-    }
-}
-
-
-# Run the 'get' stage and then the 'parse' stage. Within a stage, every
-# database with pending keywords gets its own worker thread, fed from
-# its own queue. The state is saved after each stage, and the run stops
-# after the first stage that reported a problem.
-for my $stage (qw(get parse)) {
-    my %databases;
-    for my $database (keys %{$actions{$stage}}) {
-        next unless @{$actions{$stage}{$database}};
-        $databases{$database}{queue} = Thread::Queue->new
-            or die "Unable to create new thread queue";
-        $databases{$database}{thread} = threads->create(\&handle_action,$stage,$database,$databases{$database}{queue})
-            or die "Unable to create new thread";
-        $databases{$database}{queue}->enqueue(@{$actions{$stage}{$database}});
-        # an undef entry tells the worker that its queue is finished
-        $databases{$database}{queue}->enqueue(undef);
-    }
-    my $stage_failed = 0;
-    for my $database (keys %databases) {
-        my ($actioned_keywords,$failed_keywords) = @{$databases{$database}{thread}->join||[]};
-        if (not defined $failed_keywords) {
-            ADVISE("Something bad happened during '$stage' of '$database'");
-            $stage_failed = 1;
-        }
-        elsif (@{$failed_keywords}) {
-            ADVISE("These keywords failed during '$stage' of '$database':",@{$failed_keywords});
-            $stage_failed = 1;
-        }
-        # A worker which returned nothing leaves both lists undefined;
-        # treat them as empty so that the results of the other workers
-        # are still recorded and saved below.
-        $actioned_keywords ||= [];
-        $failed_keywords   ||= [];
-        @{$state{done_keywords}{$stage}{$database}}{@{$actioned_keywords}} = (1) x @{$actioned_keywords};
-        delete @{$state{done_keywords}{$stage}{$database}}{@{$failed_keywords}};
-    }
-    save_state(\%state);
-    if ($stage_failed) {
-        WARN("Stoping, as there are errors");
-        exit 1;
-    }
-}
-
-if ($actions{combine}) {
-    save_state(\%state);
-    # deal with combining results: every (database, keyword) pair which
-    # has been parsed contributes one parsed results file
-    my @parsed_pairs = map { my $db = $_;
-                             map { [$db, $_] } keys %{$state{done_keywords}{parse}{$db}}
-                         } keys %{$state{done_keywords}{parse}};
-    my @parsed_results = map {
-        sprintf 'parsed_results_%s_%s.txt', @{$_}
-    } @parsed_pairs;
-
-    write_command_to_file('combined_results.txt',
-                          "$base_dir/combine_results",
-                          @parsed_results,
-                         );
-    # Record the pairs under the 'combine' key, which is the key the
-    # planning and restart code consult. The pairs are kept as such
-    # instead of being parsed back out of the file names.
-    for my $pair (@parsed_pairs) {
-        my ($db,$keyword) = @{$pair};
-        $state{done_keywords}{combine}{$db}{$keyword} = 1;
-    }
-    save_state(\%state);
-    ADVISE("Finished; results in $options{results}/combined_results.txt");
-}
-else {
-    ADVISE('Nothing to do. [Perhaps you wanted --restart-at?]');
-}
-
-# handle_action($state, $database, $queue)
-#
-# Worker run in its own thread: takes keywords off $queue until an
-# undef entry is dequeued, and performs the action for $state ('get' or
-# 'parse') against $database for each of them.
-#
-#  get   - runs $base_dir/get_<database>_results and writes the keyword
-#          to its standard input
-#  parse - runs $base_dir/parse_<database>_results --keywords <keyword>
-#          and stores its output in parsed_results_<database>_<keyword>.txt
-#
-# Returns an array reference holding two array references: the keywords
-# which were handled successfully and the keywords which failed. A
-# failure of one keyword is reported with WARN and does not stop the
-# remaining keywords. Dies on an unknown $state.
-sub handle_action{
-    my ($state,$database,$queue) = @_;
-    my $actioned_keywords = [];
-    my $failed_keywords = [];
-    DEBUG("Beginning to handle actions for state '$state' database '$database'");
-    # Test for definedness rather than truth, so that a keyword such as
-    # '0' is not mistaken for the undef which ends the queue.
-    while (defined(my $keyword = $queue->dequeue)) {
-        DEBUG("Handling state '$state' database '$database' keyword '$keyword'");
-        # handle the action, baybee
-        if ($state eq 'get') {
-            eval {
-                open(my $command_fh,'|-',
-                     "$base_dir/get_${database}_results",
-                    ) or die "unable to execute '$base_dir/get_${database}_results'";
-                print {$command_fh} "$keyword\n" or die "unable to print $keyword to 'get_${database}_results'";
-                # Closing a pipe waits for the command. A false return
-                # with $! unset means that the command itself exited
-                # with a non-zero status, which is then found in $?.
-                if (not close($command_fh)) {
-                    die "Unable to close filehandle: $!" if $!;
-                    die "get_${database}_results with keyword $keyword failed with error code ".($?>>8);
-                }
-            };
-            if ($@) {
-                WARN($@);
-                push @{$failed_keywords}, $keyword;
-                next;
-            }
-        }
-        elsif ($state eq 'parse') {
-            eval {
-                write_command_to_file("parsed_results_${database}_${keyword}.txt",
-                                      "$base_dir/parse_${database}_results",
-                                      '--keywords',
-                                      $keyword,
-                                     );
-            };
-            if ($@) {
-                WARN("parse_${database}_results failed with $@");
-                push @{$failed_keywords}, $keyword;
-                next;
-            }
-        }
-        else {
-            die "I don't know how to handle state $state";
-        }
-        ADVISE("$state results from '$database' for '$keyword'");
-        push @{$actioned_keywords},$keyword;
-    }
-    return [$actioned_keywords,$failed_keywords];
-}
-
-# save_state($state)
-#
-# Freezes the hash reference $state with Storable and stores it as
-# do_it_all_state in the current directory. The data is first written
-# to a temporary file which is then renamed over the state file, so an
-# interrupted write does not leave a truncated state file behind.
-# Dies if the file cannot be written or put in place.
-sub save_state{
-    my ($state) = @_;
-    my $temp_file = "do_it_all_state.new";
-    my $state_fh = IO::File->new($temp_file,'w') or die
-        "Unable to open state file for writing: $!";
-    # The frozen state is binary data.
-    binmode $state_fh;
-    print {$state_fh} freeze($state) or die "Unable to freeze state file";
-    close $state_fh or die "Unable to close state file: $!";
-    rename($temp_file,"do_it_all_state") or die
-        "Unable to replace state file: $!";
-}
-
-# write_command_to_file($file, @command)
-#
-# Runs @command (program followed by its arguments) and stores what it
-# writes to standard output in $file, replacing any previous content.
-# Dies if $file cannot be opened, written or closed, if the command
-# cannot be started, or if closing the pipe to the command fails, which
-# includes the command exiting with a non-zero status.
-sub write_command_to_file{
-    my ($file,@command) = @_;
-    my $fh = IO::File->new($file,'w') or
-        die "Unable to open $file for writing: $!";
-    open(my $command_fh,'-|',
-         @command,
-        ) or die "Unable to execute $command[0] $!";
-    # Copy the output across piece by piece instead of reading all of
-    # it into memory first.
-    while (defined(my $line = <$command_fh>)) {
-        print {$fh} $line or die "Unable to write to $file: $!";
-    }
-    # Buffered write errors only show up when the file is closed.
-    close $fh or die "Unable to close $file: $!";
-    close $command_fh or die "$command[0] failed with ".($?>>8);
-}
-
-
-# ADVISE(@messages)
-#
-# Prints each message to standard output, followed by a newline.
-sub ADVISE{
-    my @lines = map { "$_\n" } @_;
-    print STDOUT @lines;
-}
-
-# DEBUG(@messages)
-#
-# Prints each message to standard error, followed by a newline, but
-# only when the package variable $DEBUG (set from --debug) is true.
-sub DEBUG{
-    our $DEBUG;
-    return unless $DEBUG;
-    print STDERR map {($_,qq(\n))} @_;
-}
-
-
-# WARN(@messages)
-#
-# Prints each message to standard error, followed by a newline.
-sub WARN {
-    my @lines = map { "$_\n" } @_;
-    print STDERR @lines;
-}
-
-__END__