#! /usr/bin/perl
# do_it_all, is part of the gene search suite and is released
# under the terms of the GPL version 2, or any later version, at your
# option. See the file README and COPYING for more information.
# Copyright 2007 by Don Armstrong.
# $Id: perl_script 495 2006-08-10 08:02:01Z don $
#
# Provenance: added as bin/do_it_all by Don Armstrong on
# Fri, 24 Aug 2007 09:12:28 +0000 (+0000), subject "add do it all script
# to call other modules".
# X-Git-Url: https://git.donarmstrong.com/?p=function2gene.git;a=commitdiff_plain;h=15e77fec798b027d7708003da1c099c694c6a45e
# git-svn-id: file:///srv/svn/function2gene/trunk@3 a0738b58-4706-0410-8799-fb830574a030


use warnings;
use strict;

use Getopt::Long;
use Pod::Usage;

use IO::File;
# thaw is not exported by Storable by default, so it is requested by name.
use Storable qw(thaw);

=head1 NAME

do_it_all - Call out to each of the search modules to search for each
of the terms

=head1 SYNOPSIS

 do_it_all --keywords keywords.txt --results gene_search_results

 Options:
  --keywords    newline delimited list of keywords to search for
  --results     directory to store results in
  --database    databases to search
  --restart-at  mode to start searching at
  --debug, -d   debugging level (Default 0)
  --help, -h    display this help
  --man, -m     display manual

=head1 OPTIONS

=over

=item B<--keywords>

A file which contains a newline delimited list of keywords to search
for. Can be specified multiple times. Lines starting with # or ; are
ignored.

=item B<--results>

Directory in which to store results; also stores the current state of
the system

=item B<--database>

Databases to search, can be specified multiple times. [Defaults to
NCBI, GeneCards and Harvester, the only currently supported
databases.]

=item B<--restart-at>

If you need to restart the process at a particular state (which has
already been completed) specify this option. Must be one of get, parse
or combine.

=item B<--debug, -d>

Debug verbosity. (Default 0)

=item B<--help, -h>

Display brief usage information.

=item B<--man, -m>

Display this manual.

=back

=head1 EXAMPLES

 do_it_all --keywords keywords.txt --results gene_search_results

=cut


# Debugging level; set from --debug once the options have been parsed
# and consulted by DEBUG() below.
our $DEBUG = 0;

my %options = (databases => [],
               keywords  => [],
               debug     => 0,
               help      => 0,
               man       => 0,
               directory => '',
              );

# The first name in each spec is the key used in %options, so --results
# (the documented name) fills $options{directory} and --database (the
# documented name) fills $options{databases}.
GetOptions(\%options,'keywords=s@','databases|database=s@',
           'directory|results=s',
           'restart_at|restart-at=s',
           'debug|d+','help|h|?','man|m')
    or pod2usage(2);

pod2usage() if $options{help};
pod2usage({-verbose => 2}) if $options{man};

# Collect every problem with the command line, one per line, so that
# they can all be reported together.
my $ERRORS='';

$ERRORS.="restart-at must be one of get, parse or combine\n" if
    exists $options{restart_at} and $options{restart_at} !~ /^(?:get|parse|combine)$/;

my @unknown_databases =
    grep {$_ !~ /^(?:ncbi|genecards|harvester)$/i} @{$options{databases}};
$ERRORS.="unknown database(s): @unknown_databases\n" if @unknown_databases;

if (not length $options{directory}) {
    $ERRORS.="directory not specified (use --results)\n";
}
elsif (not -d $options{directory} or not -w $options{directory}) {
    $ERRORS.="directory $options{directory} does not exist or is not writeable\n";
}

pod2usage($ERRORS) if length $ERRORS;

if (not @{$options{databases}}) {
    $options{databases} = [qw(ncbi genecards harvester)]
}

$DEBUG = $options{debug};

# There are three states for our engine
#   Getting results
#   Parsing them
#   Combining results

# first, check to see if the state in the result directory exists

my %state;
my $state_path = "$options{directory}/do_it_all_state";

if (-e $state_path) {
    ADVISE("Using existing state information");
    my $state_fh = IO::File->new($state_path,'r') or die
        "Unable to open state file for reading: $!";
    # the state file is a Storable image, so read it as raw bytes
    $state_fh->binmode;
    # slurp mode is confined to this one read
    my $state_file = do { local $/; <$state_fh> };
    die "Unable to read state file: $!"
        unless defined $state_file and length $state_file;
    my $thawed_state = thaw($state_file) or die "Unable to thaw state file";
    %state = %{$thawed_state};
    # make sure the lists iterated over below exist even in a sparse state
    $state{keywords}  ||= [];
    $state{databases} ||= [];
}
else {
    ADVISE("Starting new run");
    %state =
        (keywords          => [],
         databases         => [map {lc($_)} @{$options{databases}}],
         gotten_keywords   => {},
         parsed_keywords   => {},
         combined_keywords => {},
        );
}

my @new_keywords;
if (@{$options{keywords}}) {
    # uniqify keywords, both against the saved state and against the
    # keywords added earlier in this run
    my %known_keywords;
    @known_keywords{@{$state{keywords}}} = (1) x @{$state{keywords}};
    for my $keyword_file (@{$options{keywords}}) {
        my $keyword_fh = IO::File->new($keyword_file,'r') or die
            "Unable to open $keyword_file for reading: $!";
        # read line by line; $/ must keep its default value here so that
        # every line is seen as a separate keyword
        while (my $keyword = <$keyword_fh>) {
            chomp $keyword;
            next if $keyword =~ /^\s*[#;]/;
            # a blank line is not a keyword
            next if $keyword !~ /\S/;
            if (not $known_keywords{$keyword}) {
                DEBUG("Adding new keyword '$keyword'");
                push @new_keywords, $keyword;
                $known_keywords{$keyword} = 1;
            }
            else {
                DEBUG("Not adding duplicate keyword '$keyword'");
            }
        }
    }
}
# the planning loop below walks $state{keywords}, so the new keywords
# have to be recorded there
push @{$state{keywords}}, @new_keywords;

# Restarting at a stage forgets the results of that stage and of every
# later one.
if (exists $options{restart_at} and length $options{restart_at}) {
    if (lc($options{restart_at}) eq 'get') {
        delete $state{gotten_keywords};
        delete $state{parsed_keywords};
        delete $state{combined_keywords};
    }
    elsif (lc($options{restart_at}) eq 'parse') {
        delete $state{parsed_keywords};
        delete $state{combined_keywords};
    }
    elsif (lc($options{restart_at}) eq 'combine') {
        delete $state{combined_keywords};
    }
}

# now we need to figure out what has to happen
# for each keyword, we check to see if we've got results, parsed
# results, and combined it. If not, we queue up those actions.

my @get_needed = ();
my @parse_needed = ();
my $combine_needed = 0;

for my $keyword (@{$state{keywords}}) {
    for my $database (@{$state{databases}}) {
        if (not exists $state{gotten_keywords}{$database}{$keyword}) {
            push @get_needed,[$database,$keyword];
            # anything parsed or combined from an earlier fetch is stale
            delete $state{parsed_keywords}{$database}{$keyword};
            delete $state{combined_keywords}{$database}{$keyword};
        }
        if (not exists $state{parsed_keywords}{$database}{$keyword}) {
            push @parse_needed,[$database,$keyword];
            # anything combined from an earlier parse is stale
            delete $state{combined_keywords}{$database}{$keyword};
        }
        if (not exists $state{combined_keywords}{$database}{$keyword}) {
            $combine_needed = 1;
        }
    }
}

# handle getting needed results
# TODO: not implemented yet; each action is a [database, keyword] pair
for my $action (@get_needed) {

}
# handle parsing needed results
# TODO: not implemented yet; each action is a [database, keyword] pair
for my $action (@parse_needed) {
}

# handle combining results
# TODO: not implemented yet; $combine_needed is set when it is required



# Print each argument on its own line to STDOUT.
sub ADVISE{
    print STDOUT map {($_,qq(\n))} @_;
}

# Print each argument on its own line to STDERR, but only when a
# non-zero debugging level was requested with --debug.
sub DEBUG{
    return if not $DEBUG;
    print STDERR map {($_,qq(\n))} @_;
}


# Print each argument on its own line to STDERR.
sub WARN {
    print STDERR map {($_,qq(\n))} @_;
}

__END__