2 # do_it_all, is part of the gene search suite and is released
3 # under the terms of the GPL version 2, or any later version, at your
4 # option. See the file README and COPYING for more information.
5 # Copyright 2007 by Don Armstrong <don@donarmstrong.com>.
6 # $Id: perl_script 495 2006-08-10 08:02:01Z don $
19 do_it_all - Call out to each of the search modules to search for each
24 do_it_all --keywords keywords.txt --results gene_search_results
27 --keywords newline-delimited list of keywords to search for
28 --results directory to store results in
29 --database databases to search
30 --restart-at mode to start searching at
31 --debug, -d debugging level (Default 0)
32 --help, -h display this help
33 --man, -m display manual
41 A file which contains a newline-delimited list of keywords to search
42 for. Can be specified multiple times. Lines starting with # or ; are
47 Directory in which to store results; also stores the current state of
52 Databases to search, can be specified multiple times. [Defaults to
53 NCBI, GeneCards and Harvester, the only currently supported
58 If you need to restart the process at a particular state (which has
59 already been completed) specify this option.
63 Debug verbosity. (Default 0)
67 Display brief usage information.
83 my %options = (databases => [],
# Parse the command line into %options.  The results directory is stored
# under the 'directory' key, which is the key the validation and state-file
# code reads; '--results' is the spelling shown in the usage text, and
# '--directory' is accepted as an equivalent name.  Getopt::Long stores an
# aliased option under the first (primary) name in its specification.
GetOptions(\%options,'keywords=s@','databases=s@',
           'directory|results=s',
           'restart_at|restart-at=s',
           'debug|d+','help|h|?','man|m');
95 pod2usage() if $options{help};
96 pod2usage({verbose=>2}) if $options{man};
100 $ERRORS.="restart-at must be one of get, parse or combine\n" if
101 exists $options{restart_at} and $options{restart_at} !~ /^(?:get|parse|combine)$/;
# Collect every requested database that is not one of the supported names
# (ncbi, genecards, harvester; matched case-insensitively) so the usage
# error can say which values were rejected.  The trailing newline keeps this
# message from running into any error text appended to $ERRORS after it.
my @unknown_databases =
    grep {$_ !~ /^(?:ncbi|genecards|harvester)$/i} @{$options{databases}};
$ERRORS.="unknown database(s): @unknown_databases\n" if @unknown_databases;
107 if (not length $options{directory}) {
108 $ERRORS.="directory not specified";
110 elsif (not -d $options{directory} or not -w $options{directory}) {
111 $ERRORS.="directory $options{directory} does not exist or is not writeable";
114 pod2usage($ERRORS) if length $ERRORS;
116 if (not @{$options{databases}}) {
117 $options{databases} = [qw(ncbi genecards harvester)]
120 $DEBUG = $options{debug};
122 # There are three states for our engine
127 # first, check to see if the state in the result directory exists
131 if (-e "$options{directory}/do_it_all_state") {
132 ADVISE("Using existing state information");
133 my $state_fh = IO::File->new("$options{directory}/do_it_all_state",'r') or die
134 "Unable to open state file for reading: $!";
# Read the frozen state and rebuild %state from it.  Each step is checked on
# its own: the read is tested for definedness rather than truth, and the
# value returned by thaw() is verified to be a hash reference before it is
# dereferenced, so a file that cannot be decoded produces the intended
# message instead of a dereference failure.
# NOTE(review): this reads one record as delimited by $/; confirm that $/ is
# set appropriately for the frozen data before this point.
my $state_file = <$state_fh>;
die "Unable to read state file: $!" if not defined $state_file;
my $thawed_state = thaw($state_file);
die "Unable to thaw state file" if ref($thawed_state) ne 'HASH';
%state = %{$thawed_state};
140 ADVISE("Starting new run");
141 %state = (keywords => [],
142 databases => [map {lc($_)} @{$options{databases}}],
143 gotten_keywords => {},
144 parsed_keywords => {},
145 combined_keywords => {},
150 if (@{$options{keywords}}) {
153 @old_keywords{@{$state{keywords}}} = (1) x @{$state{keywords}};
154 for my $keyword_file (@{$options{keywords}}) {
155 my $keyword_fh = IO::File->new($keyword_file,'r') or die
156 "Unable to open $keyword_file for reading: $!";
158 while (<$keyword_fh>) {
161 if (not $old_keywords{$_}) {
162 DEBUG("Adding new keyword '$_'");
163 push @new_keywords, $_;
166 DEBUG("Not adding duplicate keyword '$_'");
172 if (exists $options{restart_at} and length $options{restart_at}) {
173 if (lc($options{restart_at}) eq 'get') {
174 delete $state{gotten_keywords};
175 delete $state{parsed_keywords};
176 delete $state{combined_keywords};
178 elsif (lc($options{restart_at}) eq 'parse') {
179 delete $state{parsed_keywords};
180 delete $state{combined_keywords};
182 elsif (lc($options{restart_at}) eq 'combine') {
183 delete $state{combined_keywords};
187 # now we need to figure out what has to happen
188 # for each keyword and database, we check whether we've gotten results,
189 # parsed them, and combined them. If not, we queue up those actions.
192 my @parse_needed = ();
193 my $combine_needed = 0;
195 for my $keyword (@{$state{keywords}}) {
196 for my $database (@{$state{databases}}) {
197 if (not exists $state{gotten_keywords}{$database}{$keyword}) {
198 push @get_needed,[$database,$keyword];
# A result that has to be fetched again invalidates whatever was parsed or
# combined from an earlier fetch.  The enclosing test has just established
# that no gotten_keywords entry exists for this database/keyword pair, so
# guarding these deletes on that entry existing could never fire; drop the
# derived markers unconditionally (deleting an absent key is harmless).
delete $state{parsed_keywords}{$database}{$keyword};
delete $state{combined_keywords}{$database}{$keyword};
204 if (not exists $state{parsed_keywords}{$database}{$keyword}) {
205 push @parse_needed,[$database,$keyword];
206 delete $state{combined_keywords}{$database}{$keyword} if
207 exists $state{gotten_keywords}{$database}{$keyword};
209 if (not exists $state{combined_keywords}{$database}{$keyword}) {
215 # handle getting needed results
216 for my $action (@get_needed) {
219 # handle parsing needed results
220 for my $action (@parse_needed) {
223 # handle combining results
228 print STDOUT map {($_,qq(\n))} @_;
232 print STDERR map {($_,qq(\n))} @_;
237 print STDERR map {($_,qq(\n))} @_;