2 # do_it_all is part of the gene search suite and is released
3 # under the terms of the GPL version 2, or any later version, at your
4 # option. See the file README and COPYING for more information.
5 # Copyright 2007 by Don Armstrong <don@donarmstrong.com>.
6 # $Id: perl_script 495 2006-08-10 08:02:01Z don $
19 do_it_all - Call out to each of the search modules to search for each
24 do_it_all --keywords keywords.txt --results gene_search_results
27 --keywords newline-delimited list of keywords to search for
28 --results directory to store results in
29 --database databases to search
30 --restart-at mode to start searching at
31 --debug, -d debugging level (Default 0)
32 --help, -h display this help
33 --man, -m display manual
41 A file which contains a newline-delimited list of keywords to search
42 for. Can be specified multiple times. Lines starting with # or ; are
47 Directory in which to store results; also stores the current state of
52 Databases to search, can be specified multiple times. [Defaults to
53 NCBI, GeneCards and Harvester, the only currently supported
58 If you need to restart the process at a particular state (which has
59 already been completed) specify this option.
63 Debug verbosity. (Default 0)
67 Display brief usage information.
84 use Storable qw(thaw freeze);
86 my %options = (databases => [],
94 GetOptions(\%options,'keywords=s@','databases=s@',
95 'restart_at|restart-at=s','results=s',
96 'debug|d+','help|h|?','man|m');
98 pod2usage() if $options{help};
99 pod2usage({verbose=>2}) if $options{man};
103 $ERRORS.="restart-at must be one of get, parse or combine\n" if
104 exists $options{restart_at} and $options{restart_at} !~ /^(?:get|parse|combine)$/;
# Validate --database and --results, appending one message per problem to
# $ERRORS. Every message ends in a newline (as the restart-at message does)
# so that several accumulated errors are reported on separate lines
# instead of being run together.
106 $ERRORS.="unknown database(s)\n" if
107 @{$options{databases}} and
108 grep {$_ !~ /^(?:ncbi|genecards|harvester)$/i} @{$options{databases}};
# The results directory is mandatory and must be an existing, writable
# directory.
110 if (not length $options{results}) {
111 $ERRORS.="results directory not specified\n";
113 elsif (not -d $options{results} or not -w $options{results}) {
114 $ERRORS.="results directory $options{results} does not exist or is not writeable\n";
117 pod2usage($ERRORS) if length $ERRORS;
119 if (not @{$options{databases}}) {
120 $options{databases} = [qw(ncbi genecards harvester)]
123 $DEBUG = $options{debug};
125 # There are three states for our engine
130 # first, check to see if the state in the result directory exists
134 $options{keywords} = [map {abs_path($_)} @{$options{keywords}}];
136 chdir $options{results} or die "Unable to chdir to $options{results}";
138 if (-e "do_it_all_state") {
139 ADVISE("Using existing state information");
140 my $state_fh = IO::File->new("do_it_all_state",'r') or die
141 "Unable to open state file for reading: $!";
143 my $state_file = <$state_fh>;
144 %state = %{thaw($state_file)} or die "Unable to thaw state file";
147 ADVISE("Starting new run");
148 %state = (keywords => [],
149 databases => [map {lc($_)} @{$options{databases}}],
159 if (@{$options{keywords}}) {
162 @old_keywords{@{$state{keywords}}} = (1) x @{$state{keywords}};
163 for my $keyword_file (@{$options{keywords}}) {
164 my $keyword_fh = IO::File->new($keyword_file,'r') or die
165 "Unable to open $keyword_file for reading: $!";
167 while (<$keyword_fh>) {
170 if (not $old_keywords{$_}) {
171 DEBUG("Adding new keyword '$_'");
172 push @new_keywords, $_;
175 DEBUG("Not adding duplicate keyword '$_'");
# --restart-at: forget the completion records for the named stage and for
# every stage after it, so that the scheduling pass queues them again.
# That pass decides what to run by looking only at
# $state{done_keywords}{get|parse|combine}, so those sub-hashes are
# cleared here. The top-level *_keywords keys are not read anywhere in the
# visible code; they are still removed in case other code relies on them
# (NOTE(review): confirm whether they are still needed).
181 if (exists $options{restart_at} and length $options{restart_at}) {
182 if (lc($options{restart_at}) eq 'get') {
183 delete $state{gotten_keywords};
184 delete $state{parsed_keywords};
185 delete $state{combined_keywords};
delete $state{done_keywords}{get};
delete $state{done_keywords}{parse};
delete $state{done_keywords}{combine};
187 elsif (lc($options{restart_at}) eq 'parse') {
188 delete $state{parsed_keywords};
189 delete $state{combined_keywords};
delete $state{done_keywords}{parse};
delete $state{done_keywords}{combine};
191 elsif (lc($options{restart_at}) eq 'combine') {
192 delete $state{combined_keywords};
delete $state{done_keywords}{combine};
196 # now we need to figure out what has to happen
197 # for each keyword, we check to see if we've got results, parsed
198 # results, and combined it. If not, we queue up those actions.
200 my %actions = (combine => 0,
205 if (not @{$state{keywords}}) {
206 ADVISE("There are no keywords specified");
209 for my $keyword (@{$state{keywords}}) {
210 for my $database (@{$state{databases}}) {
211 if (not exists $state{done_keywords}{get}{$database}{$keyword}) {
212 push @{$actions{get}{$database}}, $keyword;
213 delete $state{done_keywords}{parse}{$database}{$keyword} if
214 exists $state{done_keywords}{parse}{$database}{$keyword};
215 delete $state{done_keywords}{combine}{$database}{$keyword} if
216 exists $state{done_keywords}{combine}{$database}{$keyword};
218 if (not exists $state{done_keywords}{parse}{$database}{$keyword}) {
219 push @{$actions{parse}{$database}},$keyword;
220 delete $state{done_keywords}{combine}{$database}{$keyword} if
221 exists $state{done_keywords}{combine}{$database}{$keyword};
223 if (not exists $state{done_keywords}{combine}{$database}{$keyword}) {
224 $actions{combine} = 1;
232 for my $state (qw(get parse)) {
234 for my $database (keys %{$actions{$state}}) {
235 next unless @{$actions{$state}{$database}};
# Give this database its own work queue and worker thread, then feed the
# queue with the keywords pending for the current stage.
236 $databases{$database}{queue} = Thread::Queue->new;
# The code reference and its arguments are passed separately:
# \&handle_action(...) would call the handler right here in the main
# thread and hand threads->new references to its return values. The queue
# is looked up under $database, not under the literal key 'database'
# (which does not exist, so the worker would receive undef). The list
# assignment creates the thread in list context, so that ->join returns
# both values that handle_action returns.
237 ($databases{$database}{thread}) = threads->new(\&handle_action,$state,$database,$databases{$database}{queue});
238 $databases{$database}{queue}->enqueue(@{$actions{$state}{$database}});
# A trailing undef tells the worker that the queue is exhausted.
239 $databases{$database}{queue}->enqueue(undef);
242 for my $database (keys %databases) {
243 my ($actioned_keywords,$failed_keywords) = $databases{$database}{thread}->join;
244 if (@{$failed_keywords}) {
245 ADVISE("These keywords failed during '$state' of '$database':",@{$failed_keywords});
248 @{$state{done_keywords}{$state}{$database}}{@{$actioned_keywords}} = (1) x @{$actioned_keywords};
249 delete @{$state{done_keywords}{$state}{$database}}{@{$failed_keywords}};
253 WARN("Stoping, as there are errors");
# Worker body: takes the stage name, the database name and the queue to
# read keywords from. Processes keywords until the undef end marker is
# dequeued and returns two array references: the keywords that were
# actioned and the keywords that failed.
259 my ($state,$database,$queue) = @_;
# Start from empty array references. Assigning () to a scalar leaves it
# undef, and the code that joins the thread dereferences both results
# unconditionally, which dies under strict refs when nothing was pushed.
261 my $actioned_keywords = [];
262 my $failed_keywords = [];
# Test definedness rather than truth, so that only the undef end marker
# stops the loop; a keyword such as "0" is still processed.
263 while (defined($keyword = $queue->dequeue)) {
264 # handle the action, baybee
265 ADVISE("$state results from '$database' for '$keyword'");
266 push @{$actioned_keywords},$keyword;
268 return ($actioned_keywords,$failed_keywords);
273 my $state_fh = IO::File->new("do_it_all_state",'w') or die
274 "Unable to open state file for writing: $!";
275 print {$state_fh} freeze($state) or die "Unable to freeze state file";
276 close $state_fh or die "Unable to close state file: $!";
281 print STDOUT map {($_,qq(\n))} @_;
285 print STDERR map {($_,qq(\n))} @_;
290 print STDERR map {($_,qq(\n))} @_;