2 # function2gene, is part of the function2gene suite and is released
3 # under the terms of the GPL version 2, or any later version, at your
4 # option. See the file README and COPYING for more information.
5 # Copyright 2007 by Don Armstrong <don@donarmstrong.com>.
19 function2gene - Call out to each of the search modules to search for
24 function2gene --keywords keywords.txt --results gene_search_results
27 --keywords newline-delimited list of keywords to search for
28 --results directory to store results in
29 --database databases to search
30 --restart-at mode to start searching at
31 --debug, -d debugging level (Default 0)
32 --help, -h display this help
33 --man, -m display manual
41 A file which contains a newline-delimited list of keywords to search
42 for. Can be specified multiple times. Lines starting with # or ; are
47 Directory in which to store results; also stores the current state of
52 Databases to search, can be specified multiple times. [Defaults to
53 NCBI, GeneCards and Harvester, the only currently supported
58 If you need to restart the process at a particular state (which has
59 already been completed) specify this option. Valid values are get,
64 Debug verbosity. (Default 0)
68 Display brief usage information.
78 # Search all databases for transferrin
79 echo 'transferrin' > keywords.txt
80 function2gene --keywords keywords.txt --results keyword_results
83 function2gene --keywords keywords.txt --results keyword_results \
92 use Storable qw(thaw freeze);
93 use File::Basename qw(basename dirname);
96 my %options = (databases => [],
104 GetOptions(\%options,'keywords=s@','databases=s@',
105 'restart_at|restart-at=s','results=s',
106 'debug|d+','help|h|?','man|m');
108 pod2usage() if $options{help};
109 pod2usage({verbose=>2}) if $options{man};
111 my $base_dir = dirname(abs_path($0));
115 $ERRORS.="restart-at must be one of get, parse or combine\n" if
116 exists $options{restart_at} and $options{restart_at} !~ /^(?:get|parse|combine)$/;
118 $ERRORS.="unknown database(s)" if
119 @{$options{databases}} and
120 grep {$_ !~ /^(?:ncbi|genecard|harvester)$/i} @{$options{databases}};
122 if (not length $options{results}) {
123 $ERRORS.="results directory not specified";
125 elsif (not -d $options{results} or not -w $options{results}) {
126 $ERRORS.="results directory $options{results} does not exist or is not writeable";
129 pod2usage($ERRORS) if length $ERRORS;
131 if (not @{$options{databases}}) {
132 $options{databases} = [qw(ncbi genecard harvester)]
135 $DEBUG = $options{debug};
137 # There are three states for our engine
142 # first, check to see if the state in the result directory exists
146 $options{keywords} = [map {abs_path($_)} @{$options{keywords}}];
148 chdir $options{results} or die "Unable to chdir to $options{results}";
150 if (-e "do_it_all_state") {
151 ADVISE("Using existing state information");
152 my $state_fh = IO::File->new("do_it_all_state",'r') or die
153 "Unable to open state file for reading: $!";
155 my $state_file = <$state_fh>;
156 %state = %{thaw($state_file)} or die "Unable to thaw state file";
159 ADVISE("Starting new run");
160 %state = (keywords => [],
161 databases => [map {lc($_)} @{$options{databases}}],
171 if (@{$options{keywords}}) {
174 @old_keywords{@{$state{keywords}}} = (1) x @{$state{keywords}};
175 for my $keyword_file (@{$options{keywords}}) {
176 my $keyword_fh = IO::File->new($keyword_file,'r') or die
177 "Unable to open $keyword_file for reading: $!";
178 while (<$keyword_fh>) {
182 if (not $old_keywords{$_}) {
183 DEBUG("Adding new keyword '$_'");
184 push @new_keywords, $_;
187 DEBUG("Not adding duplicate keyword '$_'");
191 push @{$state{keywords}},@new_keywords;
194 if (exists $options{restart_at} and length $options{restart_at}) {
195 if (lc($options{restart_at}) eq 'get') {
196 delete $state{done_keywords}{get};
197 delete $state{done_keywords}{parse};
198 delete $state{done_keywords}{combine};
200 elsif (lc($options{restart_at}) eq 'parse') {
201 delete $state{done_keywords}{parse};
202 delete $state{done_keywords}{combine};
204 elsif (lc($options{restart_at}) eq 'combine') {
205 delete $state{done_keywords}{combine};
209 # now we need to figure out what has to happen
210 # for each keyword, we check to see if we've got results, parsed
211 # results, and combined it. If not, we queue up those actions.
213 my %actions = (combine => 0,
218 if (not @{$state{keywords}}) {
219 ADVISE("There are no keywords specified");
222 for my $keyword (@{$state{keywords}}) {
223 for my $database (@{$state{databases}}) {
224 if (not exists $state{done_keywords}{get}{$database}{$keyword}) {
225 push @{$actions{get}{$database}}, $keyword;
226 delete $state{done_keywords}{parse}{$database}{$keyword} if
227 exists $state{done_keywords}{parse}{$database}{$keyword};
228 delete $state{done_keywords}{combine}{$database}{$keyword} if
229 exists $state{done_keywords}{combine}{$database}{$keyword};
231 if (not exists $state{done_keywords}{parse}{$database}{$keyword}) {
232 push @{$actions{parse}{$database}},$keyword;
233 delete $state{done_keywords}{combine}{$database}{$keyword} if
234 exists $state{done_keywords}{combine}{$database}{$keyword};
236 if (not exists $state{done_keywords}{combine}{$database}{$keyword}) {
237 $actions{combine} = 1;
243 for my $state (qw(get parse)) {
245 for my $database (keys %{$actions{$state}}) {
246 next unless @{$actions{$state}{$database}};
247 $databases{$database}{queue} = Thread::Queue->new
248 or die "Unable to create new thread queue";
249 $databases{$database}{thread} = threads->create(\&handle_action,$state,$database,$databases{$database}{queue})
250 or die "Unable to create new thread";
251 $databases{$database}{queue}->enqueue(@{$actions{$state}{$database}});
252 $databases{$database}{queue}->enqueue(undef);
255 for my $database (keys %databases) {
256 my ($actioned_keywords,$failed_keywords) = @{$databases{$database}{thread}->join||[]};
257 if (not defined $failed_keywords) {
258 ADVISE("Something bad happened during '$state' of '$database'");
261 elsif (@{$failed_keywords}) {
262 ADVISE("These keywords failed during '$state' of '$database':",@{$failed_keywords});
265 @{$state{done_keywords}{$state}{$database}}{@{$actioned_keywords}} = (1) x @{$actioned_keywords};
266 delete @{$state{done_keywords}{$state}{$database}}{@{$failed_keywords}};
270 WARN("Stoping, as there are errors");
275 if ($actions{combine}) {
277 # deal with combining results
278 my @parsed_results = map { my $db = $_;
280 "parsed_results_${db}_${_}.txt"
281 } keys %{$state{done_keywords}{parse}{$db}}
282 } keys %{$state{done_keywords}{parse}};
284 write_command_to_file('combined_results.txt',
285 "$base_dir/combine_results",
288 for my $result (@parsed_results) {
289 $result =~ s/^parsed_results_//;
290 $result =~ s/\.txt$//;
291 my ($db,$keyword) = split /_/, $result, 2;
# Record completion under 'combine' -- the key the scheduling loop tests and that --restart-at deletes -- so a finished combine step is not queued again on every run.
292 $state{done_keywords}{combine}{$db}{$keyword} = 1;
295 write_command_to_file('combined_results_table.txt',
296 "$base_dir/results_to_table",
297 'combined_results.txt',
299 ADVISE("Finished; results in $options{results}/combined_results.txt");
302 ADVISE('Nothing to do. [Perhaps you wanted --restart-at?]');
306 my ($state,$database,$queue) = @_;
308 my $actioned_keywords = [];
309 my $failed_keywords = [];
310 DEBUG("Beginning to handle actions for state '$state' database '$database'");
# Stop only on the undef sentinel enqueued after the keywords; a plain truth test would also end the loop early on a false keyword such as '0'.
311 while (defined($keyword = $queue->dequeue)) {
312 DEBUG("Handling state '$state' database '$database' keyword '$keyword'");
313 # handle the action, baybee
314 if ($state eq 'get') {
317 open($command_fh,'|-',
318 "$base_dir/get_${database}_results",
319 ) or die "unable to execute '$base_dir/get_${database}_results'";
320 print {$command_fh} "$keyword\n" or die "unable to print $keyword to 'get_${database}_results'";
321 close($command_fh) or die "Unable to close filehandle";
323 die "get_${database}_results with keyword $keyword failed with error code ".($?>>8);
328 push @{$failed_keywords}, $keyword;
332 elsif ($state eq 'parse') {
334 write_command_to_file("parsed_results_${database}_${keyword}.txt",
335 "$base_dir/parse_${database}_results",
341 WARN("parse_${database}_results failed with $@");
342 push @{$failed_keywords}, $keyword;
347 die "I don't know how to handle state $state";
349 ADVISE("$state results from '$database' for '$keyword'");
350 push @{$actioned_keywords},$keyword;
352 return [$actioned_keywords,$failed_keywords];
357 my $state_fh = IO::File->new("do_it_all_state",'w') or die
358 "Unable to open state file for writing: $!";
359 print {$state_fh} freeze($state) or die "Unable to freeze state file";
360 close $state_fh or die "Unable to close state file: $!";
363 sub write_command_to_file{
364 my ($file,@command) = @_;
365 my $fh = IO::File->new($file,'w') or
366 die "Unable to open $file for writing: $!";
368 open($command_fh,'-|',
370 ) or die "Unable to execute $command[0] $!";
371 print {$fh} <$command_fh>;
373 close $command_fh or die "$command[0] failed with ".($?>>8);
378 print STDOUT map {($_,qq(\n))} @_;
382 print STDERR map {($_,qq(\n))} @_;
387 print STDERR map {($_,qq(\n))} @_;