--results directory to store results in
--database databases to search
--restart-at mode to start searching at
+ --invalidate-state state to invalidate
--debug, -d debugging level (Default 0)
--help, -h display this help
--man, -m display manual
A file which contains a newline-delimited list of keywords to search
for. Can be specified multiple times. Lines starting with # or ; are
-ignored.
+ignored. An optional weight can be specified after the keyword, which
+is separated from the keyword by a tab. (If not specified, 1 is
+assumed.)
=item B<--results>
already been completed) specify this option. Valid values are get,
parse, or combine.
+=item B<--invalidate-state>
+
+This is a more powerful version of --restart-at, which can
+specifically invalidate a certain method,database,keyword combination.
+
+For example, you can request that the keyword foo be retrieved again
+from ncbi using --invalidate-state 'get,ncbi,foo'
+
=item B<--debug, -d>
Debug verbosity. (Default 0)
help => 0,
man => 0,
results => '',
+ invalidate_state => [],
);
GetOptions(\%options,'keywords=s@','databases=s@',
'restart_at|restart-at=s','results=s',
+ 'invalidate_state|invalidate-state=s@',
'debug|d+','help|h|?','man|m');
pod2usage() if $options{help};
$ERRORS.="unknown database(s)" if
@{$options{databases}} and
- grep {$_ !~ /^(?:ncbi|genecard|harvester)$/i} @{$options{databases}};
+ grep {$_ !~ /^(?:ncbi|genecard|harvester|ensembl|uniprot)$/i} @{$options{databases}};
if (not length $options{results}) {
$ERRORS.="results directory not specified";
pod2usage($ERRORS) if length $ERRORS;
if (not @{$options{databases}}) {
- $options{databases} = [qw(ncbi genecard harvester)]
+ $options{databases} = [qw(ncbi genecard harvester ensembl uniprot)]
}
$DEBUG = $options{debug};
chdir $options{results} or die "Unable to chdir to $options{results}";
-if (-e "do_it_all_state") {
+if (-e "function2gene_state") {
ADVISE("Using existing state information");
- my $state_fh = IO::File->new("do_it_all_state",'r') or die
+ my $state_fh = IO::File->new("function2gene_state",'r') or die
"Unable to open state file for reading: $!";
local $/;
my $state_file = <$state_fh>;
next if /^\s*[#;]/;
next unless /\w+/;
chomp;
+ s/\r$//;
+ my ($keyword,$weight) = split /\t/, $_;
+ $weight = 1 if not defined $weight;
+ $state{keyword_weight}{$keyword} = $weight;
if (not $old_keywords{$_}) {
DEBUG("Adding new keyword '$_'");
push @new_keywords, $_;
}
}
+# Handle --invalidate-state.  Each value has the form
+# 'method,database,keyword'.  The matching entry is removed from
+# $state{done_keywords}, together with the entries of the stages that
+# depend on it.  An empty keyword (e.g. 'get,ncbi,') invalidates every
+# keyword of that database.
+if (exists $options{invalidate_state}) {
+    # Stages that are cleared as well when a stage is invalidated.
+    # Both 'combine' and 'combined' are listed because finished
+    # combinations are recorded under the 'combined' key.
+    my %dependent_stages = (
+        get   => [qw(parse combine combined)],
+        parse => [qw(combine combined)],
+    );
+    for my $invalidate_state (@{$options{invalidate_state}}) {
+        # A limit of 3 keeps a trailing empty keyword field (which split
+        # would otherwise drop) and keeps commas inside the keyword.
+        my ($method,$database,$keyword) = split /,/, $invalidate_state, 3;
+        if (grep {not defined $_ } ($method,$database,$keyword) ) {
+            print STDERR "The invalidate state option '$invalidate_state' is invalid.\n";
+            next;
+        }
+        if (not exists $state{done_keywords}{$method}) {
+            print STDERR "Method '$method' does not exist, and cannot be invalidated\n";
+            next;
+        }
+        if (not exists $state{done_keywords}{$method}{$database}) {
+            print STDERR "Database '$database' does not exist for method '$method', and cannot be invalidated\n";
+            next;
+        }
+        # The requested stage first, then everything derived from it.
+        my @stages = ($method, @{$dependent_stages{$method} || []});
+        if (not length $keyword) {
+            # No keyword given: drop the whole database for each stage.
+            for my $stage (@stages) {
+                # Skip missing stages so nothing is autovivified.
+                next if not exists $state{done_keywords}{$stage};
+                delete $state{done_keywords}{$stage}{$database};
+            }
+            next;
+        }
+        if (not exists $state{done_keywords}{$method}{$database}{$keyword}) {
+            print STDERR "Keyword '$keyword' does not exist for database '$database' and method '$method', and cannot be invalidated\n";
+            next;
+        }
+        for my $stage (@stages) {
+            # Skip missing stages/databases so nothing is autovivified.
+            next if (not exists $state{done_keywords}{$stage})
+                or (not exists $state{done_keywords}{$stage}{$database});
+            delete $state{done_keywords}{$stage}{$database}{$keyword};
+        }
+    }
+}
+
# now we need to figure out what has to happen
# for each keyword, we check to see if we've got results, parsed
# results, and combined it. If not, we queue up those actions.
}
save_state(\%state);
if ($ERRORS) {
- WARN("Stoping, as there are errors");
+ # $ERRORS is non-empty: warn and exit with a failure status.
+ WARN("Stopping, as there are errors");
exit 1;
}
}
} keys %{$state{done_keywords}{parse}{$db}}
} keys %{$state{done_keywords}{parse}};
- write_command_to_file('combined_results.txt',
- "$base_dir/combine_results",
- @parsed_results,
- );
+ # Write the keyword weights (one "keyword<TAB>weight" line each) to
+ # a file that is handed to combine_results via --keywords.
+ my $file = IO::File->new('combined_keywords.txt','w') or
+     die "Unable to open combined_keywords.txt for writing: $!";
+ for my $keyword (keys %{$state{keyword_weight}}) {
+     print {$file} "$keyword\t$state{keyword_weight}{$keyword}\n"
+         or die "Unable to write to combined_keywords.txt: $!";
+ }
+ # Close before running combine_results so that the data is fully
+ # written and any deferred write error is reported here.
+ close $file or die "Unable to close combined_keywords.txt: $!";
+ # List-form system: arguments are passed without going through a shell.
+ system("$base_dir/combine_results",
+        '--keywords','combined_keywords.txt',
+        '--results','combined_results.txt',
+        '--results-table','combined_results_table.txt',
+        @parsed_results,
+       ) == 0
+     or die "combine_results failed with ".($?>>8);
for my $result (@parsed_results) {
$result =~ s/^parsed_results_//;
$result =~ s/\.txt$//;
$state{done_keywords}{combined}{$db}{$keyword} = 1;
}
save_state(\%state);
- write_command_to_file('combined_results_table.txt',
- "$base_dir/results_to_table",
- 'combined_results.txt',
- );
ADVISE("Finished; results in $options{results}/combined_results.txt");
}
else {
sub save_state{
my ($state) = @_;
- my $state_fh = IO::File->new("do_it_all_state",'w') or die
+ my $state_fh = IO::File->new("function2gene_state",'w') or die
"Unable to open state file for writing: $!";
print {$state_fh} freeze($state) or die "Unable to freeze state file";
close $state_fh or die "Unable to close state file: $!";