X-Git-Url: https://git.donarmstrong.com/?p=function2gene.git;a=blobdiff_plain;f=bin%2Ffunction2gene;h=f3bd3ddfd7364a89afac2c4d391e43b3787e8b8d;hp=ed9a38ca23a3b948c7c07913c62efde34d1949a5;hb=364cd5a25c2bb985b587a6b0b5d2e23de18d9c10;hpb=a4b9214b7939fbe990f2373684dfd7aa1f9e77e2 diff --git a/bin/function2gene b/bin/function2gene index ed9a38c..f3bd3dd 100755 --- a/bin/function2gene +++ b/bin/function2gene @@ -28,6 +28,7 @@ use Storable; --results directory to store results in --database databases to search --restart-at mode to start searching at + --invalidate-state state to invalidate --debug, -d debugging level (Default 0) --help, -h display this help --man, -m display manual @@ -40,7 +41,9 @@ use Storable; A file which contains a newline delinated list of keywords to search for. Can be specified multiple times. Lines starting with # or ; are -ignored. +ignored. An optional weight can be specified after the keyword, which +is separated from the keyword by a tab. (If not specified, 1 is +assumed.) =item B<--results> @@ -59,6 +62,14 @@ If you need to restart the process at a particular state (which has already been completed) specify this option. Valid values are get, parse, or combine. +=item B<--invalidate-state> + +This is a more powerful version of --restart-at, which can +specifically invalidate a certain method,database,keyword combination. + +For example, you can request that the keyword foo be retrieved again +from ncbi using --invalidate-state 'get,ncbi,foo' + =item B<--debug, -d> Debug verbosity. 
(Default 0) @@ -99,10 +110,12 @@ my %options = (databases => [], help => 0, man => 0, results => '', + invalidate_state => [], ); GetOptions(\%options,'keywords=s@','databases=s@', 'restart_at|restart-at=s','results=s', + 'invalidate_state|invalidate-state=s@', 'debug|d+','help|h|?','man|m'); pod2usage() if $options{help}; @@ -147,9 +160,9 @@ $options{keywords} = [map {abs_path($_)} @{$options{keywords}}]; chdir $options{results} or die "Unable to chdir to $options{results}"; -if (-e "do_it_all_state") { +if (-e "function2gene_state") { ADVISE("Using existing state information"); - my $state_fh = IO::File->new("do_it_all_state",'r') or die + my $state_fh = IO::File->new("function2gene_state",'r') or die "Unable to open state file for reading: $!"; local $/; my $state_file = <$state_fh>; @@ -179,6 +192,9 @@ if (@{$options{keywords}}) { next if /^\s*[#;]/; next unless /\w+/; chomp; + my ($keyword,$weight) = split /\t/, $_; + $weight = 1 if not defined $weight; + $state{keyword_weight}{$keyword} = $weight; if (not $old_keywords{$_}) { DEBUG("Adding new keyword '$_'"); push @new_keywords, $_; @@ -206,6 +222,47 @@ if (exists $options{restart_at} and length $options{restart_at}) { } } +if (exists $options{invalidate_state}) { + for my $invalidate_state (@{$options{invalidate_state}}) { + my ($method,$database,$keyword) = split /,/, $invalidate_state; + if (grep {not defined $_ } ($method,$database,$keyword) ) { + print STDERR "The invalidate state option '$invalidate_state' is invalid.\n"; + next; + } + if (not exists $state{done_keywords}{$method}) { + print STDERR "Method '$method' does not exist, and cannot be invalidated\n"; + next; + } + if (not exists $state{done_keywords}{$method}{$database}) { + print STDERR "Database '$database' does not exist for method '$method', and cannot be invalidated\n"; + next; + } + if (not length $keyword) { + delete $state{done_keywords}{$method}{$database}; + if ($method eq 'get') { + delete $state{done_keywords}{parse}{$database}; + 
delete $state{done_keywords}{combine}{$database}; + } + if ($method eq 'parse') { + delete $state{done_keywords}{combine}{$database}; + } + next; + } + if (not exists $state{done_keywords}{$method}{$database}{$keyword}) { + print STDERR "Keyword '$keyword' does not exist for database '$database' and method '$method', and cannot be invalidated\n"; + next; + } + delete $state{done_keywords}{$method}{$database}{$keyword}; + if ($method eq 'get') { + delete $state{done_keywords}{parse}{$database}{$keyword}; + delete $state{done_keywords}{combine}{$database}{$keyword}; + } + if ($method eq 'parse') { + delete $state{done_keywords}{combine}{$database}{$keyword}; + } + } +} + # now we need to figure out what has to happen # for each keyword, we check to see if we've got results, parsed # results, and combined it. If not, we queue up those actions. @@ -267,7 +324,7 @@ for my $state (qw(get parse)) { } save_state(\%state); if ($ERRORS) { - WARN("Stoping, as there are errors"); + WARN("Stoping, as there are errors"); exit 1; } } @@ -281,8 +338,12 @@ if ($actions{combine}) { } keys %{$state{done_keywords}{parse}{$db}} } keys %{$state{done_keywords}{parse}}; + # create temporary file to store keyword weights + write_command_to_file('combined_results.txt', "$base_dir/combine_results", + '--keywords', + @parsed_results, ); for my $result (@parsed_results) { @@ -354,7 +415,7 @@ sub handle_action{ sub save_state{ my ($state) = @_; - my $state_fh = IO::File->new("do_it_all_state",'w') or die + my $state_fh = IO::File->new("function2gene_state",'w') or die "Unable to open state file for writing: $!"; print {$state_fh} freeze($state) or die "Unable to freeze state file"; close $state_fh or die "Unable to close state file: $!";