X-Git-Url: https://git.donarmstrong.com/?p=function2gene.git;a=blobdiff_plain;f=bin%2Ffunction2gene;h=f3bd3ddfd7364a89afac2c4d391e43b3787e8b8d;hp=07dd2aed5f40500c9c7fc59fbae01c8d58444b28;hb=364cd5a25c2bb985b587a6b0b5d2e23de18d9c10;hpb=0486e6434cd182e8b30ba008d98ad3d3f008d2ab diff --git a/bin/function2gene b/bin/function2gene index 07dd2ae..f3bd3dd 100755 --- a/bin/function2gene +++ b/bin/function2gene @@ -28,6 +28,7 @@ use Storable; --results directory to store results in --database databases to search --restart-at mode to start searching at + --invalidate-state state to invalidate --debug, -d debugging level (Default 0) --help, -h display this help --man, -m display manual @@ -40,7 +41,9 @@ use Storable; A file which contains a newline delinated list of keywords to search for. Can be specified multiple times. Lines starting with # or ; are -ignored. +ignored. An optional weight can be specified after the keyword, which +is separated from the keyword by a tab. (If not specified, 1 is +assumed.) =item B<--results> @@ -56,7 +59,16 @@ databases.] =item B<--restart-at> If you need to restart the process at a particular state (which has -already been completed) specify this option. +already been completed) specify this option. Valid values are get, +parse, or combine. + +=item B<--invalidate-state> + +This is a more powerful version of --restart-at, which can +specifically invalidate a certain method,database,keyword combination. + +For example, you can request that the keyword foo be retrieved again +from ncbi using --invalidate-state 'get,ncbi,foo' =item B<--debug, -d> @@ -78,6 +90,10 @@ Display this manual. 
echo 'transferrin' > keywords.txt function2gene --keywords keywords.txt --results keyword_results + # reparse the results + function2gene --keywords keywords.txt --results keyword_results \ + --restart-at parse + =cut @@ -94,10 +110,12 @@ my %options = (databases => [], help => 0, man => 0, results => '', + invalidate_state => [], ); GetOptions(\%options,'keywords=s@','databases=s@', 'restart_at|restart-at=s','results=s', + 'invalidate_state|invalidate-state=s@', 'debug|d+','help|h|?','man|m'); pod2usage() if $options{help}; @@ -142,9 +160,9 @@ $options{keywords} = [map {abs_path($_)} @{$options{keywords}}]; chdir $options{results} or die "Unable to chdir to $options{results}"; -if (-e "do_it_all_state") { +if (-e "function2gene_state") { ADVISE("Using existing state information"); - my $state_fh = IO::File->new("do_it_all_state",'r') or die + my $state_fh = IO::File->new("function2gene_state",'r') or die "Unable to open state file for reading: $!"; local $/; my $state_file = <$state_fh>; @@ -174,6 +192,9 @@ if (@{$options{keywords}}) { next if /^\s*[#;]/; next unless /\w+/; chomp; + my ($keyword,$weight) = split /\t/, $_; + $weight = 1 if not defined $weight; + $state{keyword_weight}{$keyword} = $weight; if (not $old_keywords{$_}) { DEBUG("Adding new keyword '$_'"); push @new_keywords, $_; @@ -201,6 +222,47 @@ if (exists $options{restart_at} and length $options{restart_at}) { } } +if (exists $options{invalidate_state}) { + for my $invalidate_state (@{$options{invalidate_state}}) { + my ($method,$database,$keyword) = split /,/, $invalidate_state; + if (grep {not defined $_ } ($method,$database,$keyword) ) { + print STDERR "The invalidate state option '$invalidate_state' is invalid.\n"; + next; + } + if (not exists $state{done_keywords}{$method}) { + print STDERR "Method '$method' does not exist, and cannot be invalidated\n"; + next; + } + if (not exists $state{done_keywords}{$method}{$database}) { + print STDERR "Database '$database' does not exist for method 
'$method', and cannot be invalidated\n"; + next; + } + if (not length $keyword) { + delete $state{done_keywords}{$method}{$database}; + if ($method eq 'get') { + delete $state{done_keywords}{parse}{$database}; + delete $state{done_keywords}{combine}{$database}; + } + if ($method eq 'parse') { + delete $state{done_keywords}{combine}{$database}; + } + next; + } + if (not exists $state{done_keywords}{$method}{$database}{$keyword}) { + print STDERR "Keyword '$keyword' does not exist for database '$database' and method '$method', and cannot be invalidated\n"; + next; + } + delete $state{done_keywords}{$method}{$database}{$keyword}; + if ($method eq 'get') { + delete $state{done_keywords}{parse}{$database}{$keyword}; + delete $state{done_keywords}{combine}{$database}{$keyword}; + } + if ($method eq 'parse') { + delete $state{done_keywords}{combine}{$database}{$keyword}; + } + } +} + # now we need to figure out what has to happen # for each keyword, we check to see if we've got results, parsed # results, and combined it. If not, we queue up those actions. @@ -262,7 +324,7 @@ for my $state (qw(get parse)) { } save_state(\%state); if ($ERRORS) { - WARN("Stoping, as there are errors"); + WARN("Stoping, as there are errors"); exit 1; } } @@ -276,8 +338,12 @@ if ($actions{combine}) { } keys %{$state{done_keywords}{parse}{$db}} } keys %{$state{done_keywords}{parse}}; + # create temporary file to store keyword weights + write_command_to_file('combined_results.txt', "$base_dir/combine_results", + '--keywords', + @parsed_results, ); for my $result (@parsed_results) { @@ -287,7 +353,11 @@ if ($actions{combine}) { $state{done_keywords}{combined}{$db}{$keyword} = 1; } save_state(\%state); - ADVISE("Finished; results in $options{results}/combined_results"); + write_command_to_file('combined_results_table.txt', + "$base_dir/results_to_table", + 'combined_results.txt', + ); + ADVISE("Finished; results in $options{results}/combined_results.txt"); } else { ADVISE('Nothing to do. 
[Perhaps you wanted --restart-at?]'); @@ -345,7 +415,7 @@ sub handle_action{ sub save_state{ my ($state) = @_; - my $state_fh = IO::File->new("do_it_all_state",'w') or die + my $state_fh = IO::File->new("function2gene_state",'w') or die "Unable to open state file for writing: $!"; print {$state_fh} freeze($state) or die "Unable to freeze state file"; close $state_fh or die "Unable to close state file: $!";