handle new genecards site

[function2gene.git] / bin / combine_results
diff --git a/bin/combine_results b/bin/combine_results

index 8125c11fb517f270e953bf30c0c42574b65e07ca..21b1c9276f32e8995dccb5bc52e9560643cc5cc1 100755 (executable)
--- a/bin/combine_results
+++ b/bin/combine_results
@@ -1,13 +1,10 @@
  #! /usr/bin/perl
  
-# parse_ncbi_results retreives files of search results from ncbi,
-# and is released under the terms of the GPL version 2, or any later
-# version, at your option. See the file README and COPYING for more
-# information.
+# combine_results is part of the gene search suite, and is released
+# under the terms of the GPL version 2, or any later version, at your
+# option. See the files README and COPYING for more information.
  
-# Copyright 2004 by Don Armstrong <don@donarmstrong.com>.
-
-# $Id: ss,v 1.1 2004/06/29 05:26:35 don Exp $
+# Copyright 2006,2007 by Don Armstrong <don@donarmstrong.com>.
  
  
  use warnings;
@@ -19,15 +16,13 @@ use Pod::Usage;
  
  =head1 NAME
  
-  parse_ncbi_results [options]
+  combine_results -- combines parsed result files; outputs to stdout.
  
  =head1 SYNOPSIS
  
+ combine_results [options] parsed_results_1.txt [parsed_results_2.txt ...]
  
   Options:
-  --dir, -D directory to stick results into [default .]
-  --name, -n file naming scheme [default ${search}_results.$format]
-  --terms, -t file of search terms [default -]
    --debug, -d debugging level [default 0]
    --help, -h display this help
    --man, -m display manual
@@ -52,7 +47,7 @@ Display this manual.
  
  =head1 EXAMPLES
  
-  parse_ncbi_results -D ./ncbi_results/ -n '${search}_name.html' < search_parameters
+  combine_results foo_1.txt
  
  Will pretty much do what you want
  
@@ -60,10 +55,9 @@ Will pretty much do what you want
  
  
  
-use vars qw($DEBUG $REVISION);
+use vars qw($DEBUG);
  
  BEGIN{
-     ($REVISION) = q$LastChangedRevision: 1$ =~ /LastChangedRevision:\s+([^\s]+)/;
       $DEBUG = 0 unless defined $DEBUG;
  }
  
@@ -75,8 +69,6 @@ use IO::File;
  my %options = (debug    => 0,
                help     => 0,
                man      => 0,
-              dir      => '.',
-              keyword  => undef,
               );
  
  GetOptions(\%options,'keyword|k=s','debug|d+','help|h|?','man|m');
@@ -112,6 +104,7 @@ for my $file_name (@ARGV) {
           $genes{$gene[NAME]}{database}{$gene[DBNAME]}++;
           $genes{$gene[NAME]}{hits}++;
           $genes{$gene[NAME]}{terms}{$gene[KEYWORD]}++;
+         $genes{$gene[NAME]}{terms}{$gene[KEYWORD].'['.$gene[DBNAME].']'}++;
           add_unique_parts($genes{$gene[NAME]},'refseq',$gene[REFSEQ]);
           add_if_better($genes{$gene[NAME]},'description',$gene[DESCRIPTION]);
           add_if_better($genes{$gene[NAME]},'location',$gene[LOCATION]);
@@ -122,7 +115,7 @@ for my $file_name (@ARGV) {
  
  print join(',',map {qq("$_")} @csv_fields),qq(\n);
  for my $gene (keys %genes) {
-     $genes{$gene}{rzscore} = scalar keys %{$genes{$gene}{terms}};
+     $genes{$gene}{rzscore} = scalar grep {$_ !~ /\[/} keys %{$genes{$gene}{terms}};
       next if $genes{$gene}{rzscore} == 1 and exists $genes{$gene}{terms}{antigen};
       $genes{$gene}{rzscore} -= 1 if exists $genes{$gene}{terms}{antigen};
       print STDOUT join (',',