6 use List::Util qw(sum);
# Require exactly one command-line argument: the search results file name.
9 die "search_resultsfile" unless @ARGV == 1;
10 my ($search_results_fn) = @ARGV;
# Open the results file in read mode; abort with the system error text on failure.
12 my $search_results_fh = new IO::File $search_results_fn, 'r' or die "Unable to read $search_results_fn: $!";
14 # read in the search results
# Each input line is one record of quoted, comma-separated fields.
20 while (<$search_results_fh>) {
21 # "name","hits","rzscore","refseq","location","alias","database","terms","description","function"
# Skip the header row, recognised by its leading "name" column title.
22 next if $_ =~ /^\"name\"\,/;
# Split the line on the quote-comma-quote separators, strip a leading and a
# trailing double quote from every field, and load the fields into %record.
25 @record{qw(name hits rzscore refseq location alias database terms description function)} = map {s/^"//; s/"$//; $_;} split /\"\,\"/;
# Keep a shallow copy of the record; $#results is now the index of that copy.
26 push @results,{%record};
# Index the record under its lower-cased name ...
27 push @{$name_idx{lc($record{name})}}, $#results;
# ... and under each lower-cased alias (aliases are separated by semicolons
# with optional surrounding whitespace).
28 foreach (map {lc($_)} split /\s*;\s*/, $record{alias}) {
29 push @{$name_idx{$_}}, $#results;
# A refseq may appear in only one record: abort (reporting the input line
# number via $.) if this one was already indexed, otherwise remember which
# @results entry holds it.  The index is stored under the lower-cased refseq,
# the same key the duplicate test looks up, so a repeated refseq is detected.
31 die "Duplicate refseq at record $." if exists $refseq_idx{lc $record{refseq}};
32 $refseq_idx{lc $record{refseq}} = $#results;
# The terms field holds individual terms separated by a semicolon and a space.
33 my @terms = split '; ', $record{terms};
38 for my $term (@terms) {
# NOTE(review): two alternative parses of $term follow; the condition that
# selects between them is not visible in this listing.
# Form 1: keyword[database]:hits
40 my ($keyword,$database,$hits) = $term =~ /([^[]+)\[([^\]]+)\]:(\d+)/;
# Replace each hyphen, plus sign and underscore in the keyword with a space.
41 $keyword =~ s/[-+_]/ /g;
# Record the keyword/database pairing in both directions ...
44 $gene_temp{$keyword}{$database} = 1;
45 $gene_temp2{$database}{$keyword} = 1;
# ... and bump the per-database, per-keyword occurrence counters.
46 $databases{$database}{$keyword}{count}++;
47 $db_temp{$database}++;
48 $terms{$keyword}{$database}{count}++;
# Form 2: keyword:hits (no database part); the keyword is normalised the same
# way and counted under the keyword total.
51 my ($keyword,$hits) = $term =~ /([^:]+):(\d+)/;
52 $keyword =~ s/[-+_]/ /g;
55 $terms{$keyword}{total}{count}++;
# %gene_temp holds exactly one keyword: count it as unique for that keyword.
58 if (keys %gene_temp == 1) {
59 $terms{[keys %gene_temp]->[0]}{total}{unique}++;
# If that single keyword is paired with exactly one database, also count it
# as unique in the grand total.
60 if (keys %{$gene_temp{[keys %gene_temp]->[0]}} == 1) {
61 $databases{total}{total}{unique}++
# %gene_temp2 holds exactly one database: count as unique for that database.
64 if (keys %gene_temp2 == 1) {
65 $databases{[keys %gene_temp2]->[0]}{total}{unique}++;
# Every keyword paired with exactly one database is unique for that
# keyword/database combination.
67 for my $keyword (keys %gene_temp) {
68 if (keys %{$gene_temp{$keyword}} == 1) {
69 $terms{$keyword}{[keys %{$gene_temp{$keyword}}]->[0]}{unique}++;
# One count for each database present in %db_temp, plus one for the grand total.
72 for my $database (keys %db_temp) {
73 $databases{$database}{total}{count}++;
75 $databases{total}{total}{count}++;
# Package variables printed by the picture line below; their initial values
# are the column headings.  The picture prints $keyword left-justified and the
# four remaining columns right-justified, with ampersands between columns and
# a trailing double backslash (presumably LaTeX table rows - TODO confirm).
# NOTE(review): the format header line is not visible in this listing.
78 our ($keyword,$gct,$hvt,$nct,$t) = ('Keyword','GeneCards','Harvester','NCBI','Total');
80 @<<<<<<<<<<<<<<<<<<<<<< & @>>>>>>>>>> & @>>>>>>>>>> & @>>>>>>>>>> & @>>>>>>>>>> \\
81 $keyword, $gct, $hvt, $nct, $t
# One row per keyword in sorted order; the loop reuses the package variable
# $keyword rather than a new lexical.
85 for $keyword (sort keys %terms) {
# Each cell is rendered as: count, then the unique count in parentheses.
93 "$_->{count} ($_->{unique})";
# The cells come from the genecard, harvester, ncbi and total entries of this keyword.
95 } @{$terms{$keyword}}{qw(genecard harvester ncbi total)};
# Row built from the per-database totals, using the same cell rendering.
100 ($gct,$hvt,$nct,$t) =
# An undefined entry is handled separately; that branch body is not visible here.
102 if (not defined $_) {
107 "$_->{count} ($_->{unique})";
# Take the total entry of each of the genecard, harvester, ncbi and total databases.
109 } map {$_->{total}} @databases{qw(genecard harvester ncbi total)};
110 #($gct,$hvt,$nct,$t) = map {$_->{total}} @databases{qw(genecard harvester ncbi total)};