#! /usr/bin/perl

use warnings;
use strict;

use List::Util qw(sum);
use IO::File;

# Summarise a CSV file of search results as a table of keyword counts.
#
# Usage: pass exactly one argument, the path of the search-results file.
#
# Every input line is expected to hold ten double-quoted, comma-separated
# fields:
#   "name","hits","rzscore","refseq","location","alias","database","terms","description","function"
# The "terms" field is a '; '-separated list whose items look like either
# 'keyword[database]:hits' or 'keyword:hits'.
#
# One row per keyword is written to STDOUT, followed by a 'Total' row.  Cells
# are joined with ' & ' and each row ends in '\\' (presumably rows of a LaTeX
# tabular -- confirm with the consumer of this output).  A cell reads
# 'count (unique)'; '$-$' marks a combination that never occurred.

# Return the keyword with '-', '+' and '_' turned into spaces, trailing
# whitespace removed, and the first '*' removed.
sub _normalize_keyword {
    my ($text) = @_;

    $text =~ s/[-+_]/ /g;
    $text =~ s/\s*$//;
    # NOTE(review): only the first '*' is removed; confirm whether /g was meant.
    $text =~ s/[*]//;

    return $text;
}

# Render one table cell from a {count => N, unique => M} hash reference.
# An undefined reference yields '$-$'; a missing count or unique shows as 0
# instead of interpolating an undefined value.
sub _format_cell {
    my ($stats) = @_;

    return '$-$' if not defined $stats;

    my $count  = $stats->{count}  || 0;
    my $unique = $stats->{unique} || 0;

    return "$count ($unique)";
}

die "search_resultsfile" unless @ARGV == 1;

my ($search_results_fn) = @ARGV;

my $search_results_fh = IO::File->new($search_results_fn, 'r')
    or die "Unable to read $search_results_fn: $!";

# read in the search results
my @results;       # one hash reference per data line, in file order
my %name_idx;      # lower-cased name or alias => list of indexes into @results
my %refseq_idx;    # lower-cased refseq        => index into @results
my %databases;     # $databases{database}{keyword|total}{count|unique}
my %terms;         # $terms{keyword}{database|total}{count|unique}

while (my $line = <$search_results_fh>) {
    # "name","hits","rzscore","refseq","location","alias","database","terms","description","function"
    next if $line =~ /^\"name\"\,/;
    chomp $line;

    # Split on the "," between fields, then drop the quote that is left at
    # the start of the first field and at the end of the last one.
    my @fields = split /\"\,\"/, $line;
    for my $field (@fields) {
        $field =~ s/^"//;
        $field =~ s/"$//;
    }

    my %record;
    @record{qw(name hits rzscore refseq location alias database terms description function)} = @fields;

    push @results, {%record};
    my $record_idx = $#results;

    # A record can be looked up by its name and by each of its aliases.
    push @{ $name_idx{ lc($record{name}) } }, $record_idx;
    for my $alias (map { lc($_) } split /\s*;\s*/, $record{alias}) {
        push @{ $name_idx{$alias} }, $record_idx;
    }

    # The refseq index is keyed by the refseq itself, so that the duplicate
    # check below inspects the same key that is stored.  Records without a
    # refseq are not indexed, so a blank field is never reported as a
    # duplicate -- NOTE(review): confirm blank refseqs are acceptable input.
    my $refseq = $record{refseq};
    if (defined $refseq and length $refseq) {
        my $refseq_key = lc $refseq;
        die "Duplicate refseq at record $." if exists $refseq_idx{$refseq_key};
        $refseq_idx{$refseq_key} = $record_idx;
    }

    # Per-record views of the database-qualified terms, used afterwards to
    # decide which keywords and databases were the only one in this record.
    my %dbs_for_keyword;    # keyword  => { database => 1 }
    my %keywords_for_db;    # database => { keyword  => 1 }

    for my $term (split '; ', $record{terms}) {
        if ($term =~ /\[/) {
            # keyword[database]:hits
            my ($raw_keyword, $database) = $term =~ /([^[]+)\[([^\]]+)\]:(\d+)/;
            if (not defined $raw_keyword) {
                warn "Skipping unparseable term '$term' at record $.\n";
                next;
            }

            my $kw = _normalize_keyword($raw_keyword);
            $dbs_for_keyword{$kw}{$database} = 1;
            $keywords_for_db{$database}{$kw} = 1;
            $databases{$database}{$kw}{count}++;
            $terms{$kw}{$database}{count}++;
        }
        else {
            # keyword:hits, with no database attached
            my ($raw_keyword) = $term =~ /([^:]+):(\d+)/;
            if (not defined $raw_keyword) {
                warn "Skipping unparseable term '$term' at record $.\n";
                next;
            }

            my $kw = _normalize_keyword($raw_keyword);
            $terms{$kw}{total}{count}++;
        }
    }

    my @record_keywords  = keys %dbs_for_keyword;
    my @record_databases = keys %keywords_for_db;

    # The record has exactly one database-qualified keyword.
    if (@record_keywords == 1) {
        my $only_keyword = $record_keywords[0];
        $terms{$only_keyword}{total}{unique}++;

        # ... and that keyword came from exactly one database.
        if (keys %{ $dbs_for_keyword{$only_keyword} } == 1) {
            $databases{total}{total}{unique}++;
        }
    }

    # The record's qualified terms all came from a single database.
    if (@record_databases == 1) {
        $databases{ $record_databases[0] }{total}{unique}++;
    }

    # Keywords that were reported by exactly one database in this record.
    for my $kw (@record_keywords) {
        my @kw_databases = keys %{ $dbs_for_keyword{$kw} };
        $terms{$kw}{ $kw_databases[0] }{unique}++ if @kw_databases == 1;
    }

    # Count the record once for every database it mentions, and once overall.
    for my $database (@record_databases) {
        $databases{$database}{total}{count}++;
    }
    $databases{total}{total}{count}++;
}

$search_results_fh->close;

# The format below reads these package variables, so they have to be
# declared with 'our' rather than 'my'.  Their initial values form the
# header row.
our ($keyword,$gct,$hvt,$nct,$t) = ('Keyword','GeneCards','Harvester','NCBI','Total');

format STDOUT =
@<<<<<<<<<<<<<<<<<<<<<< & @>>>>>>>>>> & @>>>>>>>>>> & @>>>>>>>>>> & @>>>>>>>>>> \\
$keyword, $gct, $hvt, $nct, $t
.

# Header row.
write;

# One row per keyword, in sorted order.
for $keyword (sort keys %terms) {
    ($gct,$hvt,$nct,$t) = map { _format_cell($_) }
        @{ $terms{$keyword} }{qw(genecard harvester ncbi total)};
    write;
}

# Totals row.  Each database is looked up explicitly so that one which never
# appeared in the input is shown as '$-$' rather than being dereferenced
# while undefined.
$keyword = 'Total';
($gct,$hvt,$nct,$t) = map {
    my $per_database = $databases{$_};
    _format_cell(defined $per_database ? $per_database->{total} : undef);
} qw(genecard harvester ncbi total);
write;