+ my %term_temp;
+ my %db_temp;
+ my %gene_temp;
+ my %gene_temp2;
+ for my $term (keys %{$genes{$gene}{terms}}) {
+ if ($term =~ /\[/) {
+ my ($keyword,$database) = $term =~ /([^[]+)\[([^\]]+)\]/;
+ my $hits = $genes{$gene}{terms}{$term};
+ $keyword =~ s/[-+_]/ /g;
+ $keyword =~ s/\s*$//;
+ $keyword =~ s/[*]//;
+ $gene_temp{$keyword}{$database} = 1;
+ $gene_temp2{$database}{$keyword} = 1;
+ $databases{$database}{$keyword}{count}++;
+ $db_temp{$database}++;
+ $terms{$keyword}{$database}{count}++;
+ }
+ else {
+ my $keyword = $term;
+ my $hits = $genes{$gene}{terms}{$term};
+ $keyword =~ s/[-+_]/ /g;
+ $keyword =~ s/\s*$//;
+ $keyword =~ s/[*]//;
+ $terms{$keyword}{total}{count}++;
+ }
+ }
+ if (keys %gene_temp == 1) {
+ $terms{[keys %gene_temp]->[0]}{total}{unique}++;
+ if (keys %{$gene_temp{[keys %gene_temp]->[0]}} == 1) {
+ $databases{total}{total}{unique}++
+ }
+ }
+ if (keys %gene_temp2 == 1) {
+ $databases{[keys %gene_temp2]->[0]}{total}{unique}++;
+ }
+ for my $keyword (keys %gene_temp) {
+ if (keys %{$gene_temp{$keyword}} == 1) {
+ $terms{$keyword}{[keys %{$gene_temp{$keyword}}]->[0]}{unique}++;
+ }
+ for my $keyword2 (keys %gene_temp) {
+ $keyword_keyword{$keyword}{$keyword2}++
+ }
+ }
+ for my $database (keys %db_temp) {
+ $databases{$database}{total}{count}++;
+ }
+ $databases{total}{total}{count}++;
+
+}
+
+ # Derive an automatic weight for every keyword in the keyword/keyword table:
+ # the keyword's diagonal entry divided by the sum of its entries against
+ # every other keyword.
+ for my $keyword (keys %keyword_keyword) {
+     my $row = $keyword_keyword{$keyword};
+     # Numerator and denominator are both clamped to at least 1 so that the
+     # quotient can never turn into 0/0.
+     my $own_count = max(1, $row->{$keyword});
+     # Seeding sum() with 0 keeps the total defined even when this keyword
+     # has no entry for any other keyword.
+     my @other_counts = map { $row->{$_} } grep { $_ ne $keyword } keys %{$row};
+     my $other_total = max(1, sum(0, @other_counts));
+     $auto_weight{$keyword} = $own_count / $other_total;
+ }
+
+ # Write the CSV header line: each field name wrapped in double quotes,
+ # joined with commas and followed by a newline.
+ {
+     my $header = join(',', map { '"' . $_ . '"' } @csv_fields);
+     print {$results_fh} $header, "\n";
+ }
+ # Compute three scores for every gene.  Only terms whose name contains no
+ # "[" take part:
+ #   rzscore       - how many such terms the gene has
+ #   weightedscore - sum of %keyword_weight over those terms (1 when a term has no defined weight)
+ #   autoscore     - sum of %auto_weight over those terms (1 when a term has no defined weight)
+ # NOTE(review): %auto_weight is keyed by the keys of %keyword_keyword; if
+ # those keys are formatted differently from the raw term names used here,
+ # the lookup silently falls back to 1 - confirm that the key formats agree.
+ for my $gene (keys %genes) {
+     my @plain_terms = grep { $_ !~ /\[/ } keys %{ $genes{$gene}{terms} };
+     my @manual_weights = map { defined $keyword_weight{$_} ? $keyword_weight{$_} : 1 } @plain_terms;
+     my @auto_weights = map { defined $auto_weight{$_} ? $auto_weight{$_} : 1 } @plain_terms;
+     $genes{$gene}{rzscore} = scalar @plain_terms;
+     # The leading 0 makes sum() return 0 rather than undef for an empty list.
+     $genes{$gene}{weightedscore} = sum(0, @manual_weights);
+     $genes{$gene}{autoscore} = sum(0, @auto_weights);
+ }
+
+ # Select the score field used to order the genes: 'autoscore' by default,
+ # 'weightedscore' as soon as any value in %keyword_weight differs from 1.
+ my $sort = scalar(grep { $_ != 1 } values %keyword_weight)
+     ? 'weightedscore'
+     : 'autoscore';
+for my $gene (sort {$genes{$b}{$sort} <=> $genes{$a}{$sort}} keys %genes) {
+ print {$results_fh} join (',',