+# Aggregate tables filled in below from %genes (built earlier in the script).
+#
+#   $databases{$db}{$keyword}{count}   bumped once per gene term of the form
+#                                      "keyword[db]" (after normalisation)
+#   $databases{$db}{total}{count}      bumped once per gene that names $db
+#   $databases{$db}{total}{unique}     bumped once per gene whose qualified
+#                                      terms all name this single database
+#   $databases{total}{total}{count}    bumped once per gene
+#   $databases{total}{total}{unique}   bumped once per gene that has exactly
+#                                      one keyword in exactly one database
+#   $terms{$keyword}{$db}{count}       transpose of $databases{$db}{$keyword}
+#   $terms{$keyword}{$db}{unique}      bumped once per gene in which $keyword
+#                                      appears for $db and no other database
+#   $terms{$keyword}{total}{count}     bumped once per unqualified gene term
+#   $terms{$keyword}{total}{unique}    bumped once per gene whose qualified
+#                                      terms all share this single keyword
+#   $keyword_keyword{$a}{$b}           number of genes in which keywords $a
+#                                      and $b both occur (diagonal included)
+#   %auto_weight                       declared here, filled in later
+my %databases;
+my %terms;
+my %auto_weight;
+my %keyword_keyword;
+
+for my $gene (keys %genes) {
+    # Per-gene views of the qualified ("keyword[database]") terms only.
+    my %dbs_for_keyword;    # keyword  => { database => 1 }
+    my %keywords_for_db;    # database => { keyword  => 1 }
+
+    TERM:
+    for my $term (keys %{$genes{$gene}{terms}}) {
+        my ($keyword, $database);
+        if ($term =~ /\[/) {
+            ($keyword, $database) = $term =~ /([^[]+)\[([^\]]+)\]/;
+            if (not defined $database) {
+                # A '[' without a complete "keyword[database]" pair used to
+                # leave both captures undef, so everything was filed under
+                # the empty-string key. Report the term and leave it out.
+                warn "Skipping malformed term '$term' for gene '$gene'\n";
+                next TERM;
+            }
+        }
+        else {
+            $keyword = $term;
+        }
+
+        # Normalise the keyword: separator characters become spaces, trailing
+        # whitespace is dropped and a wildcard '*' is removed.
+        # NOTE(review): only the first '*' is removed, and it is removed after
+        # the trailing-whitespace strip; left unchanged in case other parts of
+        # the script key on exactly this form - confirm before tightening.
+        $keyword =~ s/[-+_]/ /g;
+        $keyword =~ s/\s*$//;
+        $keyword =~ s/[*]//;
+
+        if (defined $database) {
+            $dbs_for_keyword{$keyword}{$database} = 1;
+            $keywords_for_db{$database}{$keyword} = 1;
+            $databases{$database}{$keyword}{count}++;
+            $terms{$keyword}{$database}{count}++;
+        }
+        else {
+            $terms{$keyword}{total}{count}++;
+        }
+    }
+
+    my @gene_keywords  = keys %dbs_for_keyword;
+    my @gene_databases = keys %keywords_for_db;
+
+    # Gene found through a single keyword (and possibly a single database).
+    if (@gene_keywords == 1) {
+        my $only_keyword = $gene_keywords[0];
+        $terms{$only_keyword}{total}{unique}++;
+        if (scalar(keys %{$dbs_for_keyword{$only_keyword}}) == 1) {
+            $databases{total}{total}{unique}++;
+        }
+    }
+
+    # Gene found through a single database.
+    if (@gene_databases == 1) {
+        $databases{$gene_databases[0]}{total}{unique}++;
+    }
+
+    for my $keyword (@gene_keywords) {
+        my @keyword_databases = keys %{$dbs_for_keyword{$keyword}};
+        if (@keyword_databases == 1) {
+            $terms{$keyword}{$keyword_databases[0]}{unique}++;
+        }
+        # Co-occurrence matrix; the diagonal counts genes with this keyword.
+        for my $other_keyword (@gene_keywords) {
+            $keyword_keyword{$keyword}{$other_keyword}++;
+        }
+    }
+
+    for my $database (@gene_databases) {
+        $databases{$database}{total}{count}++;
+    }
+    $databases{total}{total}{count}++;
+}
+
+# Derive an automatic weight for every keyword from the keyword/keyword
+# co-occurrence table: the diagonal entry divided by the sum of the other
+# entries in the same row of the table.
+for my $keyword (keys %keyword_keyword) {
+    my $row = $keyword_keyword{$keyword};
+
+    # Add up every entry of this row except the diagonal one.
+    my $off_diagonal = 0;
+    for my $other_keyword (keys %{$row}) {
+        next if $other_keyword eq $keyword;
+        $off_diagonal += $row->{$other_keyword};
+    }
+
+    # Clamp both parts to at least 1 so that 0/0 (and x/0) cannot occur.
+    my $numerator   = max(1, $row->{$keyword});
+    my $denominator = max(1, $off_diagonal);
+
+    $auto_weight{$keyword} = $numerator / $denominator;
+}
+
+# Write the CSV header row: each field name wrapped in double quotes and the
+# names joined with commas, followed by a newline.
+{
+    my $header_row = join(',', map { qq("$_") } @csv_fields);
+    print {$results_fh} $header_row, qq(\n);
+}