}
}
+# space_fill($value, $length, $right)
+#
+# Return $value forced to exactly $length characters.
+#   * A longer value is truncated to its first $length characters.
+#   * A shorter value is padded with spaces: on the left when $right is
+#     true (right-justified), on the right otherwise (left-justified).
+#   * An undefined $value is treated as the empty string, so it yields
+#     $length spaces without "uninitialized" warnings.
+sub space_fill{
+    my ($value,$length,$right) = @_;
+    $value = '' if not defined $value;
+    my $padding = $length - length($value);
+    # substr instead of m/(.{$length})/: that regex was unanchored and "."
+    # does not match a newline, so it could pick characters from the middle
+    # of the value, or fail and hand back a stale $1 from an earlier match.
+    return substr($value,0,$length) if $padding < 0;
+    return $value if $padding == 0;
+    return $right
+        ? (' ' x $padding).$value
+        : $value.(' ' x $padding);
+}
+
+# results_table_line($keyword, @fields)
+#
+# Build one row of the results table: the keyword left-justified in 23
+# columns, then each field right-justified in 11 columns, the cells joined
+# by ' & ' and the row terminated by a newline.
+#
+# NOTE(review): the RESULTS_TABLE format this replaces ended every row with
+# a trailing ' \\'; this sub emits no such terminator -- confirm that is
+# intended.
+sub results_table_line {
+    my ($keyword,@fields) = @_;
+    my @cells = (space_fill($keyword,23));
+    for my $field (@fields) {
+        push @cells, space_fill($field,11,1);
+    }
+    return join(' & ', @cells)."\n";
+}
+my @database_order = grep {lc($_) ne 'total'} keys %databases;
if (defined $results_table_fh) {
- our ($keyword,$weight,$autoweight,$gct,$hvt,$nct,$t) = ('Keyword','Weight','Autoweight','GeneCards','Harvester','NCBI','Total');
- format RESULTS_TABLE =
-@<<<<<<<<<<<<<<<<<<<<<< & @>>>>>>>>>> & @>>>>>>>>>> & @>>>>>>>>>> & @>>>>>>>>>> & @>>>>>>>>>> & @>>>>>>>>>> \\
-$keyword, $weight, $autoweight, $gct, $hvt, $nct, $t
-.
- $results_table_fh->format_name('RESULTS_TABLE');
- write $results_table_fh;
+ my $keyword;
+ print {$results_table_fh} results_table_line('Keyword','Weight','Autoweight',
+ map {ucfirst($_)} @database_order,
+ 'Total',
+ );
for $keyword (sort keys %terms) {
- ($gct,$hvt,$nct,$t) =
+ my @fields =
map {
if (not defined $_) {
'$-$';
$_->{unique} ||= 0;
"$_->{count} ($_->{unique})";
}
- } @{$terms{$keyword}}{qw(genecard harvester ncbi total)};
- $weight = $keyword_weight{$keyword} || 1;
- $autoweight = $auto_weight{$keyword};
- write $results_table_fh;
+ } @{$terms{$keyword}}{@database_order,'total'};
+ unshift @fields, $auto_weight{$keyword};
+ unshift @fields, $keyword_weight{$keyword} || 1;
+ print {$results_table_fh} results_table_line($keyword,
+ @fields
+ );
}
-
$keyword = 'Total';
- ($gct,$hvt,$nct,$t) =
+ my @fields = ('','');
+ push @fields,
map {
if (not defined $_) {
'$-$';
$_->{unique} ||= 0;
"$_->{count} ($_->{unique})";
}
- } map {$_->{total}} @databases{qw(genecard harvester ncbi total)};
- #($gct,$hvt,$nct,$t) = map {$_->{total}} @databases{qw(genecard harvester ncbi total)};
- $weight = '';
- $autoweight = '';
- write $results_table_fh;
+ } map {$_->{total}} @databases{@database_order,'total'};
+ print {$results_table_fh} results_table_line($keyword,
+ @fields
+ );
}
__END__
$ERRORS.="unknown database(s)" if
@{$options{databases}} and
- grep {$_ !~ /^(?:ncbi|genecard|harvester)$/i} @{$options{databases}};
+ grep {$_ !~ /^(?:ncbi|genecard|harvester|ensembl)$/i} @{$options{databases}};
if (not length $options{results}) {
$ERRORS.="results directory not specified";
pod2usage($ERRORS) if length $ERRORS;
if (not @{$options{databases}}) {
- $options{databases} = [qw(ncbi genecard harvester)]
+ $options{databases} = [qw(ncbi genecard harvester ensembl)]
}
$DEBUG = $options{debug};
if (@ARGV != 1) {
pod2usage("If the --keywords option is used, exactly one argument (the keyword) must be passed");
}
- $options{dir} = "$ARGV[0]_results_genecard";
+ $options{dir} = "$ARGV[0]_results_ensembl";
}
if (not -d $options{dir}) {
print join(",", map {qq("$_");} qw(Name RefSeq Location Alias Function Description Keyword DBName Filename)),qq(\n);
-my ($keyword) = $options{keyword} || $options{dir} =~ m#(?:^|/)([^\/]+)_results_genecard#;
+my ($keyword) = $options{keyword} || $options{dir} =~ m#(?:^|/)([^\/]+)_results_ensembl#;
while ($_ = $dir->read) {
my $file_name = $_;
my @results;
# Find gene name
- ($results[NAME]) = map {s/^[^:]+://; $_;}$result =~ m{a\s+href=\"[^"]+genenames.org[^"]+">\s*([^<]+?)\s*</a>}xis;
+ ($results[NAME]) = $result =~ m{a\s+href=\"[^"]+genenames.org[^"]+">\s*([^<]+?)\s*</a>}xis;
$results[NAME] ||= 'NO NAME';
+ # strip off the leading "prefix:" part (everything up to and including the first colon)
+ $results[NAME] =~ s/^[^\:]+\://;
# Find REF SEQ number
($results[REFSEQ]) = $result =~ m{for\s*(ENSG\d+)}xis;