man => 0,
dir => '.',
keyword => undef,
+ keywords => 0,
);
-GetOptions(\%options,'keyword|k=s','dir|D=s','debug|d+','help|h|?','man|m');
+GetOptions(\%options,'keyword|k=s','dir|D=s','debug|d+','help|h|?','man|m',
+ 'keywords',    # boolean flag; when set, the results directory name is built from the single keyword argument
+ );
pod2usage() if $options{help};    # --help / -h / -?: print the usage message
FILENAME => 8,
};
+if ($options{keywords}) {    # --keywords mode: the results directory is derived from the keyword argument
+ if (@ARGV != 1) {    # exactly one positional argument (the keyword) must be left in @ARGV
+ pod2usage("If the --keywords option is used, exactly one argument (the keyword) must be passed");
+ }
+ $options{dir} = "$ARGV[0]_results_harvester";    # overrides any --dir value with "<keyword>_results_harvester"
+}
+
+
+
if (not -d $options{dir}) {    # stop early when the results directory is missing or is not a directory
die "$options{dir} does not exist or is not a directory";    # no trailing newline, so Perl appends the script name and line number
}
($results[NAME]) = $result =~ m&<TR>\s*<TD\s*BGCOLOR="\#FEFE99"\s*VALIGN="top"\s*NOWRAP>Entry\s*name</TD>\s*
<TD\s*VALIGN="top"\s*COLSPAN="5">\s*<b>\s*([^<]+?)\s*</b></TD>\s*</TR>&xis;
}
+ # Fallback used when no name has been found yet: take the text that
+ # follows the second '*' in the page <TITLE>, up to the first '-'.
+ if (not defined $results[NAME]) {
+ ($results[NAME]) = $result =~ m{<TITLE>[^:]+:\s*[^\*]+\*[^\*]+\*\s*([^-]+)}xis;
+ if (defined $results[NAME]) {
+ # Drop annotation noise first ("NN kDa protein", "similar to ...").
+ $results[NAME] =~ s/\d+\s*kDa\s*protein//;
+ $results[NAME] =~ s/\s*similar to .+//;
+ # Trim both ends afterwards, so whitespace exposed by the removals
+ # above is cleaned up too.  The previous s/^\s*$// only matched an
+ # all-whitespace string and never stripped leading whitespace.
+ $results[NAME] =~ s/^\s+//;
+ $results[NAME] =~ s/\s+$//;
+ }
+ }
$results[NAME] ||= 'NO NAME';    # ||= also replaces an empty string or "0", not only undef
+ $results[NAME] =~ s/_HUMAN//;    # removes the first "_HUMAN" found anywhere in the name (the pattern is not anchored)
# Find REF SEQ number
($results[REFSEQ]) = $result =~ m&<a\s+href="http://www.ncbi.nlm.nih.gov/entrez/