if (@ARGV != 1) {
pod2usage("If the --keywords option is used, exactly one argument (the keyword) must be passed");
}
- $option{dir} = "$ARGV[0]_results_genecard";
+ $options{dir} = "$ARGV[0]_results_genecard";
}
if (not -d $options{dir}) {
my @results;
# Find gene name
- ($results[NAME]) = $result =~ m&(?:Lean|Gene)Card\s+for\s+(?:(?:disorder\s+locus|uncategorized|
- hugo\s*reserved\s*symbol|cluster|
- potentially\s*expressed\s*sequence)|(?:predicted\s+|pseudo|rna\s+|)gene)
- \s*(?:with\s*support\s*|)<FONT\s+COLOR=\"[^\"]+\">\s*<FONT\s+SIZE=\+2>\s*([^\s]+)\s*&xis;
+ ($results[NAME]) = $result =~ m{(?:Lean|Gene)Card\s+for\s+[^<]+<FONT[^>]+>\s*([^<]+)}xis;
$results[NAME] ||= 'NO NAME';
# Find REF SEQ number
- ($results[REFSEQ]) = $result =~ m|http://www.ncbi.nlm.nih.gov/entrez/query.fcgi\?
- cmd=Search\&db=nucleotide\&doptcmdl=GenBank\&term=([^\"]+)\"|xis;
+ ($results[REFSEQ]) = $result =~ m{http://www.ncbi.nlm.nih.gov/entrez/query.fcgi\?
+ (?:cmd=Search\&db=nucleotide|db=nucleotide\&cmd=search)
+ \&doptcmdl=GenBank\&term=([^\"]+)\"}xis;
$results[REFSEQ] ||= 'NO REFSEQ';
my @functions = $result =~ m&<li><b>Function:</b>\s+(.+?)(?:<li>)|(?:</ul>)&gis;
# GO Functions
- push @functions, (map {s#\s*</a>\s*# #g; $_;} $result =~ m&(GO:\d+\s*</a>.+?)(?:<dd>|<p>)&gis);
+ push @functions, (map {s/\n//g; $_;}  # s/// evaluates to a count, so hand back the cleaned string itself
+ map {s#\s*\s*</a>(?:</td><td>\s*)?\s*# #g; $_;}
+ $result =~ m{(GO:\d+\s*</a>(?:</td><td>\s*)?.+?)(?:</td><dd>|<p>)}gis
+ );
$results[FUNCTION] = join('; ', map {(defined $_)?($_):()} @functions);
$results[FUNCTION] ||= 'NO FUNCTION';