use IO::File;
use IO::Dir;
+use HTML::TreeBuilder;
+use HTML::ElementTable;
+
my %options = (debug => 0,
help => 0,
man => 0,
$results[REFSEQ] ||= 'NO REFSEQ';
# Find Gene Location
- ($results[LOCATION]) = $result =~ m&<I>LocusLink\s+cytogenetic\s+band:</I><b>\s+
- <a\s+href="[^\"]+"\s+target\s+=\s+"aaa">\s*([^\<]+?)\s*</a>&xis;
+ ($results[LOCATION]) = $result =~ m{cytogenetic\s+band:</I><b>\s+
+ <a\s+href="[^\"]+"\s+target\s*=\s*"aaa"[^>]*>\s*([^\<]+?)\s*</a>}xis;
$results[LOCATION] ||= 'NO LOCATION';
# Find gene aliases
- my ($alias_table) = $result =~ m|<b>Aliases and Descriptions</b>(.+?)</TR>|is;
- $alias_table ||='';
-
- my @gene_aliases = $alias_table =~ m|<li>\s*([^\(]{0,20}?)\s*\(<FONT|gis;
+ my ($alias_table) = $result =~ m{(<table[^>]+><tr><th[^>]+>Aliases.+?</table>)}is;
+ $alias_table ||= '';
+ my @gene_aliases = map {s/\s*$//; $_;} $alias_table =~ m{<td(?: nowrap)?>\s*([^<]+)<}gis;
$results[ALIAS] = join('; ', @gene_aliases);
$results[ALIAS] ||= 'NO ALIASES';
# Find gene function(s)
- # Swiss prot functions
- my @functions = $result =~ m&<li><b>Function:</b>\s+(.+?)(?:<li>)|(?:</ul>)&gis;
-
+ my @functions;
# GO Functions
- push @functions, (map {s/\n//g}
- map {s#\s*\s*</a>(?:</td><td>\s*)?\s*# #g; $_;}
- $result =~ m{(GO:\d+\s*</a>(?:</td><td>\s*)?.+?)(?:</td><dd>|<p>)}gis
+ push @functions, (map {s/\n//g; $_;}
+ map {s#\s*</a>(?:</td><td>\s*)?\s*# #g; $_;}
+ $result =~ m{(GO:\d+\s*</a>(?:</td><td>\s*)?.+?)(?:</font>|</td>|<dd>|<p>)}gis
);
$results[FUNCTION] = join('; ', map {(defined $_)?($_):()} @functions);
$results[FUNCTION] ||= 'NO FUNCTION';
$results[KEYWORD] ||= 'NO KEYWORD';
+ # Swiss-Prot "Function" annotations (collected here to build the description)
+ my @description = (map {s/<[^>]+>/ /g;
+ s/\s+/ /g;
+ $_;
+ }
+ $result =~ m{<(?:dd|li)><b>Function(?::</b>|</b>:)\s+
+ (.+?)<(?:/dd|li)>}xgis
+ );
+
# Figure out what the description is
- $results[DESCRIPTION] = '';
+ $results[DESCRIPTION] = join('; ',
+ map {(defined $_)?($_):()}
+ @description);
# Database searched
$results[DBNAME] = 'genecard';