2 # parse_geneimprint.pl Parses output from the geneimprint website and returns a table of imprinted genes
3 # and is released under the terms of the GNU GPL version 3, or any
4 # later version, at your option. See the file README and COPYING for
6 # Copyright 2014 by Don Armstrong <don@donarmstrong.com>.
17 parse_geneimprint.pl - Parses output from the geneimprint website and returns a table of imprinted genes
21 parse_geneimprint.pl [options]
24 --debug, -d debugging level (Default 0)
25 --help, -h display this help
26 --man, -m display manual
34 Debug verbosity. (Default 0)
38 Display brief usage information.
48 parse_geneimprint.pl geneimprint_human.html > geneimprint_human.txt
# Option defaults: debugging is off (level 0) unless raised on the command line.
57 my %options = (debug => 0,
# Option specifications: debug (-d), help (-h or -?) and man (-m).
# NOTE(review): this looks like the tail of a Getopt::Long GetOptions call
# whose opening lines are not visible in this excerpt; if so, the trailing
# "+" makes debug an incrementing counter (each -d raises the level) -- confirm.
63 'debug|d+','help|h|?','man|m');
# --help: show the brief usage message via pod2usage.
65 pod2usage() if $options{help};
# --man: ask pod2usage for its most verbose output (verbose level 2).
66 pod2usage({verbose=>2}) if $options{man};
# Copy the requested debug level into $DEBUG (declared outside this excerpt).
68 $DEBUG = $options{debug};
# Queue a usage error.
# NOTE(review): the condition guarding this push is not visible here; the
# message implies it fires unless exactly one HTML file was given -- confirm.
72 push @USAGE_ERRORS,"You must provide a single html file";
# If any usage errors were queued, hand them all (newline-separated) to
# pod2usage as its message.
75 pod2usage(join("\n",@USAGE_ERRORS)) if @USAGE_ERRORS;
# Build an HTML tree from the file named by the first command-line argument;
# abort with a message naming the file if the constructor returns false.
77 my $t = HTML::Tree->new_from_file($ARGV[0]) or
78 die "Unable to parse $ARGV[0]";
80 # the table we want is currently the first table
# NOTE(review): $table is not checked before it is used below; if the page
# contains no <table>, the method call on it would presumably die with an
# "undefined value" error -- confirm whether an explicit check is wanted.
81 my $table = $t->look_down(_tag=>'table');
# Emit one tab-separated output line per <tr> found under the table.
# (The closing brace of this loop lies outside the visible excerpt.)
82 for my $row ($table->look_down(_tag=>'tr')) {
# For every header or data cell (<th>/<td>) in the row: take its text, replace
# each non-breaking space (\xA0) or tab with a plain space so that cell content
# cannot be confused with the tab column separator, then join the cells with
# tabs and print the row followed by a newline.
# NOTE(review): "my $a" lexicalises the name of the special sort variable $a;
# harmless here because no sort is used in this scope, but a different name
# would be safer.
83 print join("\t",map{my $a = $_->as_text(); $a =~ s/[\xA0\t]/ /g; $a;} $row->look_down(_tag => qr/^(?:th|td)$/))."\n";