2 # parse_parent_of_origin.pl Parses output from the parent_of_origin website and returns a table of imprinted genes
3 # and is released under the terms of the GNU GPL version 3, or any
4 # later version, at your option. See the file README and COPYING for more information.
6 # Copyright 2014 by Don Armstrong <don@donarmstrong.com>.
17 parse_parent_of_origin.pl - Parses output from the parent_of_origin website and returns a table of imprinted genes
21 parse_parent_of_origin.pl [options]
24 --debug, -d debugging level (Default 0)
25 --help, -h display this help
26 --man, -m display manual
34 Debug verbosity. (Default 0)
38 Display brief usage information.
48 parse_parent_of_origin.pl parent_of_origin.html > parent_of_origin.txt
# Option defaults. Only the debug default is visible in this excerpt.
57 my %options = (debug => 0,
# Getopt::Long-style option specs: -d may be repeated to raise the debug
# level; -h/-? and -m are plain flags.
# NOTE(review): the call these specs belong to is not visible here --
# presumably GetOptions(\%options, ...); confirm.
65 'debug|d+','help|h|?','man|m');
# --help requests the brief usage message; --man requests verbose level 2.
67 pod2usage() if $options{help};
68 pod2usage({verbose=>2}) if $options{man};
# Copy the requested debug level into $DEBUG.
70 $DEBUG = $options{debug};
# Queue a usage error.
# NOTE(review): the condition guarding this push is not visible here; the
# message suggests it checks that exactly one file argument was given.
74 push @USAGE_ERRORS,"You must provide a single html file";
# Report all queued usage errors in one message, if there are any.
77 pod2usage(join("\n",@USAGE_ERRORS)) if @USAGE_ERRORS;
# Build an HTML tree from the file named by the first argument; die with a
# message naming the file if the constructor returns a false value.
79 my $t = HTML::Tree->new_from_file($ARGV[0]) or
80 die "Unable to parse $ARGV[0]";
84 # the table we want is currently the first table
# NOTE(review): despite the comment above, this loop visits every <table>
# element in the parsed document and filters each one on its Taxon field.
85 for my $table ($t->look_down(_tag=>'table')) {
# Text content of every <td> element found under this table.
86 my @elements = map {$_->as_text()} $table->look_down(_tag => 'td');
# Strip a trailing colon from each defined cell, then read the first eight
# cells as four key => value pairs (presumably alternating label/value
# cells -- confirm against the page layout).
87 my %row = map {s/:$//g if defined $_; $_;} @elements[0..7];
# Keep the row only when it has a Taxon entry that equals, ignoring case,
# the taxon requested in %options.
88 if (defined $row{Taxon} and
89 lc($row{'Taxon'}) eq lc($options{taxon})) {
90 push @imprinted_genes,\%row;
# Emit a tab-separated header line, then one line per retained gene.
94 print "chromosome\tgene\ttaxon\n";
95 for my $gene (@imprinted_genes) {
# Print the Chromosome, Gene and Taxon fields joined by tabs. Undefined
# fields become empty strings, and the characters \x93, " and \x94 are
# removed (presumably Windows-1252 curly quotes from the page -- confirm).
96 print join("\t",map {$_ = defined $_?$_:''; s/[\x93"\x94]//g; $_}
97 @{$gene}{qw(Chromosome Gene Taxon)})."\n";