]> git.donarmstrong.com Git - imprinted_genes.git/commitdiff
add parent of origin database
authorDon Armstrong <don@donarmstrong.com>
Fri, 8 May 2015 16:40:39 +0000 (09:40 -0700)
committerDon Armstrong <don@donarmstrong.com>
Fri, 8 May 2015 16:40:39 +0000 (09:40 -0700)
Makefile
README.md
parse_parent_of_origin.pl [new file with mode: 0755]

index 4f964ae3f882181f3b1ca020c7842f00d8035946..cb3d0351a80415120c5430118aba313fb4c0c059 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -10,3 +10,5 @@ parent_of_origin.html:
 geneimprint_human.txt: geneimprint_human.html parse_geneimprint.pl
        ./parse_geneimprint.pl $< > $@
 
+parent_of_origin.txt: parent_of_origin.html parse_parent_of_origin.pl
+       ./parse_parent_of_origin.pl $< > $@
index a6f6832342e53a1a77fa7e9cf9d2b567e137ed80..dca29a8a4e1836da1b7e815df9f9ff828a722814 100644 (file)
--- a/README.md
+++ b/README.md
@@ -11,6 +11,15 @@ Data Sources
 geneimprint.org
 ---------------
 1. [geneimprint](http://www.geneimprint.com/site/home)
-2. make geneimprint_human.txt
+2. `make geneimprint_human.txt`
+
+Catalog of Parent of Origin Effects Database
+--------------------------------------------
+
+1. [Catalog of Parent of Origin Effects Database](http://igc.otago.ac.nz/Search.html)
+2. `make parent_of_origin.txt`
+
+Combined Data Files
+===================
 
 
diff --git a/parse_parent_of_origin.pl b/parse_parent_of_origin.pl
new file mode 100755 (executable)
index 0000000..9de4a2e
--- /dev/null
@@ -0,0 +1,105 @@
+#!/usr/bin/perl
+# parse_parent_of_origin.pl parses output from the parent_of_origin website
+# and returns a table of imprinted genes. It is released under the terms of
+# the GNU GPL version 3, or any later version, at your option. See the files
+# README and COPYING for more information.
+# Copyright 2014 by Don Armstrong <don@donarmstrong.com>.
+
+
+use warnings;
+use strict;
+
+use Getopt::Long;
+use Pod::Usage;
+
+=head1 NAME
+
+parse_parent_of_origin.pl - Parses output from the parent_of_origin website and returns a table of imprinted genes
+
+=head1 SYNOPSIS
+
+parse_parent_of_origin.pl [options] parent_of_origin.html
+
+ Options:
+   --taxon taxon to report (Default Human)
+   --debug, -d debugging level (Default 0)
+   --help, -h display this help
+   --man, -m display manual
+
+=head1 OPTIONS
+
+=over
+
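+=item B<--taxon>
+
+Only report rows whose Taxon matches this value; the comparison is
+case-insensitive. (Default Human)
+
+=item B<--debug, -d>
+
+Debug verbosity. (Default 0)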
+
+=item B<--help, -h>
+
+Display brief usage information.
+
+=item B<--man, -m>
+
+Display this manual.
+
+=back
+
+=head1 EXAMPLES
+
+parse_parent_of_origin.pl parent_of_origin.html > parent_of_origin.txt
+
+=cut
+
+
+# Debug verbosity; assigned from --debug once the options are parsed.
+our $DEBUG;
+
+use HTML::Tree;
+
+# Option defaults; GetOptions overwrites these in place.
+my %options = (debug           => 0,
+               help            => 0,
+               man             => 0,
+               taxon           => 'Human',
+              );
+
+# GetOptions returns false when it meets an unknown or malformed option;
+# stop with the usage message instead of continuing with a bad command line.
+GetOptions(\%options,
+           'taxon=s',
+           'debug|d+','help|h|?','man|m')
+    or pod2usage(2);
+
+pod2usage() if $options{help};
+pod2usage({verbose=>2}) if $options{man};
+
+$DEBUG = $options{debug};
+
+# Usage problems are collected first so they can be reported together.
+my @USAGE_ERRORS;
+if (@ARGV != 1) {
+    push @USAGE_ERRORS,"You must provide a single html file";
+}
+
+pod2usage(join("\n",@USAGE_ERRORS)) if @USAGE_ERRORS;
+
+# Parse the HTML file named on the command line into an element tree.
+my $t = HTML::Tree->new_from_file($ARGV[0]) or
+    die "Unable to parse $ARGV[0]";
+
+# Rows (hashrefs of label => value) whose Taxon matches --taxon.
+my @imprinted_genes;
+# NOTE(review): nothing in the code visible here calls Data::Printer; it looks
+# like a leftover debugging aid -- confirm before dropping the dependency.
+use Data::Printer;
+# Every table in the document is examined. The text of a table's <td> cells
+# is read as alternating label/value pairs, and only the first four pairs
+# (eight cells) are kept.
+for my $table ($t->look_down(_tag=>'table')) {
+    my @elements = map {$_->as_text()} $table->look_down(_tag => 'td');
+    # A fixed 0..7 slice put undef keys into the hash (with "uninitialized
+    # value" warnings) for tables with fewer than eight cells; use only the
+    # cells that actually exist.
+    my $last = $#elements < 7 ? $#elements : 7;
+    my @pairs = @elements[0..$last];
+    # a trailing label without a value still becomes a key, with undef value
+    push @pairs, undef if @pairs % 2;
+    # strip one trailing colon from each cell so that a label cell such as
+    # "Taxon:" yields the key "Taxon"
+    my %row = map {s/:$//g if defined $_; $_;} @pairs;
+    # keep the row only when its Taxon equals the requested taxon,
+    # compared case-insensitively
+    if (defined $row{Taxon} and
+        lc($row{'Taxon'}) eq lc($options{taxon})) {
+        push @imprinted_genes,\%row;
+    }
+}
+
+# Emit a tab-separated table on STDOUT: one header line, then one line per
+# collected row with its Chromosome, Gene and Taxon values.
+print "chromosome\tgene\ttaxon\n";
+for my $record (@imprinted_genes) {
+    my @fields;
+    for my $column (qw(Chromosome Gene Taxon)) {
+        my $value = $record->{$column};
+        # a missing value is written as an empty field
+        $value = '' unless defined $value;
+        # drop double quotes and the characters \x93 and \x94
+        # (presumably Windows-1252 curly quotes -- confirm the input encoding)
+        $value =~ s/[\x93"\x94]//g;
+        push @fields, $value;
+    }
+    print join("\t", @fields)."\n";
+}
+
+
+
+
+
+
+__END__