From 65811378ba84a321015f6fea8051d9a2c927b706 Mon Sep 17 00:00:00 2001
From: Don Armstrong
Date: Fri, 8 May 2015 09:08:25 -0700
Subject: [PATCH] add geneimprint.org data source for imprinted genes

---
 Makefile             | 17 ++++++++
 README.md            | 16 ++++++++
 parse_geneimprint.pl | 95 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 128 insertions(+)
 create mode 100644 Makefile
 create mode 100644 README.md
 create mode 100755 parse_geneimprint.pl

diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..4f964ae
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,17 @@
+#!/usr/bin/make -f
+
+# Remove a half-written target when wget or the parser exits non-zero, so a
+# failed run is not mistaken for an up-to-date file on the next invocation.
+.DELETE_ON_ERROR:
+
+# Human gene list downloaded from geneimprint.com
+geneimprint_human.html:
+	wget -O $@ "http://www.geneimprint.com/site/genes-by-species.Homo+sapiens"
+
+# Catalogue download from igc.otago.ac.nz; no rule here consumes it yet
+parent_of_origin.html:
+	wget -O $@ "http://igc.otago.ac.nz/FMPro?-DB=Catalogue.fm&-error=Error.html&-Format=Record3.html&Genetype=maingene&-SortField=Species&custom=Species&-SortField=Chr&custom=Chromosome&-SortField=Location&-SortOrder=Ascending&-Max=all&-Find"
+
+# Tab-separated table extracted from the downloaded HTML
+geneimprint_human.txt: geneimprint_human.html parse_geneimprint.pl
+	./parse_geneimprint.pl $< > $@
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..a6f6832
--- /dev/null
+++ b/README.md
@@ -0,0 +1,16 @@
+Overview
+========
+
+This project contains a set of makefiles and data to return genes
+which are known to be imprinted in humans, and can then be used in
+follow-on analyses.
+
+Data Sources
+============
+
+geneimprint.org
+---------------
+1. [geneimprint](http://www.geneimprint.com/site/home)
+2. `make geneimprint_human.txt`
+
+
diff --git a/parse_geneimprint.pl b/parse_geneimprint.pl
new file mode 100755
index 0000000..4f36a9a
--- /dev/null
+++ b/parse_geneimprint.pl
@@ -0,0 +1,95 @@
+#!/usr/bin/perl
+# parse_geneimprint.pl Parses output from the geneimprint website and returns a table of imprinted genes
+# and is released under the terms of the GNU GPL version 3, or any
+# later version, at your option. See the file README and COPYING for
+# more information.
+# Copyright 2014 by Don Armstrong .
+
+
+use warnings;
+use strict;
+
+use Getopt::Long;
+use Pod::Usage;
+
+=head1 NAME
+
+parse_geneimprint.pl - Parses output from the geneimprint website and returns a table of imprinted genes
+
+=head1 SYNOPSIS
+
+parse_geneimprint.pl [options] geneimprint_human.html
+
+ Options:
+   --debug, -d debugging level (Default 0)
+   --help, -h display this help
+   --man, -m display manual
+
+=head1 OPTIONS
+
+=over
+
+=item B<--debug, -d>
+
+Debug verbosity. (Default 0)
+
+=item B<--help, -h>
+
+Display brief usage information.
+
+=item B<--man, -m>
+
+Display this manual.
+
+=back
+
+=head1 EXAMPLES
+
+parse_geneimprint.pl geneimprint_human.html > geneimprint_human.txt
+
+=cut
+
+
+# Debug verbosity, copied from --debug once the options are parsed.
+our $DEBUG;
+
+use HTML::Tree;
+
+my %options = (debug => 0,
+               help  => 0,
+               man   => 0,
+              );
+
+# Stop with the usage message on an unrecognised option instead of
+# carrying on after the warning GetOptions prints.
+GetOptions(\%options,
+           'debug|d+','help|h|?','man|m') or pod2usage(2);
+
+pod2usage() if $options{help};
+pod2usage({verbose=>2}) if $options{man};
+
+$DEBUG = $options{debug};
+
+my @USAGE_ERRORS;
+if (@ARGV != 1) {
+    push @USAGE_ERRORS,"You must provide a single html file";
+}
+
+pod2usage(join("\n",@USAGE_ERRORS)) if @USAGE_ERRORS;
+
+my $t = HTML::Tree->new_from_file($ARGV[0]) or
+    die "Unable to parse $ARGV[0]";
+
+# the table we want is currently the first table
+my $table = $t->look_down(_tag=>'table') or
+    die "No table found in $ARGV[0]";
+for my $row ($table->look_down(_tag=>'tr')) {
+    # One output column per direct <td>/<th> child of the row.  Walking
+    # every descendant instead repeats a cell's text once for each element
+    # nested inside it (links, bold, ...).
+    my @cells = grep { ref($_) && $_->tag() =~ /^t[dh]$/ } $row->content_list();
+    # \xA0 is a non-breaking space; emit ordinary spaces instead
+    print join("\t",map{my $text = $_->as_text(); $text =~ s/\xA0/ /g; $text;} @cells)."\n";
+}
+
+__END__
-- 
2.39.5