]> git.donarmstrong.com Git - imprinted_genes.git/commitdiff
add geneimprint.org data source for imprinted genes
authorDon Armstrong <don@donarmstrong.com>
Fri, 8 May 2015 16:08:25 +0000 (09:08 -0700)
committerDon Armstrong <don@donarmstrong.com>
Fri, 8 May 2015 16:08:25 +0000 (09:08 -0700)
Makefile [new file with mode: 0644]
README.md [new file with mode: 0644]
parse_geneimprint.pl [new file with mode: 0755]

diff --git a/Makefile b/Makefile
new file mode 100644 (file)
index 0000000..4f964ae
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,12 @@
+#!/usr/bin/make -f
+# Delete a target whose recipe fails, so a partial download or parse is redone.
+.DELETE_ON_ERROR:
+
+geneimprint_human.html:
+	wget -O $@ "http://www.geneimprint.com/site/genes-by-species.Homo+sapiens"
+
+parent_of_origin.html:
+	wget -O $@ "http://igc.otago.ac.nz/FMPro?-DB=Catalogue.fm&-error=Error.html&-Format=Record3.html&Genetype=maingene&-SortField=Species&custom=Species&-SortField=Chr&custom=Chromosome&-SortField=Location&-SortOrder=Ascending&-Max=all&-Find"
+
+geneimprint_human.txt: geneimprint_human.html parse_geneimprint.pl
+	./parse_geneimprint.pl $< > $@
diff --git a/README.md b/README.md
new file mode 100644 (file)
index 0000000..a6f6832
--- /dev/null
+++ b/README.md
@@ -0,0 +1,16 @@
+Overview
+========
+
+This project contains a set of makefiles and data to return genes
+which are known to be imprinted in humans, and can then be used in
+follow-on analyses.
+
+Data Sources
+============
+
+geneimprint.com
+---------------
+1. [geneimprint](http://www.geneimprint.com/site/home)
+2. Run `make geneimprint_human.txt`
+
+
diff --git a/parse_geneimprint.pl b/parse_geneimprint.pl
new file mode 100755 (executable)
index 0000000..4f36a9a
--- /dev/null
+++ b/parse_geneimprint.pl
@@ -0,0 +1,91 @@
+#!/usr/bin/perl
+# parse_geneimprint.pl parses output from the geneimprint website and returns
+# a table of imprinted genes. It is released under the terms of the GNU GPL
+# version 3, or any later version, at your option. See the files README and
+# COPYING for more information.
+# Copyright 2014 by Don Armstrong <don@donarmstrong.com>.
+
+
+use warnings;
+use strict;
+
+use Getopt::Long;
+use Pod::Usage;
+
+=head1 NAME
+
+parse_geneimprint.pl - Parses output from the geneimprint website and returns a table of imprinted genes
+
+=head1 SYNOPSIS
+
+parse_geneimprint.pl [options] geneimprint_human.html
+
+ Options:
+   --debug, -d debugging level (Default 0)
+   --help, -h display this help
+   --man, -m display manual
+
+=head1 OPTIONS
+
+=over
+
+=item B<--debug, -d>
+
+Debug verbosity. (Default 0)
+
+=item B<--help, -h>
+
+Display brief usage information.
+
+=item B<--man, -m>
+
+Display this manual.
+
+=back
+
+=head1 EXAMPLES
+
+parse_geneimprint.pl geneimprint_human.html > geneimprint_human.txt
+
+=cut
+
+
+# Package-global debug level, set from --debug.
+our $DEBUG;
+use HTML::Tree;
+
+my %options = (debug           => 0,
+               help            => 0,
+               man             => 0,
+              );
+
+# GetOptions returns false on unknown or malformed options; show the
+# usage message rather than silently continuing with bad input.
+GetOptions(\%options,
+           'debug|d+','help|h|?','man|m') or pod2usage();
+
+pod2usage() if $options{help};
+pod2usage({verbose=>2}) if $options{man};
+
+$DEBUG = $options{debug};
+
+# Exactly one positional argument (the saved HTML page) is required.
+my @USAGE_ERRORS;
+push @USAGE_ERRORS,"You must provide a single html file" if @ARGV != 1;
+pod2usage(join("\n",@USAGE_ERRORS)) if @USAGE_ERRORS;
+
+my $t = HTML::Tree->new_from_file($ARGV[0]) or
+    die "Unable to parse $ARGV[0]";
+
+# the table we want is currently the first table; look_down returns
+# nothing when the page has no table, so fail with a clear message
+my $table = $t->look_down(_tag=>'table');
+defined $table or die "Unable to find a table in $ARGV[0]";
+# one tab-separated line per row, with one field per element below the row
+for my $row ($table->look_down(_tag=>'tr')) {
+    # non-breaking spaces (\xA0) become plain spaces; $text avoids
+    # shadowing sort's special $a
+    my @fields = map {my $text = $_->as_text(); $text =~ s/\xA0/ /g; $text;} $row->descendants();
+    print join("\t",@fields)."\n";
+}
+__END__