From 180791b59cd8e522f9607c016c561a0c2cc9d3b4 Mon Sep 17 00:00:00 2001 From: Don Armstrong Date: Mon, 16 Dec 2013 18:28:37 -0800 Subject: [PATCH] add gene to alases --- gene_to_aliases | 122 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 122 insertions(+) create mode 100755 gene_to_aliases diff --git a/gene_to_aliases b/gene_to_aliases new file mode 100755 index 0000000..563512f --- /dev/null +++ b/gene_to_aliases @@ -0,0 +1,122 @@ +#! /usr/bin/perl +# gene_to_aliases takes a list of genes and returns their aliases, +# and is released under the terms of the GPL version 2, or any later +# version, at your option. See the file README and COPYING for more +# information. +# Copyright 2009 by Don Armstrong . +# $Id: perl_script 1432 2009-04-21 02:42:41Z don $ + + +use warnings; +use strict; + +use Getopt::Long; +use Pod::Usage; + +=head1 NAME + +gene_to_aliases - take a list of genes and return aliases + +=head1 SYNOPSIS + + [options] + + Options: + --debug, -d debugging level (Default 0) + --help, -h display this help + --man, -m display manual + +=head1 OPTIONS + +=over + +=item B<--debug, -d> + +Debug verbosity. (Default 0) + +=item B<--help, -h> + +Display brief usage information. + +=item B<--man, -m> + +Display this manual. + +=back + +=head1 EXAMPLES + + +=cut + + +use vars qw($DEBUG); + +use WWW::Mechanize; +use URI::Escape; + +my %options = (debug => 0, + help => 0, + man => 0, + ); + +GetOptions(\%options, + 'debug|d+','help|h|?','man|m'); + +pod2usage() if $options{help}; +pod2usage({verbose=>2}) if $options{man}; + +$DEBUG = $options{debug}; + +my @USAGE_ERRORS; +# if (1) { +# push @USAGE_ERRORS,"You must pass something"; +# } + +pod2usage(join("\n",@USAGE_ERRORS)) if @USAGE_ERRORS; + +my @genes; +while (<>) { + chomp; + next unless length $_; + push @genes,$_; +} + +my $m = WWW::Mechanize->new(autocheck => 0); + +for my $gene (@genes) { + my $url = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=gene&retmax=1&term='. + uri_escape($gene.'[Symbol] AND Homo[Orgn]'); + my $content = get_loop($url); + my @id = $content =~ + m|(\d+)|gis; + if (not @id) { + print $gene,qq(\n); + next; + } + $content = get_loop('http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=gene&retmax=1&retmode=xml&id='.$id[0]); + my @syn = $content =~ + m{<(?:Gene|Prot)-ref_(?:syn|name)_E>([^<]+)}g; + print join("\t",$gene,@syn),qq(\n); + print $content if $DEBUG; +} +sub get_loop { + my $url = shift; + my $counter = 0; + my $content=''; + LOOP: {do { + if ($m->get($url + )) { + $content = $m->content; + last; + } + $counter++; + sleep 10; + } while ($counter < 100); + } + return $content; +} + + + +__END__ -- 2.39.2