#! /usr/bin/perl # gene_to_aliases takes a list of genes and returns their aliases, # and is released under the terms of the GPL version 2, or any later # version, at your option. See the file README and COPYING for more # information. # Copyright 2009 by Don Armstrong . # $Id: perl_script 1432 2009-04-21 02:42:41Z don $ use warnings; use strict; use Getopt::Long; use Pod::Usage; =head1 NAME gene_to_aliases - take a list of genes and return aliases =head1 SYNOPSIS [options] Options: --debug, -d debugging level (Default 0) --help, -h display this help --man, -m display manual =head1 OPTIONS =over =item B<--debug, -d> Debug verbosity. (Default 0) =item B<--help, -h> Display brief usage information. =item B<--man, -m> Display this manual. =back =head1 EXAMPLES =cut use vars qw($DEBUG); use WWW::Mechanize; use URI::Escape; my %options = (debug => 0, help => 0, man => 0, ); GetOptions(\%options, 'debug|d+','help|h|?','man|m'); pod2usage() if $options{help}; pod2usage({verbose=>2}) if $options{man}; $DEBUG = $options{debug}; my @USAGE_ERRORS; # if (1) { # push @USAGE_ERRORS,"You must pass something"; # } pod2usage(join("\n",@USAGE_ERRORS)) if @USAGE_ERRORS; my @genes; while (<>) { chomp; next unless length $_; push @genes,$_; } my $m = WWW::Mechanize->new(autocheck => 0); for my $gene (@genes) { my $url = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=gene&retmax=1&term='. uri_escape($gene.'[Symbol] AND Homo[Orgn]'); my $content = get_loop($url); my @id = $content =~ m|(\d+)|gis; if (not @id) { print $gene,qq(\n); next; } $content = get_loop('http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=gene&retmax=1&retmode=xml&id='.$id[0]); my @syn = $content =~ m{<(?:Gene|Prot)-ref_(?:syn|name)_E>([^<]+)}g; print join("\t",$gene,@syn),qq(\n); print $content if $DEBUG; } sub get_loop { my $url = shift; my $counter = 0; my $content=''; LOOP: {do { if ($m->get($url )) { $content = $m->content; last; } $counter++; sleep 10; } while ($counter < 100); } return $content; } __END__