#! /usr/bin/perl
# get_location_from_uniprot retrieves gene location and protein name
# information from UniProt, and is released under the terms of the GPL
# version 2, or any later version, at your option. See the file README
# and COPYING for more information.
# Copyright 2004 by Don Armstrong .
# $Id: ss,v 1.1 2004/06/29 05:26:35 don Exp $

use warnings;
use strict;

use Getopt::Long;
use Pod::Usage;

=head1 NAME

get_location_from_uniprot [options]

=head1 SYNOPSIS

 Options:
  --terms, -t file of search terms [default -]
  --debug, -d debugging level [default 0]
  --help, -h display this help
  --man, -m display manual

=head1 DESCRIPTION

Reads one search term per line, requests the UniProt extended view page
using the term as the C<proteinId> parameter, and prints one CSV row per
term to standard output with the columns C<NAME>, C<LOCATION> and
C<FULL NAME>. A field that could not be extracted from the page is
printed as C<NO DATA>.

=head1 OPTIONS

=over

=item B<--terms, -t>

File of search terms, one per line. Use C<-> to read the terms from
standard input. (Default -)

=item B<--debug, -d>

Debug verbosity; may be given more than once. When non-zero, the URL of
each request is printed to standard error. (Default 0)

=item B<--help, -h>

Display brief usage information.

=item B<--man, -m>

Display this manual.

=back

=head1 EXAMPLES

  get_location_from_uniprot -t terms.txt > output.txt

Will pretty much do what you want

=cut

# Example of the page that is scraped:
# http://www.ebi.uniprot.org/uniprot-srv/extendedView.do?proteinId=1A01_HUMAN

our ($DEBUG, $REVISION);

BEGIN {
    ($REVISION) = q$LastChangedRevision: 1$ =~ /LastChangedRevision:\s+([^\s]+)/;
    $DEBUG = 0 unless defined $DEBUG;
}

use IO::File;
use URI::ParamMunge;
use LWP::UserAgent;
use HTTP::Request;

# XXX parse config file

# Defaults. format, database, dir and name are not read anywhere in this
# script; they are kept so the option table is unchanged.
my %options = (
    debug              => 0,
    help               => 0,
    man                => 0,
    format             => 'xml',
    database           => 'gene',
    dir                => '.',
    name               => '${search}_results_harvester',
    terms              => '-',
    uniprot_site       => 'http://www.ebi.uniprot.org',
    uniprot_search_url => '/uniprot-srv/extendedView.do?proteinId=1A01_HUMAN',
);

# Stop with the usage message on an unrecognised option instead of
# silently carrying on with the defaults.
GetOptions(\%options, 'terms|t=s', 'dir|D=s', 'debug|d+', 'help|h|?', 'man|m')
    or pod2usage(2);

pod2usage() if $options{help};
pod2usage({verbose => 2}) if $options{man};

$DEBUG = $options{debug};

# Indexes into the per-term result array.
use constant {
    NAME     => 0,
    LOCATION => 1,
    FULLNAME => 2,
};

# Render one CSV field: undefined values become "NO DATA"; embedded
# double quotes are doubled so the row stays well-formed.
sub _csv_field {
    my ($value) = @_;
    return q("NO DATA") unless defined $value;
    (my $escaped = $value) =~ s/"/""/g;
    return qq("$escaped");
}

# Open search terms file
my $terms;
if ($options{terms} eq '-') {
    $terms = \*STDIN;
}
else {
    $terms = IO::File->new($options{terms}, 'r')
        or die "Unable to open file $options{terms}: $!";
}

my $ua = LWP::UserAgent->new(agent => "DA_get_location_from_uniprot/$REVISION");

print STDOUT qq("NAME","LOCATION","FULL NAME"\n);

# For every term
while (my $search = <$terms>) {
    chomp $search;

    # A blank line would only produce a request with an empty proteinId.
    next unless $search =~ /\S/;

    my @gene;
    $gene[NAME] = $search;

    # Replace the placeholder proteinId in the configured URL with the term.
    my $url = uri_param_munge(
        $options{uniprot_site} . $options{uniprot_search_url},
        { proteinId => $search },
    );
    print STDERR "Fetching $url\n" if $DEBUG;

    my $response = $ua->request(HTTP::Request->new('GET', $url));

    if ($response->is_success) {
        my $content = $response->content;

        # NOTE(review): both patterns contain runs of bare \s* that look
        # as though HTML tag literals were lost at some point; they are
        # kept exactly as found -- confirm against the page markup.
        ($gene[LOCATION]) = $content =~ m{\s*\s*  \s* \s* Gene\s+name:[^\&]+  Location:([^\<]+)\s* \s* }xis;
        ($gene[FULLNAME]) = $content =~ m{>Protein\s+name\s* \s*\s* ([^\<]+)\s* \s*\s*}xis;
    }
    else {
        # Do not scrape an error page; the row is still emitted with
        # NO DATA so the output keeps one row per term.
        warn "Request for '$search' failed: " . $response->status_line . "\n";
    }

    print STDOUT join(',', map { _csv_field($_) } @gene[NAME, LOCATION, FULLNAME]), qq(\n);

    # Be polite to the remote server between requests.
    sleep 2;
}

__END__