3 # get_location_from_uniprot retreives files of search results from ncbi,
4 # and is released under the terms of the GPL version 2, or any later
5 # version, at your option. See the file README and COPYING for more
8 # Copyright 2004 by Don Armstrong <don@donarmstrong.com>.
10 # $Id: ss,v 1.1 2004/06/29 05:26:35 don Exp $
22 get_location_from_uniprot [options]
28 --terms, -t file of search terms [default -]
29 --debug, -d debugging level [default 0]
30 --help, -h display this help
31 --man, -m display manual
39 Debug verbosity. (Default 0)
43 Display brief useage information.
53 get_location_from_uniprot -t terms.txt > output.txt
55 Will pretty much do what you want
59 # http://www.ebi.uniprot.org/uniprot-srv/extendedView.do?proteinId=1A01_HUMAN
61 use vars qw($DEBUG $REVISION);
64 ($REVISION) = q$LastChangedRevision: 1$ =~ /LastChangedRevision:\s+([^\s]+)/;
65 $DEBUG = 0 unless defined $DEBUG;
72 # XXX parse config file
74 my %options = (debug => 0,
80 name => '${search}_results_harvester',
82 uniprot_site => 'http://www.ebi.uniprot.org',
83 uniprot_search_url => '/uniprot-srv/extendedView.do?proteinId=1A01_HUMAN',
86 GetOptions(\%options,'terms|t=s','dir|D=s','debug|d+','help|h|?','man|m');
88 pod2usage() if $options{help};
89 pod2usage({verbose=>2}) if $options{man};
91 $DEBUG = $options{debug};
93 use constant {NAME => 0,
98 #open search terms file
100 if ($options{terms} eq '-') {
104 $terms = new IO::File $options{terms}, 'r' or die "Unable to open file $options{terms}: $!";
107 my $ua = new LWP::UserAgent(agent=>"DA_get_location_from_uniprot/$REVISION");
110 print STDOUT qq("NAME","LOCATION","FULL NAME"\n);
113 # Get uids to retrieve
116 my $url = uri_param_munge($options{uniprot_site}.$options{uniprot_search_url},
117 {proteinId => $search,
120 my $request = HTTP::Request->new('GET', $url);
121 my $response = $ua->request($request);
122 $response = $response->content;
123 $gene[NAME] = $search;
124 ($gene[LOCATION]) = $response =~ m{<!--Chromosome\s+locus-->\s*<tr>\s*
125 <td\s+class="import_title"\s+valign="top"> </td>\s*
126 <td\s+class="value"\s+colspan="5">\s*
127 <table\s+width="100%"><tr\s+class="value"><td>Gene\s+name:[^\&]+ Location:([^\<]+)</td></tr>\s*
128 </table></td></tr>\s*
129 <!--\s*end\s+chromosome\s+locus\s+-->}xis;
130 ($gene[FULLNAME]) = $response =~ m{>Protein\s+name</a>\s*
131 </td>\s*<td\s+class="value"\s+colspan="5">\s*
133 </td>\s*</tr>\s*<!--end\s+title-->}xis;
134 print STDOUT join(',', map {if (defined $_) {qq("$_");} else {qq("NO DATA");}} @gene[0..2]),qq(\n);