--- /dev/null
+#! /usr/bin/perl
+
+
+=head1 NAME
+
+stupid_missing_names - look up RefSeq IDs or chromosome locations for gene names
+
+=head1 SYNOPSIS
+
+Some genes don't actually have locations recorded. This misnamed script
+is designed to take the names of those genes and try to figure out
+where they are actually located.
+
+=head1 DESCRIPTION
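+
+Reads one search term per line from standard input. For each term the
+script searches the Ensembl text view, follows the result numbered 1,
+extracts the gene's HUGO symbol from that page, and looks the symbol up
+in the nomenclature database at www.gene.ucl.ac.uk. It then prints one
+line per term: the RefSeq link text by default, or the chromosome
+location when C<$LOCATION> is set in the source. When a step finds
+nothing it prints C<NO DATA:1>, C<NO DATA:2>, C<NO LOCATION> or
+C<NO SEQUENCE> instead.
+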
+=cut
+
+
+use warnings;
+use strict;
+
+
+# Package globals shared by the rest of the script. They are declared with
+# `our` instead of the obsolete `use vars` pragma; both name the same
+# package variables.
+our ($DEBUG, $REVISION);
+
+BEGIN {
+    # The q$...$ literal looks like a Subversion keyword, so it is kept
+    # byte-for-byte to let keyword expansion keep rewriting the number.
+    # The regex pulls the revision number out of that literal.
+    ($REVISION) = q$LastChangedRevision: 1$ =~ /LastChangedRevision:\s+([^\s]+)/;
+
+    # Respect a value that was defined earlier; otherwise debugging is off.
+    $DEBUG = 0 unless defined $DEBUG;
+}
+
+use URI::ParamMunge;
+use LWP::UserAgent;
+
+# XXX parse config file
+
+# Output selector for the main loop: when true the chromosome location is
+# printed for each term, when false the RefSeq link text is printed.
+my $LOCATION = 0;
+
+# Hard-coded defaults; nothing in this script overrides them yet.
+my %options = (debug    => 0,
+               help     => 0,
+               man      => 0,
+               format   => 'xml',
+               database => 'gene',
+               dir      => '.',
+               # Single-quoted on purpose: ${search} stays a literal placeholder.
+               name     => '${search}_results_genecard',
+               terms    => '-',
+              );
+
+# Handle that the search terms are read from. '-' selects STDIN; any other
+# value is treated as a file name, so $terms is never left undefined.
+my $terms;
+if ($options{terms} eq '-') {
+    $terms = \*STDIN;
+}
+else {
+    open $terms, '<', $options{terms}
+        or die "Cannot open terms file '$options{terms}': $!\n";
+}
+
+# Shared user agent for every HTTP request; direct method call replaces the
+# indirect-object `new LWP::UserAgent(...)` form.
+my $ua = LWP::UserAgent->new(agent => "DA_get_harvester_results/$REVISION");
+
+# Fetch $url with an HTTP GET through the shared $ua user agent and return
+# the response body as a string.
+#
+# A request that does not succeed is reported on STDERR and yields the empty
+# string, so an error page is never handed to the callers' scraping regexes
+# as if it were a real result.
+#
+# The ($) prototype is kept on purpose: the sub is declared before its call
+# sites, so the prototype imposes scalar context on the argument expression,
+# and dropping it could change what the callers pass in.
+sub get_url($){
+    my $url = shift;
+
+    my $response = $ua->get($url);
+    unless ($response->is_success) {
+        warn "GET $url failed: ", $response->status_line, "\n";
+        return '';
+    }
+
+    return $response->content;
+}
+
+# Main loop: one search term per input line, one output line per term.
+#
+# For each term:
+#   1. search the Ensembl text view and take the link of the result numbered 1
+#      (prints "NO DATA:1" when there is none);
+#   2. fetch that page and extract the gene symbol marked "(HUGO ID)"
+#      (prints "NO DATA:2" when there is none);
+#   3. query the nomenclature listing for that symbol and print either the
+#      chromosome location ($LOCATION true, "NO LOCATION" on failure) or the
+#      RefSeq link text ($LOCATION false, "NO SEQUENCE" on failure).
+#
+# The "no result" cases use explicit blocks rather than `print ... and next`,
+# so that skipping to the next term never depends on print's return value.
+TERM:
+while (my $search = <$terms>) {
+    chomp $search;
+
+    # Step 1: Ensembl free-text search for the term.
+    my $search_url = uri_param_munge('http://www.ensembl.org/Homo_sapiens/textview?type=All&x=0&y=0',
+                                     {q => $search,
+                                     },
+                                    );
+    my $search_page = get_url($search_url);
+
+    my ($hit_path) = $search_page =~ m{<blockquote><b>1\.\s+Ensembl\s+[^<]+\s+</B>
+        <A\s+HREF="(/Homo[^"]+)">[^\"]+</A><BR>}xis;
+
+    unless (defined $hit_path) {
+        print "NO DATA:1\n";
+        next TERM;
+    }
+
+    # Step 2: the hit's own page carries the gene symbol.
+    my $gene_page = get_url("http://www.ensembl.org$hit_path");
+
+    # `.pl?` is escaped so it matches a literal "?" instead of making the
+    # "l" optional.
+    my ($symbol) = $gene_page =~ m{<tr\s+align="left"\s+valign="middle">\s*
+        <th\s+width="20%">Gene</th>\s*
+        <td\s+width="80%"><b><a\s+href="http://www.gene.ucl.ac.uk/cgi-bin/nomenclature/get_data\.pl\?[^"]+">
+        ([^<]+)</a>\s*</b>\s*<small>\(HUGO\s*ID\)</small> }xis;
+
+    unless (defined $symbol) {
+        print "NO DATA:2\n";
+        next TERM;
+    }
+
+    # The symbol is scraped text: strip surrounding whitespace and
+    # percent-encode anything that is not URL-safe before it goes into the
+    # query string. Plain symbols such as HLA-A pass through unchanged.
+    $symbol =~ s/\A\s+//;
+    $symbol =~ s/\s+\z//;
+    (my $escaped_symbol = $symbol) =~ s/([^A-Za-z0-9\-_.~])/sprintf('%%%02X', ord $1)/ge;
+
+    # Step 3: nomenclature listing for the symbol. Example of a full request:
+    #http://www.gene.ucl.ac.uk/cgi-bin/nomenclature/gdlw.pl?title=&col=gd_hgnc_id&col=gd_app_name&col=gd_status&col=gd_aliases&col=gd_pub_chrom_map&col=gd_pub_refseq_ids&status=Approved&status=Approved+Non-Human&status=Entry+Withdrawn&status_opt=3&=on&where=gd_app_sym+like+%27%25HLA-A%25%27&order_by=gd_app_sym_sort&limit=&format=html&submit=submit&.cgifields=&.cgifields=status&.cgifields=chr
+    my $listing_url = 'http://www.gene.ucl.ac.uk/cgi-bin/nomenclature/gdlw.pl?title='
+        . '&col=gd_hgnc_id&col=gd_app_name&col=gd_status&col=gd_aliases'
+        . '&col=gd_pub_chrom_map&col=gd_pub_refseq_ids'
+        . '&status=Approved&status=Approved+Non-Human&status=Entry+Withdrawn'
+        . '&status_opt=3&=on'
+        . "&where=gd_app_sym+like+%27%25${escaped_symbol}%25%27"
+        . '&order_by=gd_app_sym_sort&limit=&format=html&submit=submit'
+        . '&.cgifields=&.cgifields=status&.cgifields=chr';
+    my $listing = get_url($listing_url);
+
+    if ($LOCATION) {
+        my ($location) = $listing =~ m{<th\s+valign="TOP"\s+align="LEFT"\s+bgcolor="\#E6E6FF">
+            Chromosome<a\s+href="[^"]+">
+            \s*\+\s*</a></th><td\s+valign="TOP"\s+align="LEFT">\s*
+            ([^<]+?) # The chromosome location
+            \s*</td><th}xis;
+
+        unless (defined $location) {
+            print "NO LOCATION\n";
+            next TERM;
+        }
+        print $location, "\n";
+    }
+    else {
+        my ($ref_seq) = $listing =~ m{<td><a\s+href="http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi\?val=[^"]+">([^<]+)</a></td></tr>}xis;
+
+        unless (defined $ref_seq) {
+            print "NO SEQUENCE\n";
+            next TERM;
+        }
+        print $ref_seq, "\n";
+    }
+}
+
+
+
+
+
+
+__END__