3 # get_genecard_results retrieves files of search results from GeneCards,
4 # and is released under the terms of the GPL version 2, or any later
5 # version, at your option. See the file README and COPYING for more
8 # Copyright 2004 by Don Armstrong <don@donarmstrong.com>.
10 # $Id: ss,v 1.1 2004/06/29 05:26:35 don Exp $
22 get_genecard_results [options]
28 --dir, -D directory to stick results into [default .]
29 --name, -n naming scheme for the results directory [default ${search}_results_genecard]
30 --terms, -t file of search terms [default -]
31 --debug, -d debugging level [default 0]
32 --help, -h display this help
33 --man, -m display manual
41 Debug verbosity. (Default 0)
45 Display brief usage information.
55 get_genecard_results -D ./genecard_results/ -n '${search}_name.html' < search_parameters
57 Will pretty much do what you want
# Package globals: $DEBUG (verbosity) and $REVISION (used in the HTTP
# user-agent string further down).
63 use vars qw($DEBUG $REVISION);
# Pull the revision number out of the version-control keyword string; the
# list assignment captures the first regex group.
66 ($REVISION) = q$LastChangedRevision: 1$ =~ /LastChangedRevision:\s+([^\s]+)/;
# Keep any value of $DEBUG that was set earlier; otherwise default to 0.
67 $DEBUG = 0 unless defined $DEBUG;
74 # XXX parse config file
# Built-in defaults, overridden by the command line below.  'name' is
# single-quoted on purpose: the ${search} placeholder stays literal here and
# is only expanded later by the string eval that builds $dir_name.
76 my %options = (debug => 0,
82 name => '${search}_results_genecard',
84 genecard_site => 'http://bioinfo.weizmann.ac.il/cards-bin/',
85 genecard_search_url => 'cardsearch.pl?search_type=kwd&mini=no&speed=fast&matches=999999',
# Parse the command line straight into %options.  'debug|d+' is an
# incrementing flag, so repeating -d raises the debug level.
88 GetOptions(\%options,'format|f=s','database|b=s','name|n=s',
89 'terms|t=s','dir|D=s','debug|d+','help|h|?','man|m');
# --help prints the brief usage text; --man prints the full POD (verbose => 2).
91 pod2usage() if $options{help};
92 pod2usage({verbose=>2}) if $options{man};
94 $DEBUG = $options{debug};
# Refuse to run unless the output directory already exists.
96 if (not -d $options{dir}) {
97 die "$options{dir} does not exist or is not a directory";
100 #open search terms file
# A terms value of '-' is handled specially (presumably read from STDIN --
# that branch is not visible here, TODO confirm); any other value is opened
# as a file for reading.
# NOTE(review): 'new IO::File ...' is indirect-object syntax; the low
# precedence 'or' makes the die apply to the constructor's result.
102 if ($options{terms} eq '-') {
106 $terms = new IO::File $options{terms}, 'r' or die "Unable to open file $options{terms}: $!";
111 # Get uids to retrieve
# Build the search URL: site prefix + search script with its fixed query
# string, then re-set the query form from the existing parameters plus
# additional ones (the extra arguments are not visible in this excerpt).
114 my $uri = URI->new($options{genecard_site}.$options{genecard_search_url});
115 $uri->query_form($uri->query_form(),
118 my $url = $uri->as_string;
# NOTE(review): the agent string says "get_harvester_results" although this
# script is get_genecard_results -- looks like a copy-paste leftover; confirm
# before changing, since the remote site sees this string.
119 my $mech = WWW::Mechanize->new(agent=>"DA_get_harvester_results/$REVISION");
# Body of the page currently held by $mech (the request that loads it is not
# visible in this excerpt).
121 my $response = $mech->content();
# Collect the href of every <a target='card' href="carddisp.pl?..."> link.
# /g in list context returns all captures; /s only affects '.', which this
# pattern does not use.
122 my @result_urls = $response =~ m#<a\s+target\=\'card\'\s+href=\"(carddisp\.pl\?[^\"]+)\"\s*>#sg;
# Expand the naming template by evaluating it as a double-quoted Perl string,
# so placeholders such as ${search} are interpolated from variables in scope.
# NOTE(review): this is a string eval of the user-supplied --name value, so
# any Perl embedded in it is executed.  It also dies when the expanded name
# is false (empty string or "0"), not only when the eval itself fails.
123 my $dir_name = eval qq("$options{name}") or die $@;
# Create the per-search results directory on first use.
124 if (not -d "$options{dir}/$dir_name") {
125 mkdir("$options{dir}/$dir_name") or die "Unable to make directory $options{dir}/$dir_name $!";
# Download in batches: splice removes up to 30 relative URLs from
# @result_urls per pass, map prefixes each with the site base, and the loop
# stops once the assigned list is empty.
# (@current_urls has no 'my' on this line; its declaration is not visible
# in this excerpt.)
129 while (@current_urls = map{$options{genecard_site}.$_} splice(@result_urls,0,30)) {
# List-form system() runs wget without a shell.  Flags: -nd/-nH write files
# flat with no directory or host-name hierarchy, -w 2 with --random-wait
# spaces requests by a randomised delay around 2 seconds, -P sets the
# download directory.
# NOTE(review): on a non-zero wget exit the status is in $?; $! is only
# meaningful when the command could not be started at all, so this warning
# may print an unrelated or empty message.
130 system(q(wget),'-nd','-nH','-w','2','--random-wait','-P',qq($options{dir}/$dir_name),@current_urls) == 0 or warn "$!";