From 1e9070660246b20b55997f628e69a2f0f4878e8f Mon Sep 17 00:00:00 2001
From: Don Armstrong
Date: Mon, 16 Dec 2013 18:28:50 -0800
Subject: [PATCH] add start of geo downloader

---
 geo_downloader | 160 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 160 insertions(+)
 create mode 100755 geo_downloader

diff --git a/geo_downloader b/geo_downloader
new file mode 100755
index 0000000..541d319
--- /dev/null
+++ b/geo_downloader
@@ -0,0 +1,160 @@
+#!/usr/bin/perl
+# geo_downloader downloads expression files from GEO (NCBI)
+# and is released under the terms of the GNU GPL version 3, or any
+# later version, at your option. See the file README and COPYING for
+# more information.
+# Copyright 2013 by Don Armstrong .
+
+
+use warnings;
+use strict;
+
+use Getopt::Long;
+use Pod::Usage;
+
+=head1 NAME
+
+geo_downloader - downloads expression files from GEO (NCBI)
+
+=head1 SYNOPSIS
+
+geo_downloader [options] ACCESSION [ACCESSION ...]
+
+ Options:
+   --debug, -d debugging level (Default 0)
+   --help, -h display this help
+   --man, -m display manual
+
+=head1 DESCRIPTION
+
+Connects anonymously to the GEO FTP site and, for each GEO accession
+given (GSE, GDS, GPL or GSM followed by digits, in any case), prints
+the listing of that accession's directory to standard output.
+
+=head1 OPTIONS
+
+=over
+
+=item B<--debug, -d>
+
+Debug verbosity. (Default 0)
+
+=item B<--help, -h>
+
+Display brief usage information.
+
+=item B<--man, -m>
+
+Display this manual.
+
+=back
+
+=head1 EXAMPLES
+
+geo_downloader GSE20400
+
+=cut
+
+
+use Net::FTP;
+
+# Top-level directory on the GEO FTP site for each accession type.
+my %GEO_TYPE_DIR = (GSE => 'series',
+                    GDS => 'datasets',
+                    GPL => 'platforms',
+                    GSM => 'samples',
+                   );
+
+# A valid accession is a known type prefix followed by digits, in any
+# case. This single pattern is used both to detect and to report
+# invalid accessions so the two can never disagree.
+my $GEO_ACC_RE = qr/\A(?:gpl|gse|gsm|gds)\d+\z/i;
+
+my %options = (debug => 0,
+               help => 0,
+               man => 0,
+               host => 'ftp.ncbi.nlm.nih.gov',
+              );
+
+# GetOptions has already printed a warning when it returns false
+GetOptions(\%options,
+           'debug|d+','help|h|?','man|m') or
+    pod2usage();
+
+pod2usage() if $options{help};
+pod2usage({verbose=>2}) if $options{man};
+
+my $DEBUG = $options{debug};
+
+my @USAGE_ERRORS;
+if (not @ARGV) {
+    push @USAGE_ERRORS,"You must give at least one GEO accession";
+}
+
+my @invalid_acc = grep {$_ !~ $GEO_ACC_RE} @ARGV;
+if (@invalid_acc) {
+    push @USAGE_ERRORS,"Invalid GEO accession(s): ".
+        join(',',@invalid_acc);
+}
+
+
+pod2usage(join("\n",@USAGE_ERRORS)) if @USAGE_ERRORS;
+
+# Net::FTP->new leaves the reason for a failed connection in $@
+my $ftp = Net::FTP->new($options{host},Debug=>$DEBUG,Passive=>1) or
+    die "Unable to connect to $options{host}: $@";
+print STDERR "Connected\n" if $DEBUG;
+
+$ftp->login('anonymous') or
+    die "Unable to login: ".$ftp->message;
+print STDERR "logged in\n" if $DEBUG;
+
+$ftp->binary() or
+    die "Unable to switch to binary mode: ".$ftp->message;
+print STDERR "binary\n" if $DEBUG;
+
+
+
+my $failures = 0;
+for my $geo_acc (@ARGV) {
+    my $geo_directory = geo_directory($geo_acc);
+    # If the cwd fails we are still in the previous directory, and
+    # listing it would print the wrong files for this accession.
+    if (not $ftp->cwd($geo_directory)) {
+        warn "Unable to change to $geo_directory: ".$ftp->message;
+        $failures++;
+        next;
+    }
+    print STDERR "changed to $geo_directory\n" if $DEBUG;
+    print STDERR "(really) changed to ".$ftp->pwd()."\n" if $DEBUG;
+    my @files = $ftp->ls();
+    print map {$_."\n"} @files;
+    print STDERR "transferred listing\n" if $DEBUG;
+}
+
+$ftp->quit();
+
+exit($failures ? 1 : 0);
+
+# geo_directory($geo_acc)
+#
+# Returns the directory on the GEO FTP site holding the files for the
+# accession $geo_acc (any case), for example
+# GSE20400 => /geo/series/GSE20nnn/GSE20400. Dies if $geo_acc is not a
+# GSE, GDS, GPL or GSM accession.
+sub geo_directory {
+    my $geo_acc = shift;
+    $geo_acc = uc($geo_acc);
+    my ($geo_type) = $geo_acc =~ /\A([A-Z]+)\d+\z/
+        or die "Invalid GEO accession: $geo_acc\n";
+    my $geo_type_dir = $GEO_TYPE_DIR{$geo_type}
+        or die "Unknown GEO accession type: $geo_acc\n";
+    # The range directory replaces the last three digits with 'nnn'.
+    # Accessions with fewer than four digits have no leading digits
+    # left over (GSE1 => GSEnnn), so one to three digits must match.
+    my $geo_acc_dir = $geo_acc;
+    $geo_acc_dir =~ s/\d{1,3}\z/nnn/;
+    return "/geo/".$geo_type_dir.'/'.$geo_acc_dir.'/'.$geo_acc;
+}
+
+__END__
-- 
2.39.2