# geo_downloader downloads expression files from GEO (NCBI)
# and is released under the terms of the GNU GPL version 3, or any
# later version, at your option. See the files README and COPYING for
# more information.
# Copyright 2013 by Don Armstrong <don@donarmstrong.com>.
=head1 NAME

geo_downloader - downloads expression files from GEO (NCBI)

=head1 SYNOPSIS

 geo_downloader [options] [GSE...]

 Options:
  --debug, -d debugging level (Default 0)
  --help, -h display this help
  --man, -m display manual

=head1 OPTIONS

=over

=item B<--debug, -d>

Debug verbosity. (Default 0)

=item B<--help, -h>

Display brief usage information.

=item B<--man, -m>

Display the manual.

=back

=head1 EXAMPLES

 geo_downloader GSE20400

=cut
# ----------------------------------------------------------------------
# Main program.
#
# 1. Parse the command line (--debug may be repeated to raise verbosity).
# 2. Validate that every argument looks like a GEO accession
#    (GPL/GSE/GSM/GDS followed by digits, case-insensitive).
# 3. Open an anonymous, passive, binary-mode FTP session to the GEO host.
# 4. For each accession, list its remote directory and download the
#    plain files found directly under matrix/, miniml/ and suppl/ into a
#    local directory named after the accession.
# ----------------------------------------------------------------------

my %options = (debug => 0,
               help  => 0,
               man   => 0,
               host  => 'ftp.ncbi.nlm.nih.gov',
              );

GetOptions(\%options,
           'debug|d+','help|h|?','man|m');

pod2usage() if $options{help};
pod2usage({verbose=>2}) if $options{man};

$DEBUG = $options{debug};

my @USAGE_ERRORS;
if (not @ARGV) {
    push @USAGE_ERRORS,"You must give at least one GEO accession";
}

# A single pattern is used both to detect and to report bad accessions,
# so the error message lists exactly the arguments that were rejected.
my $geo_accession_re = qr/\A(?:gpl|gse|gsm|gds)\d+\z/i;
my @invalid_accessions = grep { $_ !~ $geo_accession_re } @ARGV;
if (@invalid_accessions) {
    push @USAGE_ERRORS,"Invalid GEO accession(s): ".
        join(',',@invalid_accessions);
}

pod2usage(join("\n",@USAGE_ERRORS)) if @USAGE_ERRORS;

# Net::FTP->new leaves its failure reason in $@.
my $ftp = Net::FTP->new($options{host},Debug=>$DEBUG,Passive=>1) or
    die "Unable to create new Net::FTP object: $@";
print STDERR "Connected\n" if $DEBUG;

$ftp->login('anonymous') or
    die "Unable to login: ".$ftp->message;
print STDERR "logged in\n" if $DEBUG;

# Expression archives are compressed; ASCII mode would corrupt them.
$ftp->binary() or
    die "Unable to switch to binary mode: ".$ftp->message;
print STDERR "binary\n" if $DEBUG;

for my $geo_acc (@ARGV) {
    my $geo_directory = geo_directory($geo_acc);
    my $listing = recursive_file_listing($ftp,$geo_directory);
    p($listing) if $DEBUG;

    # Downloads land in ./<accession>/; remember where we started so the
    # next accession is created alongside it rather than inside it.
    my $orig_dir = getcwd;
    if (not -d $geo_acc) {
        mkdir($geo_acc) or
            die "Unable to create directory $geo_acc: $!";
    }
    chdir($geo_acc) or
        die "Unable to change to directory $geo_acc: $!";

    for my $dir (qw(matrix miniml suppl)) {
        # we want all of the raw files, the xml file, and the matrix file
        next unless exists $listing->{$dir} and ref($listing->{$dir});
        for my $file (sort keys %{$listing->{$dir}}) {
            # nested hashes are sub-directories; only plain files are fetched
            next if ref($listing->{$dir}{$file});
            my $remote_file = $listing->{$dir}{$file};
            # One failed transfer should not abort the remaining files.
            $ftp->get($remote_file) or
                warn "Unable to download $remote_file: ".$ftp->message;
        }
    }

    chdir($orig_dir) or
        die "Unable to change back to directory $orig_dir: $!";
}

$ftp->quit;
# Build a nested listing of everything below $dir on the FTP server.
#
# Arguments:
#   $ftp - a connected, logged-in Net::FTP object (anything providing
#          pwd, cwd and ls works)
#   $dir - directory to list; absolute, or relative to the current
#          remote working directory
#
# Returns a hash reference keyed by entry name. The value for a
# sub-directory is another such hash reference; the value for anything
# else is the absolute remote path of that entry, suitable for
# $ftp->get().
#
# Dies with "Not a directory $dir" if $dir itself cannot be entered.
# The remote working directory is restored before returning.
sub recursive_file_listing {
    my ($ftp,$dir) = @_;

    my $listing = _list_remote_directory($ftp,$dir);
    die "Not a directory $dir" if not defined $listing;
    return $listing;
}

# Recursive worker for recursive_file_listing. Returns undef (instead of
# dying) when $dir cannot be entered, which is how plain files are told
# apart from directories while walking the tree.
sub _list_remote_directory {
    my ($ftp,$dir) = @_;

    my $orig_dir = $ftp->pwd();
    $ftp->cwd($dir) or return;

    # Ask the server where we are rather than gluing path pieces
    # together; this is correct for both absolute and relative $dir.
    my $abs_dir = $ftp->pwd();
    $abs_dir =~ s{/+\z}{};

    my $listing = {};
    for my $file ($ftp->ls()) {
        # Some servers list these; following them would never terminate.
        next if $file eq '.' or $file eq '..';
        print STDERR "file: $file\n" if $DEBUG;
        my $subdirs = _list_remote_directory($ftp,$file);
        if ($DEBUG) {
            print STDERR "subdirs is :";
            print STDERR p($subdirs);
        }
        $listing->{$file} = defined $subdirs
            ? $subdirs
            : $abs_dir.'/'.$file;
    }

    $ftp->cwd($orig_dir) or
        die "Unable to return to directory $orig_dir";
    return $listing;
}
# Map a GEO accession to its directory on the GEO FTP site.
#
# Argument:
#   $geo_acc - accession such as GSE20400, GDS507, GPL96 or GSM1234
#              (case-insensitive)
#
# Returns the absolute remote path
#   /geo/<type>/<stub>/<ACCESSION>
# where <type> is series, datasets, platforms or samples, and <stub> is
# the upper-cased accession with its last (up to) three digits replaced
# by "nnn", e.g. GSE20400 -> GSE20nnn and GSE20 -> GSEnnn.
#
# Dies if the accession does not start with a known GEO prefix.
sub geo_directory {
    my ($geo_acc) = @_;

    my %type_dir_for = (GSE => 'series',
                        GDS => 'datasets',
                        GPL => 'platforms',
                        GSM => 'samples',
                       );

    $geo_acc = uc($geo_acc);
    my ($prefix) = $geo_acc =~ /\A(GSE|GDS|GPL|GSM)/
        or die "Unknown GEO accession type: $geo_acc";

    # Accessions below 1000 live under e.g. GSEnnn, so one to three
    # trailing digits must be replaced, not exactly three.
    my $geo_acc_dir = $geo_acc;
    $geo_acc_dir =~ s/\d{1,3}\z/nnn/;

    return join('/','/geo',$type_dir_for{$prefix},$geo_acc_dir,$geo_acc);
}