--- /dev/null
+#! /usr/bin/perl
+# This program is released
+# under the terms of the GPL version 2, or any later version, at your
+# option. See the file README and COPYING for more information.
+# Copyright 2009 by Don Armstrong <don@donarmstrong.com>.
+# $Id: perl_script 1432 2009-04-21 02:42:41Z don $
+
+
+use warnings;
+use strict;
+
+use Getopt::Long;
+use Pod::Usage;
+
+=head1 NAME
+
+get_bleachexile - download manga chapters from manga.bleachexile.com
+
+=head1 SYNOPSIS
+
+ get_bleachexile [options] [manga ...]
+
+ Options:
+ --debug, -d debugging level (Default 0)
+ --help, -h display this help
+ --man, -m display manual
+
+=head1 OPTIONS
+
+=over
+
+=item B<--debug, -d>
+
+Debug verbosity. (Default 0)
+
+=item B<--help, -h>
+
+Display brief usage information.
+
+=item B<--man, -m>
+
+Display this manual.
+
+=back
+
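+=head1 EXAMPLES
+
+ get_bleachexile bleach
+
+Downloads every chapter of C<bleach> that does not already have a
+directory under F<bleach/>. With no arguments, every non-hidden
+subdirectory of the current directory is treated as a manga name.
+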
+=cut
+
+
+use URI::Escape;
+use WWW::Mechanize;
+use HTTP::Cookies;
+use IO::Dir;
+use IO::File;
+use vars qw($DEBUG);
+
+# Default settings. `bleachexile` is the base URL that every manga,
+# chapter and page URL below is built from; it is not settable from the
+# command line.
+my %options = (debug       => 0,
+               help        => 0,
+               man         => 0,
+               bleachexile => 'http://manga.bleachexile.com',
+              );
+
+# GetOptions returns false when it meets an unknown or malformed option;
+# stop with the usage message instead of silently carrying on.
+GetOptions(\%options,
+           'debug|d+','help|h|?','man|m') or pod2usage();
+
+pod2usage() if $options{help};
+pod2usage({verbose=>2}) if $options{man};
+
+# Expose the debug level (each -d increments it) as a package global.
+$DEBUG = $options{debug};
+
+# Usage problems are accumulated here; if any are present the script
+# stops and prints them together with the usage text.
+my @USAGE_ERRORS;
+# if (1) {
+#     push @USAGE_ERRORS,"You must give the name of a manga";
+# }
+
+pod2usage(join("\n",@USAGE_ERRORS)) if @USAGE_ERRORS;
+
+
+# Manga to fetch: the command-line arguments, or, when there are none,
+# every non-hidden subdirectory of the current directory.
+my @manga_to_get = @ARGV;
+
+unless (@manga_to_get) {
+    my $cwd = IO::Dir->new('.') or die "Unable to open directory . for reading";
+    # In list context read() returns all remaining entries at once.
+    @manga_to_get = grep { !/^\./ and -d $_ } $cwd->read;
+}
+
+
+# Main download loop.
+#
+# For each requested manga: fetch its index page, read the chapter
+# numbers out of the <option> elements, and for every chapter that does
+# not yet have a directory "$manga/NNNN", fetch each page, locate the
+# page image and save it as "$manga/NNNN/PPPP_<image file name>".
+# $failure becomes 1 if any manga or page image could not be found and
+# is used as the exit status.
+my $failure = 0;
+my $m = WWW::Mechanize->new();
+$m->cookie_jar(HTTP::Cookies->new());
+# NOTE(review): this cookie is scoped to www.1000manga.com, not to the
+# bleachexile host fetched below -- presumably a leftover; confirm
+# before removing.
+$m->cookie_jar()->set_cookie(1,'age_verified','42','/','www.1000manga.com');
+# use Data::Dumper;
+# print STDERR Dumper($m->cookie_jar());
+for my $manga (@manga_to_get) {
+    my $manga_base = $options{bleachexile}.'/'.lc($manga);
+    # see if the manga exists
+    mm_get($m,$manga_base.'.html');
+    if ($m->status() != 200) {
+        print STDERR "Manga $manga doesn't exist\n";
+        $failure ||= 1;
+        next;
+    }
+    if (! -d $manga) {
+        mkdir($manga) or die "Unable to create directory $manga: $!";
+    }
+    # Find out how many chapters there are
+
+    my @chapters = $m->content() =~ m{<option\s+value="(\d+)"\s*(?:selected="selected"\s*)?>\s*Chapter\s+\#?\d+\s*</option>}gi;
+
+    # "sort uniq(@list)" would parse as "sort SUBNAME LIST" and use uniq
+    # as the comparison routine; the explicit block makes uniq a plain
+    # function call and sorts the de-duplicated numbers numerically.
+    for my $chapter (sort { $a <=> $b } uniq(@chapters)) {
+        my $chapter_long = $chapter =~ /\./ ? join('.',map {sprintf'%04d',$_} split /\./,$chapter) : sprintf('%04d',$chapter);
+        # An existing chapter directory means the chapter is skipped.
+        next if -d "$manga/$chapter_long";
+
+        my $chapter_url = $manga_base.'-chapter-'.$chapter.'.html';
+        print $chapter_url,qq(\n);
+        mm_get($m,$chapter_url);
+        # Find out how many pages there are
+        my @pages = $m->content() =~ m{<option\s+value="(\d+)"\s*(?:selected="selected"\s*)?>\s*Page\s+\#?[\d\.\-]+\s*</option>}gi;
+        mkdir("$manga/$chapter_long")
+            or die "Unable to create directory $manga/$chapter_long: $!";
+        for my $page (sort { $a <=> $b } uniq(@pages)) {
+            my $page_url = $manga_base.'-chapter-'.$chapter.'-page-'.$page.'.html';
+            print $page_url.qq(\n);
+            mm_get($m,$page_url);
+            my $image = $m->find_image(url_abs_regex => qr{static\.bleachexile\.com/manga/});
+            # find_image returns undef when nothing matches; skip the
+            # page rather than die on a method call on undef.
+            if (not defined $image) {
+                print STDERR "No image found on $page_url\n";
+                $failure ||= 1;
+                next;
+            }
+            mm_get($m,$image->url_abs());
+            print "getting ".$image->url_abs()."\n";
+            # File name: zero-padded page number plus the last path
+            # component of the image URL.
+            my ($page_long) = $image->url_abs =~ m/([^\/]+)$/;
+            $page_long = sprintf('%04d',$page).'_'.$page_long;
+            $page_long =~ s/(?:\%20)+/_/g;
+            # NOTE(review): no /g here, so only the first run of
+            # whitespace/underscore/hyphen is collapsed; left as-is so
+            # the saved file names do not change.
+            $page_long =~ s/[\s_-]+/_/;
+            $m->save_content("$manga/$chapter_long/${page_long}");
+            # Pause between pages.
+            sleep 3;
+        }
+    }
+}
+
+# Report any missing manga or page image through the exit status.
+exit $failure;
+
+# mm_get($m, $url)
+#
+# Call $m->get($url), trapping exceptions. After a failed attempt, wait
+# 5 seconds and try again, up to 8 retries (9 attempts in total).
+# Returns whatever the successful get() returned, or undef if every
+# attempt threw.
+sub mm_get {
+    my ($m, $url) = @_;
+    my $response;
+    my $retries_left = 8;
+  ATTEMPT:
+    while (1) {
+        eval { $response = $m->get($url) };
+        last ATTEMPT unless $@;                  # succeeded
+        last ATTEMPT unless $retries_left-- > 0; # out of retries
+        last ATTEMPT unless sleep 5;             # pause before retrying
+    }
+    return $response;
+}
+
+# uniq(@values)
+#
+# Return @values with duplicates removed, keeping the first occurrence
+# of each value in its original position. Values are compared by their
+# string form, since they are used as hash keys. In scalar context the
+# result is the number of distinct values.
+sub uniq {
+    my %seen;
+    return grep { not $seen{$_}++ } @_;
+}
+
+
+__END__