#! /usr/bin/perl
# get_bleachexile fetches manga from manga.bleachexile.com, and is released
# under the terms of the GPL version 2, or any later version, at your
# option. See the file README and COPYING for more information.
# Copyright 2009 by Don Armstrong.
# $Id: perl_script 1432 2009-04-21 02:42:41Z don $


use warnings;
use strict;

use Getopt::Long;
use Pod::Usage;

=head1 NAME

get_bleachexile - download manga chapters from manga.bleachexile.com

=head1 SYNOPSIS

get_bleachexile [options] [manga ...]

 Options:
  --debug, -d debugging level (Default 0)
  --help, -h display this help
  --man, -m display manual

=head1 DESCRIPTION

For every manga named on the command line, fetches the manga's index
page, then every chapter page and every page image that is not already
present locally. Images are stored as F<manga/chapter/page_name>, where
the chapter and page numbers are zero-padded to four digits.

If no manga is given, every non-hidden subdirectory of the current
directory is treated as the name of a manga to update.

A chapter whose directory already exists is skipped. The script sleeps
three seconds after each downloaded page, and retries failed requests
up to eight times with a five second pause between attempts.

The exit status is non-zero if any manga, chapter or page could not be
retrieved.

=head1 OPTIONS

=over

=item B<--debug, -d>

Debug verbosity. (Default 0)

=item B<--help, -h>

Display brief usage information.

=item B<--man, -m>

Display this manual.

=back

=head1 EXAMPLES

Download (or update) a single manga into F<./bleach>:

  get_bleachexile bleach

Update every manga which already has a directory in the current
directory:

  get_bleachexile

=cut


use URI::Escape;
use WWW::Mechanize;
use HTTP::Cookies;
use IO::Dir;
use IO::File;

# Package global so that the helper subroutines below can consult it.
our $DEBUG;

my %options = (debug       => 0,
               help        => 0,
               man         => 0,
               bleachexile => 'http://manga.bleachexile.com',
              );

GetOptions(\%options,
           'debug|d+','help|h|?','man|m');

pod2usage() if $options{help};
pod2usage({verbose=>2}) if $options{man};

$DEBUG = $options{debug};

my @USAGE_ERRORS;
# if (1) {
#      push @USAGE_ERRORS,"You must give the name of a manga";
# }

pod2usage(join("\n",@USAGE_ERRORS)) if @USAGE_ERRORS;

# Without arguments, treat every non-hidden subdirectory of the current
# directory as a manga to update.
my @manga_to_get = @ARGV;
if (not @manga_to_get) {
    my $d = IO::Dir->new('.') or
        die "Unable to open directory . for reading";
    while (defined(my $entry = $d->read)) {
        next if $entry =~ /^\./;
        next unless -d $entry;
        push @manga_to_get, $entry;
    }
}

# Becomes 1 as soon as anything could not be retrieved; used as the
# exit status of the script.
my $failure = 0;

my $m = WWW::Mechanize->new();
$m->cookie_jar(HTTP::Cookies->new());
# NOTE(review): this cookie is set for www.1000manga.com, not for the
# bleachexile host used below -- confirm whether it is still needed.
$m->cookie_jar()->set_cookie(1,'age_verified','42','/','www.1000manga.com');
# use Data::Dumper;
# print STDERR Dumper($m->cookie_jar());

for my $manga (@manga_to_get) {
    my $manga_base_url = $options{bleachexile}.'/'.lc($manga);

    # see if the manga exists
    mm_get($m, $manga_base_url.'.html');
    if ($m->status() != 200) {
        print STDERR "Manga $manga doesn't exist\n";
        $failure ||= 1;
        next;
    }
    if (! -d $manga and not mkdir($manga)) {
        print STDERR "Unable to create directory $manga: $!\n";
        $failure ||= 1;
        next;
    }

    # Find out how many chapters there are. The capture group makes the
    # match return the bare chapter numbers (for example "12" or "12.5")
    # instead of the whole matched text; the numbers are needed below to
    # build both the URL and the directory name.
    # NOTE(review): the pattern matches "Chapter N" anywhere in the page
    # content -- confirm against the site's actual markup.
    my @chapters = $m->content() =~ m{\s*Chapter\s+\#?(\d+(?:\.\d+)*)\s*}gi;

    for my $chapter (uniq(@chapters)) {
        # Zero-pad every dot-separated component: "12.5" => "0012.0005".
        my $chapter_long =
            join('.', map { sprintf('%04d', $_) } split(/\./, $chapter));
        my $chapter_dir = "$manga/$chapter_long";

        # An existing directory means the chapter was fetched earlier.
        next if -d $chapter_dir;

        my $chapter_url = $manga_base_url.'-chapter-'.$chapter.'.html';
        print $chapter_url,qq(\n);
        mm_get($m, $chapter_url);
        if (not $m->success()) {
            # Do not create the directory, so that the chapter is
            # attempted again on the next run.
            print STDERR "Unable to fetch $chapter_url\n";
            $failure ||= 1;
            next;
        }

        # Find out how many pages there are; as above, capture only the
        # page identifier.
        my @pages =
            uniq($m->content() =~ m{\s*Page\s+\#?([\d\.\-]+)\s*}gi);

        if (not mkdir($chapter_dir)) {
            print STDERR "Unable to create directory $chapter_dir: $!\n";
            $failure ||= 1;
            next;
        }

        for my $page (@pages) {
            my $page_url =
                $manga_base_url.'-chapter-'.$chapter.'-page-'.$page.'.html';
            print $page_url.qq(\n);
            mm_get($m, $page_url);

            # find_image returns undef when no image matches.
            my $image =
                $m->find_image(url_abs_regex => qr{static\.bleachexile\.com/manga/});
            if (not defined $image) {
                print STDERR "No image found on $page_url\n";
                $failure ||= 1;
                next;
            }

            my $image_url = $image->url_abs();
            mm_get($m, $image_url);
            print "getting ".$image_url."\n";
            if (not $m->success()) {
                # Avoid saving an error page as if it were the image.
                print STDERR "Unable to fetch $image_url\n";
                $failure ||= 1;
                next;
            }

            # Name the file after the zero-padded page number followed by
            # the last path component of the image URL.
            my ($page_long) = $image_url =~ m/([^\/]+)$/;
            $page_long = '' if not defined $page_long;
            $page_long = sprintf('%04d',$page).'_'.$page_long;
            $page_long =~ s/(?:\%20)+/_/g;
            # /g is required: the first match is always the underscore
            # added just above, so a single substitution changes nothing.
            $page_long =~ s/[\s_-]+/_/g;
            $m->save_content("$chapter_dir/${page_long}");

            # Pause between pages.
            sleep 3;
        }
    }
}

exit $failure;

# mm_get($m, $url)
#
# Fetch $url with the WWW::Mechanize object $m. If get() throws an
# exception, wait five seconds and try again, for at most eight retries
# (nine attempts in total).
#
# Returns whatever $m->get() returned on the first attempt that did not
# throw, or undef if every attempt threw. Callers inspect $m itself
# (status, success, content) to find out what happened.
sub mm_get {
    my ($m, $url) = @_;
    my $retries = 8;
    my $return;
    for my $attempt (0 .. $retries) {
        # The trailing 1 makes success detectable without relying on $@.
        my $ok = eval { $return = $m->get($url); 1 };
        return $return if $ok;
        print STDERR "Fetching $url failed: $@" if $DEBUG;
        sleep 5 if $attempt < $retries;
    }
    return $return;
}

# uniq(@list)
#
# Returns the elements of @list with duplicates removed; the first
# occurrence of every value is kept and the original order is preserved.
sub uniq {
    my %seen;
    return grep { not $seen{$_}++ } @_;
}

__END__