#! /usr/bin/perl
# get_bleachexile is released under the terms of the GPL version 2, or
# any later version, at your option. See the file README and COPYING
# for more information.
# Copyright 2009 by Don Armstrong.
# $Id: perl_script 1432 2009-04-21 02:42:41Z don $
#
# Provenance: added as a new file (mode 100755) by the commit
# "* fix bleach exile", Don Armstrong, Tue, 2 Nov 2010 22:23:33 +0000,
# https://git.donarmstrong.com/?a=commitdiff_plain;h=0bc0d1413c6217be7ff6292595587f377f3f3c2d;p=bin.git


use warnings;
use strict;

use Getopt::Long;
use Pod::Usage;

=head1 NAME

get_bleachexile - download manga chapters from manga.bleachexile.com

=head1 SYNOPSIS

 get_bleachexile [options] [manga ...]

 Options:
  --debug, -d debugging level (Default 0)
  --help, -h display this help
  --man, -m display manual

=head1 DESCRIPTION

For every manga named on the command line (or, when none is given,
for every non-hidden subdirectory of the current directory),
B<get_bleachexile> fetches the manga's index page, works out which
chapters exist, and downloads every page image of each chapter that
does not already have a directory F<manga/NNNN>.

Images are saved as F<manga/NNNN/PPPP_originalname>, where C<NNNN> is
the zero-padded chapter number and C<PPPP> the zero-padded page
number.

The exit status is non-zero if any manga, chapter, page or image
could not be retrieved.

=head1 OPTIONS

=over

=item B<--debug, -d>

Debug verbosity. (Default 0)

=item B<--help, -h>

Display brief usage information.

=item B<--man, -m>

Display this manual.

=back

=head1 EXAMPLES

 get_bleachexile bleach

=cut


use URI::Escape;
use WWW::Mechanize;
use HTTP::Cookies;
use IO::Dir;
use IO::File;

our $DEBUG;

my %options = (debug       => 0,
               help        => 0,
               man         => 0,
               bleachexile => 'http://manga.bleachexile.com',
              );

GetOptions(\%options,
           'debug|d+','help|h|?','man|m') or pod2usage(2);

pod2usage() if $options{help};
pod2usage({verbose=>2}) if $options{man};

$DEBUG = $options{debug};

my @USAGE_ERRORS;
# if (1) {
#     push @USAGE_ERRORS,"You must give the name of a manga";
# }

pod2usage(join("\n",@USAGE_ERRORS)) if @USAGE_ERRORS;


my @manga_to_get = @ARGV;

# With no arguments, treat every non-hidden subdirectory of the
# current directory as the name of a manga to update.
if (not @manga_to_get) {
    my $d = IO::Dir->new('.')
        or die "Unable to open directory . for reading: $!";
    while (defined(my $entry = $d->read)) {
        next if $entry =~ /^\./;
        next unless -d $entry;
        push @manga_to_get,$entry;
    }
    $d->close;
}


my $failure = 0;
my $m = WWW::Mechanize->new();
$m->cookie_jar(HTTP::Cookies->new());
# NOTE(review): this cookie is set for www.1000manga.com, not for the
# bleachexile host used below; presumably left over from a sibling
# script -- confirm whether it is still needed.
$m->cookie_jar()->set_cookie(1,'age_verified','42','/','www.1000manga.com');
# use Data::Dumper;
# print STDERR Dumper($m->cookie_jar());

MANGA:
for my $manga (@manga_to_get) {
    # see if the manga exists
    mm_get($m,$options{bleachexile}.'/'.lc($manga).'.html');
    my $status = $m->status();
    if (not defined $status or $status != 200) {
        print STDERR "Manga $manga doesn't exist\n";
        $failure ||= 1;
        next MANGA;
    }
    if (! -d $manga and not mkdir($manga)) {
        print STDERR "Unable to create directory $manga: $!\n";
        $failure ||= 1;
        next MANGA;
    }

    # Find out how many chapters there are.
    # NOTE(review): only the visible "Chapter N" text is matched here;
    # confirm the pattern against the site's current markup.
    my @chapters = $m->content() =~ m{\s*Chapter\s+\#?(\d+(?:\.\d+)?)\s*}gi;

    # The explicit comparison block is required: a bare
    # "sort uniq(@chapters)" parses as "sort SUBNAME LIST" and would
    # use uniq as the comparison routine.
    CHAPTER:
    for my $chapter (sort { $a <=> $b } uniq(@chapters)) {
        # "12" becomes "0012", "12.5" becomes "0012.0005"
        my $chapter_long = join('.',map {sprintf('%04d',$_)} split /\./,$chapter);
        my $chapter_url = $options{bleachexile}.'/'.lc($manga).'-chapter-'.$chapter.'.html';
        # An existing chapter directory means the chapter was fetched
        # on an earlier run.
        next CHAPTER if -d "$manga/$chapter_long";

        print $chapter_url,qq(\n);
        if (not fetch_ok($m,$chapter_url)) {
            # Do not create the directory, so that the chapter is
            # retried on the next run.
            print STDERR "Unable to fetch $chapter_url\n";
            $failure ||= 1;
            next CHAPTER;
        }
        # Find out how many pages there are
        my @pages = grep { /\d/ }
            $m->content() =~ m{\s*Page\s+\#?([\d\.\-]+)\s*}gi;
        if (not mkdir("$manga/$chapter_long")) {
            print STDERR "Unable to create directory $manga/$chapter_long: $!\n";
            $failure ||= 1;
            next CHAPTER;
        }
        # NOTE: the chapter directory now exists, so pages which fail
        # below are not retried automatically; remove the directory to
        # force the chapter to be fetched again.
        PAGE:
        for my $page (sort { first_number($a) <=> first_number($b)
                                 or $a cmp $b } uniq(@pages)) {
            my $page_url = $options{bleachexile}.'/'.lc($manga).'-chapter-'.$chapter.'-page-'.$page.'.html';
            print $page_url.qq(\n);
            if (not fetch_ok($m,$page_url)) {
                print STDERR "Unable to fetch $page_url\n";
                $failure ||= 1;
                next PAGE;
            }
            my $image = $m->find_image(url_abs_regex => qr{static\.bleachexile\.com/manga/});
            if (not defined $image) {
                print STDERR "No image found on $page_url\n";
                $failure ||= 1;
                next PAGE;
            }
            my $image_url = $image->url_abs();
            print "getting ".$image_url."\n";
            if (not fetch_ok($m,$image_url)) {
                print STDERR "Unable to fetch $image_url\n";
                $failure ||= 1;
                next PAGE;
            }
            # File name: zero-padded page number plus the image's own
            # base name, with runs of %20, whitespace, '_' and '-'
            # collapsed to a single '_'.
            my ($page_long) = $image_url =~ m/([^\/]+)$/;
            $page_long = '' unless defined $page_long;
            $page_long = sprintf('%04d',first_number($page)).'_'.$page_long;
            $page_long =~ s/(?:\%20)+/_/g;
            $page_long =~ s/[\s_-]+/_/g;
            $m->save_content("$manga/$chapter_long/${page_long}");
        }
        continue {
            # be gentle with the server, even after a failed page
            sleep 3;
        }
    }
}

exit $failure;

# mm_get($m,$url)
#
# Fetch $url with the WWW::Mechanize object $m.  If get() dies, wait 5
# seconds and try again, up to 8 retries.  Returns whatever get()
# returned, or undef if every attempt died.
sub mm_get{
    my ($m,$url) = @_;
    my $rerun = 8;
    my $return;
    do {
        eval {
            $return = $m->get($url);
        };
    } while ($@ and
             ($rerun-- > 0) and sleep 5);
    return $return;
}

# fetch_ok($m,$url)
#
# Fetch $url via mm_get and return 1 if a successful response was
# obtained, 0 otherwise.
sub fetch_ok {
    my ($m,$url) = @_;
    my $response = mm_get($m,$url);
    return (defined $response and $response->is_success) ? 1 : 0;
}

# first_number($token)
#
# Return the first run of digits in $token ("12-13" gives 12), or 0 if
# $token contains no digit.  Used to order and zero-pad page tokens
# without numeric warnings.
sub first_number {
    my ($token) = @_;
    return $token =~ /(\d+)/ ? $1 : 0;
}

# uniq(@list)
#
# Return the elements of @list with duplicates removed, keeping the
# first occurrence of each and preserving order.
sub uniq {
    my %existing;
    return grep { not $existing{$_}++ } @_;
}


__END__