#! /usr/bin/perl
# , and is released
# under the terms of the GPL version 2, or any later version, at your
# option. See the file README and COPYING for more information.
# Copyright 2009 by Don Armstrong .
# $Id: perl_script 1432 2009-04-21 02:42:41Z don $

use warnings;
use strict;

use Getopt::Long;
use Pod::Usage;

=head1 NAME

get_one_manga - get_one_manga [manga]

=head1 SYNOPSIS

 [options]

 Options:
  --debug, -d debugging level (Default 0)
  --help, -h display this help
  --man, -m display manual

=head1 OPTIONS

=over

=item B<--debug, -d>

Debug verbosity. (Default 0)

=item B<--help, -h>

Display brief usage information.

=item B<--man, -m>

Display this manual.

=back

=head1 EXAMPLES

=cut

use URI::Escape;
use WWW::Mechanize;
use IO::Dir;
use IO::File;

our $DEBUG;

my %options = (
    debug    => 0,
    help     => 0,
    man      => 0,
    onemanga => 'http://www.onemanga.com',
);

GetOptions(\%options, 'debug|d+', 'help|h|?', 'man|m');

pod2usage() if $options{help};
pod2usage({verbose => 2}) if $options{man};

$DEBUG = $options{debug};

my @USAGE_ERRORS;
# if (1) {
#     push @USAGE_ERRORS,"You must give the name of a manga";
# }

pod2usage(join("\n", @USAGE_ERRORS)) if @USAGE_ERRORS;

# The manga to fetch are the command line arguments; when none are
# given, every non-hidden subdirectory of the current directory is
# treated as the name of a manga to update.
my @manga_to_get = @ARGV;
if (not @ARGV) {
    my $d = IO::Dir->new('.')
        or die "Unable to open directory . for reading: $!";
    # A lexical is used here instead of the global $_ so that nothing
    # else relying on $_ is clobbered.
    while (defined(my $entry = $d->read)) {
        next if $entry =~ /^\./;
        next unless -d $entry;
        push @manga_to_get, $entry;
    }
}

exit 0 unless @manga_to_get;

# Set to 1 as soon as any manga (or page) could not be retrieved; used
# as the exit status of the script.
my $failure = 0;

my $m = WWW::Mechanize->new();
my $alpha_response = mm_get($m, "http://www.mangareader.net/alphabetical");
if (not defined $alpha_response or $m->status() != 200) {
    print STDERR "Unable to get alphabetical listing of manga\n";
    exit 1;
}

# Keep a copy of the browser positioned on the alphabetical listing so
# that each manga can be looked up without re-fetching the listing.
my $alpha_list = $m->clone();

MANGA:
for my $manga (@manga_to_get) {
    # see if the manga exists
    my $manga_regex = $manga;
    $manga_regex =~ s/\W+/[_-]/g;
    my $manga_url = $alpha_list->find_link(url_abs_regex => qr{$manga_regex});
    # find_link returns undef when no link matches; calling a method on
    # that would kill the whole run instead of skipping this manga.
    if (not defined $manga_url) {
        print STDERR "Manga $manga doesn't exist\n";
        $failure ||= 1;
        next MANGA;
    }
    my $manga_response = mm_get($m, $manga_url->url_abs());
    if (not defined $manga_response or $m->status() != 200) {
        print STDERR "Manga $manga doesn't exist\n";
        $failure ||= 1;
        next MANGA;
    }
    if (! -d $manga) {
        if (not mkdir($manga)) {
            print STDERR "Unable to create directory $manga: $!\n";
            $failure ||= 1;
            next MANGA;
        }
    }

    # figure out where to start getting stuff
    # we need to escape ! apparently; there are probably other characters as well
    my $manga_escaped = uri_escape($manga, '!');
    my @chapter_links = $m->find_all_links(url_abs_regex => qr{\Q$manga\E\/\d+});

    CHAPTER:
    for my $chapter_link (reverse @chapter_links) {
        my ($chapter) = $chapter_link->url() =~ m/([\d.-]+)\/?$/;
        # Links which do not end in a chapter number cannot be mapped
        # to a chapter directory; skip them.
        next CHAPTER unless defined $chapter;

        # Zero-pad every dot-separated component of the chapter number
        # to four digits to build the chapter directory name.
        my $chapter_long =
            $chapter =~ /\./
            ? join('.', map { sprintf '%04d', $_ } split /\./, $chapter)
            : sprintf('%04d', $chapter);

        # A chapter whose directory already exists is not fetched again.
        next CHAPTER if -d "$manga/$chapter_long";

        print $chapter_link->url(), qq(\n);
        if (not mkdir("$manga/$chapter_long")) {
            print STDERR "Unable to create directory $manga/$chapter_long: $!\n";
            $failure ||= 1;
            next CHAPTER;
        }
        mm_get($m, $chapter_link->url_abs());
        # my $link = $m->find_link(text_regex => qr{Begin reading});
        # print $m->content();
        # exit 0;
        # mm_get($m,$link->url_abs());
        print $m->uri() . "\n";

        # Walk the pages of the chapter for as long as the current URI
        # still belongs to this chapter.
        while ($m->uri() =~ m{\Q$chapter\E/?(\d\d[^\/]*)?/?$}) {
            my $image = $m->find_image(alt_regex => qr{ - Page \d+});
            # find_image returns undef when the page has no matching
            # image; stop this chapter instead of dying.
            if (not defined $image) {
                print STDERR "Unable to find page image on " . $m->uri() . "\n";
                $failure ||= 1;
                last;
            }
            print $image->url_abs() . "\n";

            # The link to the next page has to be located before the
            # image is fetched, because fetching the image replaces
            # the page currently held by $m.
            my $next_link = $m->find_link(
                url_regex => qr{\Q$manga_escaped\E/\Q$chapter\E/(\d\d[^\/]*)});

            print "getting " . $image->url_abs() . "\n";
            mm_get($m, $image->url_abs());
            my ($page) = $image->url_abs =~ m/([^\/]+)$/;
            $m->save_content("$manga/$chapter_long/$page");

            last if not defined $next_link;
            mm_get($m, $next_link->url_abs());
            print $m->uri() . "\n";
            sleep 3;
        }
    }
}

# Report whether anything failed to the caller.
exit $failure;

# manga_url($m, $manga)
#
# Unimplemented placeholder; it only unpacks its arguments and is not
# called anywhere in this file.
sub manga_url {
    my ($m, $manga) = @_;
}

# mm_get($m, $url)
#
# Fetch $url with the WWW::Mechanize object $m, retrying when get()
# throws an exception.  Up to 8 retries are made after the first
# attempt, with a 5 second sleep before each retry.
#
# Returns whatever $m->get($url) returned on the first attempt that did
# not throw, or undef if every attempt threw.
sub mm_get {
    my ($m, $url) = @_;
    my $rerun = 8;
    my $return;
    do {
        eval { $return = $m->get($url); };
    } while ($@ and ($rerun-- > 0) and sleep 5);
    return $return;
}

__END__