X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=get_one_manga;h=8e9e8cc0fea7f4182976cc9c5c6669c73f7bbe02;hb=3d5241a316e3ff729b19b878b0841558120f75e9;hp=2bdf03b513f506b0d398285173b430ce7835218b;hpb=f6cafb7868094e218931db5bbaad7325032ce865;p=bin.git diff --git a/get_one_manga b/get_one_manga index 2bdf03b..8e9e8cc 100755 --- a/get_one_manga +++ b/get_one_manga @@ -49,7 +49,9 @@ Display this manual. =cut +use URI::Escape; use WWW::Mechanize; +use HTTP::Cookies; use IO::Dir; use IO::File; use vars qw($DEBUG); @@ -81,7 +83,7 @@ my @manga_to_get = @ARGV; if (not @ARGV) { my $d = IO::Dir->new('.') or die "Unable to open directory . for reading"; while (defined($_ = $d->read)) { - next if /^./; + next if /^\./; next unless -d $_; push @manga_to_get,$_; } @@ -90,42 +92,71 @@ if (not @ARGV) { my $failure = 0; my $m = WWW::Mechanize->new(); +$m->cookie_jar(HTTP::Cookies->new()); +$m->cookie_jar()->set_cookie(1,'age_verified','42','/','www.1000manga.com'); +# use Data::Dumper; +# print STDERR Dumper($m->cookie_jar()); for my $manga (@manga_to_get) { # see if the manga exists - $m->get($options{onemanga}.'/'.$manga); + mm_get($m,$options{onemanga}.'/'.$manga); if ($m->status() != 200) { print STDERR "Manga $manga doesn't exist\n"; $failure ||= 1; next; } if (! -d $manga) { - #mkdir($manga); + mkdir($manga); } # figure out where to start getting stuff + # we need to escape ! apparently; there are probably other characters as well + my $manga_escaped = uri_escape($manga,'!'); my @chapter_links = $m->find_all_links(url_abs_regex => qr{\Q$manga\E\/\d+}); for my $chapter_link (reverse @chapter_links) { - print $chapter_link->url(),qq(\n); - my ($chapter) = $chapter_link->url() =~ m/(\d+)\/?$/; - if (! -d "$manga/$chapter_link") { - #mkdir("$manga/$chapter"); - my $page = 0; - $m->get($chapter_link->url_abs()); - $m->follow_link(text_regex => qr{Begin reading}); - while ($m->uri() =~ m{\Q$chapter\E\/\d+/?$}) { - $page++; - my $image = $m->find_image(alt_regex => qr{Loading\.+\s+media}); - my $next_link = $m->find_link(url_regex => qr{\Q$manga\E/\Q$chapter\E/\d+}); - $m->get($image->url_abs()); + my ($chapter) = $chapter_link->url() =~ m/([\d.-]+)\/?$/; + my $chapter_long = $chapter =~ /\./ ? join('.',map {sprintf'%04d',$_} split /\./,$chapter) : sprintf('%04d',$chapter); + if (! -d "$manga/$chapter_long") { + print $chapter_link->url(),qq(\n); + mm_get($m,$chapter_link->url_abs()); + my $link = $m->find_link(text_regex => qr{Begin [Rr]eading}); + if (not defined $link) { + #print $m->content(); + my $temp = $m->find_link(text_regex => qr{Read.*at.*1000manga\.com}); + mm_get($m,$temp->url_abs()); + #print $m->content(); + $link = $m->find_link(text_regex => qr{Begin [Rr]eading}); + } + mm_get($m,$link->url_abs()); + # print $link->url_abs(); + # print $m->content(); + mkdir("$manga/$chapter_long"); + while ($m->uri() =~ m{\Q$chapter\E/(\d\d[^\/]*)/?$}) { + my $image = $m->find_image(alt_regex => qr{Loading\.+\s+(media|img)}); + my $next_link = $m->find_link(url_regex => qr{\Q$manga_escaped\E/\Q$chapter\E/(\d\d[^\/]*)}); + mm_get($m,$image->url_abs()); print "getting ".$image->url_abs()."\n"; - # $m->save_content("$manga/$chapter/".sprintf('%04d',$page).".jpg"); - $m->get($next_link->url_abs()); - print $m->uri(); + my ($page) = $image->url_abs =~ m/([^\/]+)$/; + $m->save_content("$manga/$chapter_long/$page"); + last if not defined $next_link; + mm_get($m,$next_link->url_abs()); + print $m->uri()."\n"; sleep 3; } - exit 0; } } } +sub mm_get{ + my ($m,$url) = @_; + my $rerun = 8; + my $return; + do { + eval { + $return = $m->get($url); + }; + } while ($@ and + ($rerun-- > 0) and sleep 5); + return $return; +} + __END__