=cut
+use URI::Escape;
use WWW::Mechanize;
+use HTTP::Cookies;
use IO::Dir;
use IO::File;
use vars qw($DEBUG);
if (not @ARGV) {
my $d = IO::Dir->new('.') or die "Unable to open directory . for reading";
while (defined($_ = $d->read)) {
- next if /^./;
+ next if /^\./;
next unless -d $_;
push @manga_to_get,$_;
}
my $failure = 0;
my $m = WWW::Mechanize->new();
+$m->cookie_jar(HTTP::Cookies->new());
+$m->cookie_jar()->set_cookie(1,'age_verified','42','/','www.1000manga.com');
+# use Data::Dumper;
+# print STDERR Dumper($m->cookie_jar());
for my $manga (@manga_to_get) {
# see if the manga exists
- $m->get($options{onemanga}.'/'.$manga);
+ mm_get($m,$options{onemanga}.'/'.$manga);
if ($m->status() != 200) {
print STDERR "Manga $manga doesn't exist\n";
$failure ||= 1;
next;
}
if (! -d $manga) {
- #mkdir($manga);
+ mkdir($manga);
}
# figure out where to start getting stuff
+ # we need to escape ! apparently; there are probably other characters as well
+ my $manga_escaped = uri_escape($manga,'!');
my @chapter_links = $m->find_all_links(url_abs_regex => qr{\Q$manga\E\/\d+});
for my $chapter_link (reverse @chapter_links) {
- print $chapter_link->url(),qq(\n);
- my ($chapter) = $chapter_link->url() =~ m/(\d+)\/?$/;
- if (! -d "$manga/$chapter_link") {
- #mkdir("$manga/$chapter");
- my $page = 0;
- $m->get($chapter_link->url_abs());
- $m->follow_link(text_regex => qr{Begin reading});
- while ($m->uri() =~ m{\Q$chapter\E\/\d+/?$}) {
- $page++;
- my $image = $m->find_image(alt_regex => qr{Loading\.+\s+media});
- my $next_link = $m->find_link(url_regex => qr{\Q$manga\E/\Q$chapter\E/\d+});
- $m->get($image->url_abs());
+ my ($chapter) = $chapter_link->url() =~ m/([\d.-]+)\/?$/;
+ my $chapter_long = $chapter =~ /\./ ? join('.',map {sprintf'%04d',$_} split /\./,$chapter) : sprintf('%04d',$chapter);
+ if (! -d "$manga/$chapter_long") {
+ print $chapter_link->url(),qq(\n);
+ mm_get($m,$chapter_link->url_abs());
+ my $link = $m->find_link(text_regex => qr{Begin [Rr]eading});
+ if (not defined $link) {
+ #print $m->content();
+ my $temp = $m->find_link(text_regex => qr{Read.*at.*1000manga\.com});
+ mm_get($m,$temp->url_abs());
+ #print $m->content();
+ $link = $m->find_link(text_regex => qr{Begin [Rr]eading});
+ }
+ mm_get($m,$link->url_abs());
+ # print $link->url_abs();
+ # print $m->content();
+ mkdir("$manga/$chapter_long");
+ while ($m->uri() =~ m{\Q$chapter\E/(\d\d[^\/]*)/?$}) {
+ my $image = $m->find_image(alt_regex => qr{Loading\.+\s+(media|img)});
+ my $next_link = $m->find_link(url_regex => qr{\Q$manga_escaped\E/\Q$chapter\E/(\d\d[^\/]*)});
+ mm_get($m,$image->url_abs());
print "getting ".$image->url_abs()."\n";
- # $m->save_content("$manga/$chapter/".sprintf('%04d',$page).".jpg");
- $m->get($next_link->url_abs());
- print $m->uri();
+ my ($page) = $image->url_abs =~ m/([^\/]+)$/;
+ $m->save_content("$manga/$chapter_long/$page");
+ last if not defined $next_link;
+ mm_get($m,$next_link->url_abs());
+ print $m->uri()."\n";
sleep 3;
}
- exit 0;
}
}
}
+# mm_get($m, $url)
+#
+# Retrying wrapper around $m->get($url).
+#
+#   $m   - object whose get() method is called (the callers in this file
+#          pass a WWW::Mechanize instance)
+#   $url - URL handed unchanged to get()
+#
+# The get() call runs inside eval, so an exception it throws does not
+# propagate; instead the call is attempted again. With $rerun starting at 8
+# there is one initial attempt plus at most 8 retries, each retry preceded
+# by a sleep of 5 seconds.
+#
+# Returns whatever get() returned on the attempt that did not throw, or
+# undef when every attempt threw. The exception itself is not reported or
+# re-thrown, so callers have to inspect the return value or the state of $m
+# to notice a failure.
+sub mm_get{
+ my ($m,$url) = @_;
+ my $rerun = 8;
+ my $return;
+ do {
+ eval {
+ # $return is only assigned when get() completes without dying
+ $return = $m->get($url);
+ };
+ # Loop again only if the eval failed AND retries remain AND sleep
+ # returned a true value. NOTE(review): sleep returns the number of
+ # seconds actually slept, so a sleep interrupted immediately (returning
+ # 0) would also end the retry loop - confirm that is acceptable.
+ } while ($@ and
+ ($rerun-- > 0) and sleep 5);
+ return $return;
+}
+
__END__