From: Don Armstrong Date: Fri, 16 Apr 2010 04:22:28 +0000 (+0000) Subject: handle manga escapes in get_one_manga X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=d4e59a710e1c3033c1c815904d876a368723892e;p=bin.git handle manga escapes in get_one_manga --- diff --git a/get_one_manga b/get_one_manga index cc0cbd0..9bc60aa 100755 --- a/get_one_manga +++ b/get_one_manga @@ -49,6 +49,7 @@ Display this manual. =cut +use URI::Escape; use WWW::Mechanize; use IO::Dir; use IO::File; @@ -102,6 +103,8 @@ for my $manga (@manga_to_get) { mkdir($manga); } # figure out where to start getting stuff + # we need to escape ! apparently; there are probably other characters as well + my $manga_escaped = uri_escape($manga,'!'); my @chapter_links = $m->find_all_links(url_abs_regex => qr{\Q$manga\E\/\d+}); for my $chapter_link (reverse @chapter_links) { my ($chapter) = $chapter_link->url() =~ m/([\d.-]+)\/?$/; @@ -114,7 +117,7 @@ for my $manga (@manga_to_get) { mm_get($m,$link->url_abs()); while ($m->uri() =~ m{\Q$chapter\E/(\d\d[^\/]*)/?$}) { my $image = $m->find_image(alt_regex => qr{Loading\.+\s+media}); - my $next_link = $m->find_link(url_regex => qr{\Q$manga\E/\Q$chapter\E/(\d\d[^\/]*)}); + my $next_link = $m->find_link(url_regex => qr{\Q$manga_escaped\E/\Q$chapter\E/(\d\d[^\/]*)}); mm_get($m,$image->url_abs()); print "getting ".$image->url_abs()."\n"; my ($page) = $image->url_abs =~ m/([^\/]+)$/;