add initial stab at get_mangareader

author Don Armstrong <don@donarmstrong.com>

Fri, 10 Sep 2010 23:37:47 +0000 (23:37 +0000)

committer Don Armstrong <don@donarmstrong.com>

Fri, 10 Sep 2010 23:37:47 +0000 (23:37 +0000)
author Don Armstrong <don@donarmstrong.com>
Fri, 10 Sep 2010 23:37:47 +0000 (23:37 +0000)
committer Don Armstrong <don@donarmstrong.com>
Fri, 10 Sep 2010 23:37:47 +0000 (23:37 +0000)
diff --git a/get_mangareader b/get_mangareader

new file mode 100755 (executable)

index 0000000..8789b8e
--- /dev/null
+++ b/get_mangareader
@@ -0,0 +1,164 @@
+#! /usr/bin/perl
+# get_mangareader downloads manga from mangareader.net, and is released
+# under the terms of the GPL version 2, or any later version, at your
+# option. See the file README and COPYING for more information.
+# Copyright 2009 by Don Armstrong <don@donarmstrong.com>.
+# $Id: perl_script 1432 2009-04-21 02:42:41Z don $
+
+
+use warnings;
+use strict;
+
+use Getopt::Long;
+use Pod::Usage;
+
+=head1 NAME
+
+get_mangareader - download manga chapters from www.mangareader.net
+
+=head1 SYNOPSIS
+
+ get_mangareader [options] [manga ...]
+
+ Options:
+  --debug, -d debugging level (Default 0)
+  --help, -h display this help
+  --man, -m display manual
+
+=head1 OPTIONS
+
+=over
+
+=item B<--debug, -d>
+
+Debug verbosity. (Default 0)
+
+=item B<--help, -h>
+
+Display brief usage information.
+
+=item B<--man, -m>
+
+Display this manual.
+
+=back
+
+=head1 EXAMPLES
+
+
+=cut
+
+
+use URI::Escape;
+use WWW::Mechanize;
+use IO::Dir;
+use IO::File;
+use vars qw($DEBUG);
+
+# Option defaults.  The 'onemanga' entry is not read anywhere in the code
+# visible here; it is kept so the contents of %options stay unchanged.
+my %options = (
+    debug    => 0,
+    help     => 0,
+    man      => 0,
+    onemanga => 'http://www.onemanga.com',
+);
+
+# GetOptions returns false when it meets an unknown or malformed option;
+# stop with the usage message instead of silently carrying on.
+GetOptions(\%options,
+           'debug|d+', 'help|h|?', 'man|m')
+    or pod2usage();
+
+pod2usage() if $options{help};
+pod2usage({verbose=>2}) if $options{man};
+
+$DEBUG = $options{debug};
+
+# Collected usage problems; nothing is validated yet, so this stays empty.
+my @USAGE_ERRORS;
+# if (1) {
+#      push @USAGE_ERRORS,"You must give the name of a manga";
+# }
+
+pod2usage(join("\n",@USAGE_ERRORS)) if @USAGE_ERRORS;
+
+
+# Manga to fetch: the names given on the command line, or, when none were
+# given, every non-hidden subdirectory of the current directory.
+my @manga_to_get = @ARGV;
+
+if (not @ARGV) {
+    my $d = IO::Dir->new('.')
+        or die "Unable to open directory . for reading: $!";
+    # A lexical entry name keeps the scan from overwriting the global $_.
+    while (defined(my $entry = $d->read)) {
+        next if $entry =~ /^\./;
+        next unless -d $entry;
+        push @manga_to_get, $entry;
+    }
+    $d->close;
+}
+
+# Nothing requested and nothing on disk: there is no work to do.
+exit 0 unless @manga_to_get;
+
+
+# Set to 1 as soon as any manga cannot be fetched.
+my $failure = 0;
+my $m = WWW::Mechanize->new();
+
+# Fetch the site's alphabetical index once; every requested manga is looked
+# up in a copy of this page.
+mm_get($m,"http://www.mangareader.net/alphabetical");
+my $index_status = $m->status();
+if (not defined $index_status or $index_status != 200) {
+    # Trailing newline so the message does not run into the shell prompt.
+    print STDERR "Unable to get alphabetical listing of manga\n";
+    exit 1;
+}
+# Keep a separate copy of the index; $m itself is reused for every later fetch.
+my $alpha_list = $m->clone();
+
+# For each requested manga, download every chapter that does not already
+# have a directory on disk.  Problems with one manga or chapter are reported
+# on STDERR and recorded in $failure; the remaining work still runs.
+for my $manga (@manga_to_get) {
+    # see if the manga exists; any run of non-word characters in the name
+    # is allowed to match either '_' or '-' in the link URL
+    my $manga_regex = $manga;
+    $manga_regex =~ s/\W+/[_-]/g;
+    my $manga_url = $alpha_list->find_link(url_abs_regex=>qr{$manga_regex});
+    # find_link returns undef when no link matches; treat that the same as
+    # a failed fetch instead of dying on a method call on undef
+    if (defined $manga_url) {
+        mm_get($m,$manga_url->url_abs());
+    }
+    if (not defined $manga_url or $m->status() != 200) {
+        print STDERR "Manga $manga doesn't exist\n";
+        $failure ||= 1;
+        next;
+    }
+    if (! -d $manga and not mkdir($manga)) {
+        print STDERR "Unable to create directory $manga: $!\n";
+        $failure ||= 1;
+        next;
+    }
+    # figure out where to start getting stuff
+    # we need to escape ! apparently; there are probably other characters as well
+    my $manga_escaped = uri_escape($manga,'!');
+    my @chapter_links = $m->find_all_links(url_abs_regex => qr{\Q$manga\E\/\d+});
+    for my $chapter_link (reverse @chapter_links) {
+        my ($chapter) = $chapter_link->url() =~ m/([\d.-]+)\/?$/;
+        # skip links whose URL does not end in a chapter number
+        next unless defined $chapter;
+        # zero-pad every dot-separated component to four digits; this is
+        # the name of the chapter's directory
+        my $chapter_long = join('.', map { sprintf('%04d',$_) } split /\./, $chapter);
+        # an existing chapter directory means the chapter is skipped
+        next if -d "$manga/$chapter_long";
+        print $chapter_link->url(),qq(\n);
+        if (not mkdir("$manga/$chapter_long")) {
+            print STDERR "Unable to create directory $manga/$chapter_long: $!\n";
+            $failure ||= 1;
+            next;
+        }
+        mm_get($m,$chapter_link->url_abs());
+        my $link = $m->find_link(text_regex => qr{Begin reading});
+        if (not defined $link) {
+            print STDERR "Unable to find the first page of $manga chapter $chapter\n";
+            $failure ||= 1;
+            next;
+        }
+        mm_get($m,$link->url_abs());
+        # walk the pages for as long as the current URL still looks like a
+        # page of this chapter
+        while ($m->uri() =~ m{\Q$chapter\E/(\d\d[^\/]*)/?$}) {
+            my $image = $m->find_image(alt_regex => qr{Loading\.+\s+(media|img)});
+            # look up the next page now: fetching the image below replaces
+            # the page held in $m
+            my $next_link = $m->find_link(url_regex => qr{\Q$manga_escaped\E/\Q$chapter\E/(\d\d[^\/]*)});
+            if (not defined $image) {
+                print STDERR "Unable to find the page image on ".$m->uri()."\n";
+                $failure ||= 1;
+                last;
+            }
+            mm_get($m,$image->url_abs());
+            print "getting ".$image->url_abs()."\n";
+            my ($page) = $image->url_abs =~ m/([^\/]+)$/;
+            $m->save_content("$manga/$chapter_long/$page");
+            last if not defined $next_link;
+            mm_get($m,$next_link->url_abs());
+            print $m->uri()."\n";
+            # pause between pages
+            sleep 3;
+        }
+    }
+}
+
+# Report through the exit status whether any manga could not be fetched.
+exit $failure;
+
+# Unimplemented placeholder: unpacks its two arguments and does nothing
+# else.  No call to it appears in the code shown here.
+sub manga_url{
+    my ($m,$manga) = @_;
+}
+
+# mm_get($m, $url): call $m->get($url), retrying when the call dies.
+#
+# After a failed attempt the sub sleeps 5 seconds and tries again, up to 8
+# retries after the first attempt.  It returns whatever the successful
+# get() returned, or undef if every attempt died.
+sub mm_get{
+    my ($m,$url) = @_;
+    my $retries_left = 8;
+    my $response;
+    while (1) {
+        eval {
+            $response = $m->get($url);
+        };
+        # success: nothing was thrown
+        last unless $@;
+        # give up once the retry budget is spent
+        last unless $retries_left-- > 0;
+        # sleep returns the seconds slept; stop if it reports zero
+        last unless sleep 5;
+    }
+    return $response;
+}
+
+
+__END__
author	Don Armstrong <don@donarmstrong.com>
	Fri, 10 Sep 2010 23:37:47 +0000 (23:37 +0000)
committer	Don Armstrong <don@donarmstrong.com>
	Fri, 10 Sep 2010 23:37:47 +0000 (23:37 +0000)