* handle relative URLs properly
* look in meta tags too
$m->get($inputs[0]->value);
print STDERR $m->content() if $DEBUG;
}
$m->get($inputs[0]->value);
print STDERR $m->content() if $DEBUG;
}
- my @possible_links = $m->find_all_links(text_regex => qr/pdf/i);
+ my @possible_links;
+ push @possible_links, $m->find_all_links(tag_regex => qr/meta/,
+ url_regex => qr/(reprint|\.pdf)/i,
+ );
+ push @possible_links, $m->find_all_links(text_regex => qr/pdf/i);
push @possible_links,$m->find_all_links(text_regex => qr/manual\s*download/i);
push @possible_links,$m->find_all_links(text_regex => qr/manual\s*download/i);
- print STDERR map{$_->url,qq(\n)} @possible_links if $DEBUG;
+ print STDERR $m->uri() if $DEBUG;
+ print STDERR $m->content() if $DEBUG;
+ print STDERR map{$_->url_abs(),qq(\n)} @possible_links if $DEBUG;
if (not @possible_links and $DEBUG) {
print STDERR $m->content();
}
my $best_guess = $possible_links[0] if @possible_links;
for my $link (@possible_links) {
if (not @possible_links and $DEBUG) {
print STDERR $m->content();
}
my $best_guess = $possible_links[0] if @possible_links;
for my $link (@possible_links) {
- my $r = $m->get($link->url());
+ my $r = $m->get($link->url_abs());
if ($r->header('Content-Type') =~ /pdf/) {
return $m;
}
}
my @sub_frames = $m->find_all_links(tag_regex=>qr/^i?frame$/);
for my $frame (@sub_frames) {
if ($r->header('Content-Type') =~ /pdf/) {
return $m;
}
}
my @sub_frames = $m->find_all_links(tag_regex=>qr/^i?frame$/);
for my $frame (@sub_frames) {
- $m->get($frame->url());
+ $m->get($frame->url_abs());
my $pdf_m = find_pdf_link($m,
0,
$call+1,
my $pdf_m = find_pdf_link($m,
0,
$call+1,
}
}
if ($guess and defined $best_guess) {
}
}
if ($guess and defined $best_guess) {
- $m->get($best_guess->url());
+ $m->get($best_guess->url_abs());
return $m;
}
return undef;
return $m;
}
return undef;