From: Don Armstrong Date: Wed, 20 Nov 2013 23:26:17 +0000 (-0800) Subject: handle more craziness at Science X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=e5728e16aa74ce036fd8cf33b28a525f89d23735;p=bin.git handle more craziness at Science --- diff --git a/get_pdf b/get_pdf index 935bfc8..1b43148 100755 --- a/get_pdf +++ b/get_pdf @@ -213,8 +213,15 @@ sub find_pdf_link { print STDERR $m->content() if $DEBUG > 1; } my @possible_links; + # this brings forward the actual link at Science + push @possible_links, + grep {my $temp = $_->attrs(); + exists $temp->{rel} and $temp->{rel} =~ qr/view-/i and + defined $_->text() and $_->text() =~ qr/Full\s*Text.*PDF/i + } + $m->find_all_links(text_regex => qr/PDF/i); # this is to prioritize the real link at science direct - push @possible_links, + push @possible_links, grep {my $temp = $_->attrs(); use Data::Dumper; print STDERR Dumper($temp);