X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=get_pdf;h=1b431484b40a7bc96ec3ff9602ec66cfb39442e4;hb=3d5241a316e3ff729b19b878b0841558120f75e9;hp=935bfc82ca09dfe5aec7dc3b8fb4e91ae3702d25;hpb=1b8e7d36c575e5a19feaf0d2b9c3d79c67cca0b7;p=bin.git diff --git a/get_pdf b/get_pdf index 935bfc8..1b43148 100755 --- a/get_pdf +++ b/get_pdf @@ -213,8 +213,15 @@ sub find_pdf_link { print STDERR $m->content() if $DEBUG > 1; } my @possible_links; + # this brings forward the actual link at Science + push @possible_links, + grep {my $temp = $_->attrs(); + exists $temp->{rel} and $temp->{rel} =~ qr/view-/i and + defined $_->text() and $_->text() =~ qr/Full\s*Text.*PDF/i + } + $m->find_all_links(text_regex => qr/PDF/i); # this is to prioritize the real link at science direct - push @possible_links, + push @possible_links, grep {my $temp = $_->attrs(); use Data::Dumper; print STDERR Dumper($temp);