From e5728e16aa74ce036fd8cf33b28a525f89d23735 Mon Sep 17 00:00:00 2001 From: Don Armstrong Date: Wed, 20 Nov 2013 15:26:17 -0800 Subject: [PATCH] handle more craziness at Science --- get_pdf | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/get_pdf b/get_pdf index 935bfc8..1b43148 100755 --- a/get_pdf +++ b/get_pdf @@ -213,8 +213,15 @@ sub find_pdf_link { print STDERR $m->content() if $DEBUG > 1; } my @possible_links; + # this brings forward the actual link at Science + push @possible_links, + grep {my $temp = $_->attrs(); + exists $temp->{rel} and $temp->{rel} =~ qr/view-/i and + defined $_->text() and $_->text() =~ qr/Full\s*Text.*PDF/i + } + $m->find_all_links(text_regex => qr/PDF/i); # this is to prioritize the real link at science direct - push @possible_links, + push @possible_links, grep {my $temp = $_->attrs(); use Data::Dumper; print STDERR Dumper($temp); -- 2.39.2