git.donarmstrong.com Git - bin.git/commitdiff
handle more craziness at Science
author Don Armstrong <don@donarmstrong.com>
Wed, 20 Nov 2013 23:26:17 +0000 (15:26 -0800)
committer Don Armstrong <don@donarmstrong.com>
Wed, 20 Nov 2013 23:26:17 +0000 (15:26 -0800)
get_pdf

diff --git a/get_pdf b/get_pdf
index 935bfc82ca09dfe5aec7dc3b8fb4e91ae3702d25..1b431484b40a7bc96ec3ff9602ec66cfb39442e4 100755 (executable)
--- a/get_pdf
+++ b/get_pdf
@@ -213,8 +213,15 @@ sub find_pdf_link {
         print STDERR $m->content() if $DEBUG > 1;
     }
     my @possible_links;
+    # this brings forward the actual link at Science
+    push @possible_links,
+        grep {my $temp = $_->attrs();
+              exists $temp->{rel} and $temp->{rel} =~ qr/view-/i and
+                  defined $_->text() and $_->text() =~ qr/Full\s*Text.*PDF/i
+              }
+        $m->find_all_links(text_regex => qr/PDF/i);
     # this is to prioritize the real link at science direct
-    push @possible_links, 
+    push @possible_links,
         grep {my $temp = $_->attrs();
               use Data::Dumper;
               print STDERR Dumper($temp);