git.donarmstrong.com Git - bin.git/commitdiff
use chromium and UTF-8 encoding
author Don Armstrong <don@donarmstrong.com>
Wed, 10 May 2017 18:37:49 +0000 (11:37 -0700)
committer Don Armstrong <don@donarmstrong.com>
Wed, 10 May 2017 18:37:49 +0000 (11:37 -0700)
get_pdf

diff --git a/get_pdf b/get_pdf
index 1b431484b40a7bc96ec3ff9602ec66cfb39442e4..0c1ad2fa9a5ad90825be15bae55124648911c11d 100755 (executable)
--- a/get_pdf
+++ b/get_pdf
@@ -83,7 +83,8 @@ pod2usage({verbose=>2}) if $options{man};
 
 $DEBUG = $options{debug};
 
-
+binmode(STDOUT,":encoding(UTF-8)");
+binmode(STDERR,":encoding(UTF-8)");
 
 if (not grep {exists $options{$_} and
                  defined $options{$_} and
@@ -127,7 +128,9 @@ if ($options{pmid}) {
            my @possible_links = $m->find_all_links(text_regex => qr/to\s*read/i);
            # try to find the other links
            push @possible_links,
-               grep {my $attr = $_->attrs(); exists $attr->{title} and $attr->{title} =~ qr/(?:Full\s*Text|PMC)/i}
+            grep {my $attr = $_->attrs();
+                  exists $attr->{title} and
+                      $attr->{title} =~ qr/(?:Full\s*Text|PMC)/i}
                    $m->links();
            print STDERR map {"article link: ".$_->url_abs()."\n"} @possible_links if $DEBUG;
            die "No links" unless @possible_links;
@@ -155,7 +158,16 @@ if ($options{pmid}) {
        };
        if ($@) {
            print STDERR "$@\n" if $DEBUG;
-            if ($options{use_links}) {
+        if ($options{use_links}) {
+            if ($ENV{DISPLAY}) {
+                system('chromium',
+                       # strip the leading http:// from the proxy, as is done for links2 below
+                       exists $options{http_proxy}?('--proxy-server',(map {s{http://}{}; $_} $options{http_proxy})):(),
+                       '--temp-profile',
+                       $url,
+                      ) == 0 or next;
+                rename('temp.pdf',"${pmid}.pdf") if -e 'temp.pdf';
+            } else {
                 system('links2',
                        # links2 doesn't like the leading http:// of proxies for some reason
                        exists $options{http_proxy}?('-http-proxy',(map {s{http://}{}; $_} $options{http_proxy})):(),
@@ -163,7 +175,7 @@ if ($options{pmid}) {
                       ) == 0 or next;
                 rename('temp.pdf',"${pmid}.pdf") if -e 'temp.pdf';
             }
-        }
+        }}
     }
 }
 
@@ -220,6 +232,12 @@ sub find_pdf_link {
                   defined $_->text() and $_->text() =~ qr/Full\s*Text.*PDF/i
               }
         $m->find_all_links(text_regex => qr/PDF/i);
+    push @possible_links,
+        grep {my $temp = $_->attrs();
+              exists $temp->{rel} and $temp->{rel} =~ qr/alternate/i and
+                  exists $temp->{type} and $temp->{type} =~ qr/pdf/i
+              }
+        $m->find_all_links(url_regex => qr/pdf/);
     # this is to prioritize the real link at science direct
     push @possible_links,
         grep {my $temp = $_->attrs();