X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=get_pdf;h=2184c1c80a2314a8e480f5100f1b6974c9739e53;hb=eaba4c2d2d2c7438d78aa0c9313ff861d2a87006;hp=e5624546d22e39a67e1581fd621d6b10899e59fd;hpb=80f2a9ed154cb4aa2b76b38de31ec6f9cf0f1b97;p=bin.git diff --git a/get_pdf b/get_pdf index e562454..2184c1c 100755 --- a/get_pdf +++ b/get_pdf @@ -65,12 +65,14 @@ use WWW::Mechanize; my %options = (debug => 0, help => 0, man => 0, + use_links => 1, ); my %REFERENCE_TYPES = (pmid => 'pmid|p'); GetOptions(\%options, values %REFERENCE_TYPES, + 'use_links|use-links!', 'cgi_proxy|cgi-proxy|C=s', 'http_proxy|http-proxy|H=s', 'debug|d+','help|h|?','man|m'); @@ -124,7 +126,7 @@ if ($options{pmid}) { my @possible_links = $m->find_all_links(text_regex => qr/to\s*read/i); # try to find the other links push @possible_links, - grep {my $attr = $_->attrs(); exists $attr->{title} and $attr->{title} =~ qr/Full\s*Text/i} + grep {my $attr = $_->attrs(); exists $attr->{title} and $attr->{title} =~ qr/(?:Full\s*Text|PMC)/i} $m->links(); print STDERR map {"article link: ".$_->url_abs()."\n"} @possible_links if $DEBUG; die "No links" unless @possible_links; @@ -152,12 +154,15 @@ if ($options{pmid}) { }; if ($@) { print STDERR "$@\n" if $DEBUG; - system('links2', - exists $options{http_proxy}?('-http-proxy',$options{http_proxy}):(), - $url - ) == 0 or next; - rename('temp.pdf',"${pmid}.pdf") if -e 'temp.pdf'; - } + if ($options{use_links}) { + system('links2', + # links2 doesn't like the leading http:// of proxies for some reason + exists $options{http_proxy}?('-http-proxy',(map {s{http://}{}; $_} $options{http_proxy})):(), + $url + ) == 0 or next; + rename('temp.pdf',"${pmid}.pdf") if -e 'temp.pdf'; + } + } } }