- print STDERR $m->uri() if $DEBUG;
- print STDERR $m->content() if $DEBUG;
- my @inputs = $m->find_all_inputs(type => 'hidden',
- name => q(urls['sd']),
- );
- return unless @inputs;
- $m->get($inputs[0]->value);
- print STDERR $m->content() if $DEBUG;
+ print STDERR $m->uri() if $DEBUG;
+ print STDERR $m->content() if $DEBUG > 1;
+ my @inputs = $m->find_all_inputs(type => 'hidden',
+ name => q(urls['sd']),
+ );
+ return unless @inputs;
+ $m->get($inputs[0]->value);
+ print STDERR $m->content() if $DEBUG > 1;
+ }
+ my @possible_links;
+ # this brings forward the actual link at Science
+ push @possible_links,
+ grep {my $temp = $_->attrs();
+ exists $temp->{rel} and $temp->{rel} =~ qr/view-/i and
+ defined $_->text() and $_->text() =~ qr/Full\s*Text.*PDF/i
+ }
+ $m->find_all_links(text_regex => qr/PDF/i);
+ push @possible_links,
+ grep {my $temp = $_->attrs();
+ exists $temp->{rel} and $temp->{rel} =~ qr/alternate/i and
+ exists $temp->{type} and $temp->{type} =~ qr/pdf/i
+ }
+ $m->find_all_links(url_regex => qr/pdf/);
+ # Prioritize the real link at ScienceDirect: keep PDF links whose title or text says "Download"/"Full Text" followed by "PDF".
+ push @possible_links,
+ grep {my $temp = $_->attrs();
+ use Data::Dumper;
+ print STDERR Dumper($temp) if $DEBUG > 1; # per-link attribute dump is diagnostic only; emit it at the verbose debug level like the page-content dumps
+ (exists $temp->{title} and $temp->{title} =~ qr/(Download|Full\s*Text)\s*PDF/i) or
+ (defined $_->text() and $_->text() =~ qr/(Full\s*Text|Download).*PDF/i)
+ }
+ $m->find_all_links(text_regex => qr/PDF/i);
+ my $possible_links = 0;
+ if ($DEBUG) {
+ $possible_links++;
+ print STDERR "possible links[$possible_links]:\n";
+ p @possible_links;
+ }
+ push @possible_links, grep { $_->url_abs() !~ /_orig(?:in)?=article/} $m->find_all_links(text_regex => qr/PDF/i);
+ if ($DEBUG) {
+ $possible_links++;
+ print STDERR "possible links[$possible_links]:\n";
+ p @possible_links;
+ }
+ push @possible_links, $m->find_all_links(tag_regex => qr/meta/,
+ url_regex => qr/(reprint|\.pdf)/i,
+ );
+ if ($DEBUG) {
+ $possible_links++;
+ print STDERR "possible links[$possible_links]:\n";
+ p @possible_links;
+ }
+ # The masthead grep here is to handle PNAS, which has a link to their masthead in every article.
+ push @possible_links,
+ grep {my $temp = $_->attrs(); (not defined $temp->{title}) or $temp->{title} !~ qr/Masthead/i;}
+ $m->find_all_links(text_regex => qr/pdf/i);
+ if ($DEBUG) {
+ $possible_links++;
+ print STDERR "possible links[$possible_links]:\n";
+ p @possible_links;