X-Git-Url: https://git.donarmstrong.com/?p=reference.git;a=blobdiff_plain;f=lib%2FReference%2FRetrieve%2FPubMed.pm;h=78cf93533641c6b190f04c039e6f1ca77b303ba9;hp=81bef475cdaa565b806a3017c17ded13eb6b35d7;hb=HEAD;hpb=0cd432d9cb4205a2d93aa1d442d8283117971369 diff --git a/lib/Reference/Retrieve/PubMed.pm b/lib/Reference/Retrieve/PubMed.pm index 81bef47..78cf935 100644 --- a/lib/Reference/Retrieve/PubMed.pm +++ b/lib/Reference/Retrieve/PubMed.pm @@ -189,9 +189,7 @@ sub _create_reference_from_xml($$){ } } if (not defined $ref_type) { - warn "Unsupported PublicationType: ".(ref($ref->{MedlineCitation}->{Article}->{PublicationTypeList}->{PublicationType})? - join(',',@{$ref->{MedlineCitation}->{Article}->{PublicationTypeList}->{PublicationType}}): - $ref->{MedlineCitation}->{Article}->{PublicationTypeList}->{PublicationType}); + warn "Unsupported PublicationType: ".Dumper($ref->{MedlineCitation}->{Article}->{PublicationTypeList}->{PublicationType}); print STDERR Dumper($ref) if $DEBUG; $ref_type = 'article'; } @@ -203,8 +201,8 @@ sub _create_reference_from_xml($$){ title => [_fix_medline_title($ref->{MedlineCitation}->{Article}->{ArticleTitle})], abstract => [_fix_medline_abstract($ref->{MedlineCitation}->{Article}->{Abstract}->{AbstractText})], journal => [_fix_medline_journal($ref->{MedlineCitation}->{Article}->{Journal}, - $ref->{MedlineCitation}->{Article}->{MedlineJournalInfo}, - $ua, + $ref->{MedlineCitation}->{MedlineJournalInfo}, + $ua, #@_, # configuration )], _fix_ids($ref), @@ -334,47 +332,36 @@ sub _fix_medline_journal($$$;){ # # - my $ISSN = $journal->{ISSN}; + my $ISSN = length($journal->{ISSN})? $journal->{ISSN} : $medline_journal->{ISSNLinking}; if (ref $ISSN) { $ISSN = $ISSN->{content}; + } + if (not length($ISSN)) { + use Data::Printer; + p $journal; + die "No ISSN"; } - my $url = qq(http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=journals&term=$ISSN); - print STDERR "url: $url" if $DEBUG; + my $url = qq(http://www.ncbi.nlm.nih.gov/nlmcatalog/?term=${ISSN}[ISSN]&format=text&report=xml); + print STDERR "url: $url\n" if $DEBUG; my $request = HTTP::Request->new('GET', $url); my $response = $ua->request($request); $response = $response->content; - print STDERR "response: $response" if $DEBUG; + $response =~ s/\>/>/gso; + $response =~ s/\<///; - my ($journal_id) = $response =~ m#\s*(\d+)\s*#i; - - # http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=journals&id=4559 - # - # - # - # - # 4559 - # The Journal of biological chemistry. - # J Biol Chem - # J. Biol. Chem. - # 2985121R - # - # 0021-9258 - # 1083-351X - # 1905 - # - # American Society for Biochemistry and Molecular Biology - # eng - # - # United States - # - # - # - $url = qq(http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=journals&id=$journal_id); - print STDERR "url: $url" if $DEBUG; - $request = HTTP::Request->new('GET', $url); - $response = $ua->request($request); - $response = $response->content; - print STDERR "response: $response" if $DEBUG; + print STDERR "response: $response\n" if $DEBUG; + my $xa = new XML::Simple; + my $ref_struct = $xa->XMLin($response,ForceArray=>['NCBICatalogRecord'],KeyAttr=>[]); + my $ref; + for (0..@{$ref_struct->{NCBICatalogRecord}}) { + if (exists $ref_struct->{NCBICatalogRecord}[$_]{JrXml}) { + $ref = $ref_struct->{NCBICatalogRecord}[$_]; + last; + } + } + $ref = $ref_struct->{NCBICatalogRecord}[0] if not defined $ref; + print STDERR Dumper($ref) if $DEBUG; my %journal; while ($response =~ m{^\s*(?:(?:\s*(\d+))| # Match ids @@ -386,20 +373,24 @@ sub _fix_medline_journal($$$;){ else { $journal{lc($2)} = $3; } + } + print STDERR Dumper($ref) if $DEBUG; + if (ref $ref->{JrXml}{Serial}{ISSN} ne 'ARRAY') { + $ref->{JrXml}{Serial}{ISSN} = [$ref->{JrXml}{Serial}{ISSN}]; } - my %journal_mapping = (title => q(title), - medlineabbr => q(medabbr), - isoabbr => q(isoabbr), - nlmid => q(nlmid), - issn => q(pissn), - eissn => q(eissn), - publisher => q(publisher), - pmid => q(id) - ); - my @journal_entry; - foreach my $key (keys %journal_mapping) { - push @journal_entry,($key=>$journal{$journal_mapping{$key}}); - } + print STDERR Dumper($ref->{JrXml}) if $DEBUG; + my $print_issn = $ref->{JrXml}{Serial}{ISSN}[0]{IssnType} eq 'Print' ? 0 : 1; + my @journal_entry = + (title => $ref->{JrXml}{Serial}{Title}, + medlineabbr => $ref->{JrXml}{Serial}{MedlineTA}, + isoabbr => $ref->{JrXml}{Serial}{ISOAbbreviation}, + nlmid => $ref->{JrXml}{Serial}{NlmUniqueID}, + issn => $ref->{JrXml}{Serial}{ISSN}[$print_issn]{ISSN}, + eissn => $ref->{JrXml}{Serial}{ISSN}[1-$print_issn]{ISSN}, + publisher => $ref->{JrXml}{Serial}{publicationInfo}{Publisher}, + pmid => $ref->{JrXml}{id}, + ); + print STDERR Dumper(\@journal_entry) if $DEBUG; return @journal_entry; } @@ -446,7 +437,7 @@ sub _fix_medline_pages($){ if (not defined $start) { ($start) = $pagination =~ /(\d+)/ } - if ($start > $stop and defined $stop) { + if ($start > $stop and defined $stop and length($stop)) { # this must be a reduced page listing; fix it up $stop+=$start - $start % 10 ** (int(log($stop)/log(10))+1); }