]> git.donarmstrong.com Git - reference.git/blobdiff - lib/Reference/Retrieve/PubMed.pm
update how journal information and issn are handled
[reference.git] / lib / Reference / Retrieve / PubMed.pm
index 81bef475cdaa565b806a3017c17ded13eb6b35d7..78cf93533641c6b190f04c039e6f1ca77b303ba9 100644 (file)
@@ -189,9 +189,7 @@ sub _create_reference_from_xml($$){
          }
      }
      if (not defined $ref_type) {
          }
      }
      if (not defined $ref_type) {
-         warn "Unsupported PublicationType: ".(ref($ref->{MedlineCitation}->{Article}->{PublicationTypeList}->{PublicationType})?
-                                               join(',',@{$ref->{MedlineCitation}->{Article}->{PublicationTypeList}->{PublicationType}}):
-                                               $ref->{MedlineCitation}->{Article}->{PublicationTypeList}->{PublicationType});
+         warn "Unsupported PublicationType: ".Dumper($ref->{MedlineCitation}->{Article}->{PublicationTypeList}->{PublicationType});
          print STDERR Dumper($ref) if $DEBUG;
          $ref_type = 'article';
      }
          print STDERR Dumper($ref) if $DEBUG;
          $ref_type = 'article';
      }
@@ -203,8 +201,8 @@ sub _create_reference_from_xml($$){
                             title      => [_fix_medline_title($ref->{MedlineCitation}->{Article}->{ArticleTitle})],
                             abstract   => [_fix_medline_abstract($ref->{MedlineCitation}->{Article}->{Abstract}->{AbstractText})],
                             journal    => [_fix_medline_journal($ref->{MedlineCitation}->{Article}->{Journal},
                             title      => [_fix_medline_title($ref->{MedlineCitation}->{Article}->{ArticleTitle})],
                             abstract   => [_fix_medline_abstract($ref->{MedlineCitation}->{Article}->{Abstract}->{AbstractText})],
                             journal    => [_fix_medline_journal($ref->{MedlineCitation}->{Article}->{Journal},
-                                                                $ref->{MedlineCitation}->{Article}->{MedlineJournalInfo},
-                                                                $ua,
+                                                     $ref->{MedlineCitation}->{MedlineJournalInfo},
+                                                     $ua,
                                                                 #@_, # configuration
                                                                )],
                             _fix_ids($ref),
                                                                 #@_, # configuration
                                                                )],
                             _fix_ids($ref),
@@ -334,47 +332,36 @@ sub _fix_medline_journal($$$;){
      #         </TranslationStack>
      # </eSearchResult>
 
      #         </TranslationStack>
      # </eSearchResult>
 
-     my $ISSN = $journal->{ISSN};
+     my $ISSN = length($journal->{ISSN})? $journal->{ISSN} : $medline_journal->{ISSNLinking};
      if (ref $ISSN) {
          $ISSN = $ISSN->{content};
      if (ref $ISSN) {
          $ISSN = $ISSN->{content};
+  }
+     if (not length($ISSN)) {
+         use Data::Printer;
+         p $journal;
+         die "No ISSN";
      }
      }
-     my $url = qq(http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=journals&term=$ISSN);
-     print STDERR "url: $url" if $DEBUG;
+     my $url = qq(http://www.ncbi.nlm.nih.gov/nlmcatalog/?term=${ISSN}[ISSN]&format=text&report=xml);
+     print STDERR "url: $url\n" if $DEBUG;
      my $request = HTTP::Request->new('GET', $url);
      my $response = $ua->request($request);
      $response = $response->content;
      my $request = HTTP::Request->new('GET', $url);
      my $response = $ua->request($request);
      $response = $response->content;
-     print STDERR "response: $response" if $DEBUG;
+     $response =~ s/\&gt;/>/gso;
+     $response =~ s/\&lt;/</gso;
+     $response =~ s/^<\/?pre>//;
 
 
-     my ($journal_id) = $response =~ m#<Id>\s*(\d+)\s*</Id>#i;
-
-     # http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=journals&id=4559
-     #      <?xml version="1.0"?>
-     # <!DOCTYPE eSummaryResult PUBLIC "-//NLM//DTD eSummaryResult, 11 May 2002//EN" "http://www.ncbi.nlm.nih.gov/entrez/query/DTD/eSummary_020511.dtd">
-     # <eSummaryResult>
-     # <DocSum>
-     #         <Id>4559</Id>
-     #         <Item Name="Title" Type="String">The Journal of biological chemistry.</Item>
-     #         <Item Name="MedAbbr" Type="String">J Biol Chem</Item>
-     #         <Item Name="IsoAbbr" Type="String">J. Biol. Chem.</Item>
-     #         <Item Name="NlmId" Type="String">2985121R</Item>
-     #
-     #         <Item Name="pISSN" Type="String">0021-9258</Item>
-     #         <Item Name="eISSN" Type="String">1083-351X</Item>
-     #         <Item Name="PublicationStartYear" Type="String">1905</Item>
-     #         <Item Name="PublicationEndYear" Type="String"></Item>
-     #         <Item Name="Publisher" Type="String">American Society for Biochemistry and Molecular Biology</Item>
-     #         <Item Name="Language" Type="String">eng</Item>
-     #
-     #         <Item Name="Country" Type="String">United States</Item>
-     # </DocSum>
-     #
-     # </eSummaryResult>
-     $url = qq(http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=journals&id=$journal_id);
-     print STDERR "url: $url" if $DEBUG;
-     $request = HTTP::Request->new('GET', $url);
-     $response = $ua->request($request);
-     $response = $response->content;
-     print STDERR "response: $response" if $DEBUG;
+     print STDERR "response: $response\n" if $DEBUG;
+     my $xa = new XML::Simple;
+     my $ref_struct = $xa->XMLin($response,ForceArray=>['NCBICatalogRecord'],KeyAttr=>[]);
+     my $ref;
+     for (0..@{$ref_struct->{NCBICatalogRecord}}) {
+         if (exists $ref_struct->{NCBICatalogRecord}[$_]{JrXml}) {
+             $ref = $ref_struct->{NCBICatalogRecord}[$_];
+             last;
+         }
+     }
+     $ref = $ref_struct->{NCBICatalogRecord}[0] if not defined $ref;
+     print STDERR Dumper($ref) if $DEBUG;
 
      my %journal;
      while ($response =~ m{^\s*(?:(?:<id>\s*(\d+)</id>)| # Match ids
 
      my %journal;
      while ($response =~ m{^\s*(?:(?:<id>\s*(\d+)</id>)| # Match ids
@@ -386,20 +373,24 @@ sub _fix_medline_journal($$$;){
          else {
               $journal{lc($2)} = $3;
          }
          else {
               $journal{lc($2)} = $3;
          }
+  }
+     print STDERR Dumper($ref) if $DEBUG;
+     if (ref $ref->{JrXml}{Serial}{ISSN} ne 'ARRAY') {
+         $ref->{JrXml}{Serial}{ISSN} = [$ref->{JrXml}{Serial}{ISSN}];
      }
      }
-     my %journal_mapping = (title       => q(title),
-                           medlineabbr => q(medabbr),
-                           isoabbr     => q(isoabbr),
-                           nlmid       => q(nlmid),
-                           issn        => q(pissn),
-                           eissn       => q(eissn),
-                           publisher   => q(publisher),
-                           pmid    => q(id)
-                          );
-     my @journal_entry;
-     foreach my $key (keys %journal_mapping) {
-         push @journal_entry,($key=>$journal{$journal_mapping{$key}});
-     }
+     print STDERR Dumper($ref->{JrXml}) if $DEBUG;
+     my $print_issn = $ref->{JrXml}{Serial}{ISSN}[0]{IssnType} eq 'Print' ? 0 : 1;
+     my @journal_entry =
+         (title       => $ref->{JrXml}{Serial}{Title},
+          medlineabbr => $ref->{JrXml}{Serial}{MedlineTA},
+          isoabbr     => $ref->{JrXml}{Serial}{ISOAbbreviation},
+          nlmid       => $ref->{JrXml}{Serial}{NlmUniqueID},
+          issn        => $ref->{JrXml}{Serial}{ISSN}[$print_issn]{ISSN},
+          eissn       => $ref->{JrXml}{Serial}{ISSN}[1-$print_issn]{ISSN},
+          publisher   => $ref->{JrXml}{Serial}{publicationInfo}{Publisher},
+          pmid        => $ref->{JrXml}{id},
+         );
+     print STDERR Dumper(\@journal_entry) if $DEBUG;
      return @journal_entry;
 }
 
      return @journal_entry;
 }
 
@@ -446,7 +437,7 @@ sub _fix_medline_pages($){
      if (not defined $start) {
          ($start) = $pagination =~ /(\d+)/
      }
      if (not defined $start) {
          ($start) = $pagination =~ /(\d+)/
      }
-     if ($start > $stop and defined $stop) {
+     if ($start > $stop and defined $stop and length($stop)) {
          # this must be a reduced page listing; fix it up
          $stop+=$start - $start % 10 ** (int(log($stop)/log(10))+1);
      }
          # this must be a reduced page listing; fix it up
          $stop+=$start - $start % 10 ** (int(log($stop)/log(10))+1);
      }