]> git.donarmstrong.com Git - reference.git/blobdiff - blib/lib/Reference/Retrieve/PubMed.pm
delete built files which never should have been in the archive
[reference.git] / blib / lib / Reference / Retrieve / PubMed.pm
diff --git a/blib/lib/Reference/Retrieve/PubMed.pm b/blib/lib/Reference/Retrieve/PubMed.pm
deleted file mode 100644 (file)
index 553245d..0000000
+++ /dev/null
@@ -1,580 +0,0 @@
-# This module is part of Refence, and is released under the terms of
-# the GPL version 2, or any later version. See the file README and
-# COPYING for more information.
-# Copyright 2003 by Don Armstrong <don@donarmstrong.com>.
-# $Id: PubMed.pm 45 2013-09-10 18:05:31Z don $
-
-package Reference::Retrieve::PubMed;
-
-=head1 NAME
-
-Reference::Retrieve::PubMed -- Reference Retrieval from PubMed
-
-=head1 SYNOPSIS
-
-     my $reference = Reference::Retrieve::PubMed::get_reference(-pmid=>123456);
-     my @references = Reference::Retrieve::PubMed::get_reference(-query=>'John Smith[AUTHOR] AND 230[Pages]',limit=>50);
-
-
-=head1 DESCRIPTION
-
-Uh. Retreives references from pubmed. Yeah.
-
-=head1 BUGS
-
-None known.
-
-=cut
-
-
-use strict;
-use vars qw($REVISION $DEBUG);
-use Carp;
-
-use LWP::UserAgent;
-use XML::Simple qw(:strict);
-use Reference;
-
-use HTML::Entities;
-
-use Params::Validate qw(:types validate_with);
-
-BEGIN{
-     ($REVISION) = q$LastChangedRevision: 45 $ =~ /\$LastChangedRevision:\s+([^\s+])/;
-     $DEBUG = 0 unless defined $DEBUG;
-}
-
-
-=head2 get_reference
-
-=head3 Usage
-
-     my $reference = Reference::Retrieve::PubMed::get_reference(-pmid=>123456);
-     my @references = Reference::Retrieve::PubMed::get_reference(-query=>'John Smith[AUTHOR] AND 230[Pages]',-limit=>50);
-     my @references = Reference::Retrieve::PubMed::get_reference(-query=>{author=>'John Smith', pages=>'230'},-limit=>50)
-
-=head3 Function
-
-Retrives a reference from pubmed
-
-=head3 Returns
-
-In scalar context, effectively assumes -limit=>1 and returns the
-highest listed reference according to the order, etc. [Probably only
-usefull with -pmid.] In list context, returns all results (or until it
-hits the -limit.)
-
-=head3 Args
-
-list of arguments to select a reference or collection of references from.
-
-
-=cut
-
-sub get_reference{
-     my %options = validate_with(params => @_,
-                                spec   => {pubmed_site  => {default => 'http://www.ncbi.nlm.nih.gov'},
-                                           pmid_query   => {default => '/entrez/query.fcgi?cmd=Text&db=PubMed&dopt=XML&uid='},
-                                           search_query => {default => '/htbin-post/Entrez/query?db=m&form=4&dispmax=100&html=no&dopt=u&term='},
-                                           ua_agent     => {default => "DA Reference::Retreive::PubMed/$REVISION"},
-                                           email        => {default => "don+referenceretrieve$REVISION\@donarmstrong.com"},
-                                          },
-                                allow_extra => 1,
-                               );
-     my $ua = new LWP::UserAgent(agent => $options{ua_agent});
-}
-
-sub get_reference_by_pmid($;@){
-     my %options = validate_with(params => \@_,
-                                spec   => {pmid => {type => SCALAR|ARRAYREF,
-                                                    #regex => qr/^\d+$/,
-                                                   },
-                                           pubmed_site  => {default => 'http://www.ncbi.nlm.nih.gov'},
-                                           pmid_query   => {default => '/entrez/eutils/efetch.fcgi?db=pubmed&retmode=xml&rettype=full&id='},
-                                           search_query => {default => '/htbin-post/Entrez/query?db=m&form=4&dispmax=100&html=no&dopt=u&term='},
-                                           ua_agent     => {default => "DA Reference::Retreive::PubMed/$REVISION"},
-                                           email        => {default => "don+referenceretrieve$REVISION\@donarmstrong.com"},
-                                           useragent    => {optional => 1},
-                                          },
-                                allow_extra => 1,
-                               );
-     my $pmid = $options{pmid};
-
-     my $ua;
-     if ($options{useragent}) {
-         $ua = $options{useragent};
-     }
-     else {
-         $ua = new LWP::UserAgent(agent=>$options{ua_agent});
-     }
-     my $url = "$options{pubmed_site}$options{pmid_query}" . (ref($pmid) ? (join('&id=',@{$pmid})) : $pmid);
-     print STDERR "url: $url" if $DEBUG;
-     my $request = HTTP::Request->new('GET', $url);
-     my $response = $ua->request($request);
-     $response = $response->content;
-     print STDERR "response: $response" if $DEBUG;
-
-     # For some dumb reason, they send us xml with html
-     # entities. Ditch them.
-     #$response = decode_entities($response);
-     # It's even more freaking broken; they don't double encode them.
-     #$response =~ s/\&gt;(\s|$)/>$1/gso;
-     #$response =~ s/(?:(\s)\&lt;|&lt;(\/))/$1<$2/gso;
-     $response =~ s/&quot;/"/gso;
-
-     # Ditch any doctype
-     $response =~ s/^\s*<\?xml[^>]+>\s*//gso;
-     $response =~ s/^\s*<\!DOCTYPE[^>]+>\s*//gso;
-     # There is also a Pubmedarticleset
-     $response =~ s/^\s*<PubmedArticleSet>\s*//gso;
-     $response =~ s#</PubmedArticleSet>\s*$##gso;
-
-     # Add the opt so we get an array of PubMedArticle
-     $response = "<opt>$response</opt>";
-
-     print STDERR $response if $DEBUG;
-
-     # Figure out if there was an error in the search.
-
-     # Response should be in XML. Parse it.
-     my $xa = new XML::Simple;
-
-     my $ref_struct = $xa->XMLin($response, ForceArray => [ 'PubmedArticle' ],KeyAttr=>[]);
-
-     use Data::Dumper;
-     print STDERR Dumper($ref_struct) if $DEBUG;
-     # Handle the XML structure
-     my @references;
-     foreach my $ref (@{$ref_struct->{PubmedArticle}}) {
-         my $reference =  _create_reference_from_xml($ref,$ua);
-         if (not defined $reference) {
-              warn "Unable to create reference for $ref->{MedlineCitation}->{PMID}\n";
-         }
-         push @references, $reference;
-     }
-     if (wantarray) {
-         return @references;
-     }
-     return $references[0];
-}
-
-sub _create_reference_from_xml($$){
-     my ($ref,$ua) = @_;
-
-     # Figure out what type of reference this is. We only support
-     # Journal Articles right now.
-     my $types = {'journal article'=>'article',
-                 'letter'         =>'article',
-                  'editorial' => 'article',
-                  'review' => 'article',
-                };
-     my $ref_type = undef;
-     my $reference = undef;
-     foreach my $type (keys %{$types}) {
-         if (ref($ref->{MedlineCitation}->{Article}->{PublicationTypeList}->{PublicationType}) eq 'ARRAY'){
-              my $pubtypes;
-              @{$pubtypes}{map {lc} @{$ref->{MedlineCitation}->{Article}->{PublicationTypeList}->{PublicationType}}} =
-                   (1) x @{$ref->{MedlineCitation}->{Article}->{PublicationTypeList}->{PublicationType}};
-              if ($pubtypes->{$type}) {
-                   $ref_type = $types->{$type};
-                   last;
-              }
-              else {
-                   next;
-              }
-         }
-         elsif (lc($ref->{MedlineCitation}->{Article}->{PublicationTypeList}->{PublicationType}) eq lc($type)) {
-              $ref_type = $types->{$type};
-              last;
-         }
-     }
-     if (not defined $ref_type) {
-         warn "Unsupported PublicationType: ".(ref($ref->{MedlineCitation}->{Article}->{PublicationTypeList}->{PublicationType})?
-                                               join(',',@{$ref->{MedlineCitation}->{Article}->{PublicationTypeList}->{PublicationType}}):
-                                               $ref->{MedlineCitation}->{Article}->{PublicationTypeList}->{PublicationType});
-         print STDERR Dumper($ref) if $DEBUG;
-         $ref_type = 'article';
-     }
-     local $_ = $ref_type;
-     if (/article/) {
-         use Reference::Type::Article;
-         $reference = new Reference::Type::Article;
-         my $xml_mapping = {author     => [ _fix_medline_authors($ref->{MedlineCitation}->{Article}->{AuthorList}) ],
-                            title      => [_fix_medline_title($ref->{MedlineCitation}->{Article}->{ArticleTitle})],
-                            abstract   => [_fix_medline_abstract($ref->{MedlineCitation}->{Article}->{Abstract}->{AbstractText})],
-                            journal    => [_fix_medline_journal($ref->{MedlineCitation}->{Article}->{Journal},
-                                                                $ref->{MedlineCitation}->{Article}->{MedlineJournalInfo},
-                                                                $ua,
-                                                                #@_, # configuration
-                                                               )],
-                            _fix_ids($ref),
-                            # pmid       => $ref->{MedlineCitation}->{PMID},
-                            # medline_id => $ref->{MedlineCitation}->{MedlineID},
-                            volume     => [_fix_medline_ditch_empty($ref->{MedlineCitation}->{Article}->{Journal}->{JournalIssue}->{Volume})],
-                            date       => [_fix_medline_pubdate($ref->{MedlineCitation}->{Article}->{Journal}->{JournalIssue}->{PubDate})],
-                            number     => [_fix_medline_ditch_empty($ref->{MedlineCitation}->{Article}->{Journal}->{JournalIssue}->{Issue})],
-                            pages      => [_fix_medline_pages($ref->{MedlineCitation}->{Article}->{Pagination}->{MedlinePgn})],
-#                           keywords   => [_fix_medline_keywords($ref->{MedlineCitation}->{MeshHeadingList},
-#                                                                $ref->{MedlineCitation}->{ChemicalList},
-#                                                               )],
-#                           &_find_pubmed_links($ref->{MedlineCitation}->{PMID},$ua),
-                           };
-         # Deal with author
-
-         foreach my $reference_key (keys %{$xml_mapping}) {
-              my $method = $reference->can($reference_key);
-              die "Reference::Type::Article was unable to handle $reference_key" if not $method;
-              if (defined $xml_mapping->{$reference_key} and $method) {
-                   if (ref($xml_mapping->{$reference_key})) {
-                &{$method}($reference,@{$xml_mapping->{$reference_key}});
-                   }
-                   else {
-                        &{$method}($reference,$xml_mapping->{$reference_key});
-                   }
-              }
-              else {
-                   warn "Reference_key $reference_key was not defined or unable to handle type of key."
-                        if not defined $xml_mapping->{$reference_key} and $DEBUG;
-              }
-         }
-         return $reference;
-     }
-}
-
-sub _fix_medline_title($){
-     my $title = shift;
-
-     $title =~ s/\.$//;
-     return $title;
-}
-
-sub _fix_medline_abstract{
-    my $abstract = shift;
-    my $ret = '';
-    if (ref($abstract) and ref($abstract) eq 'ARRAY') {
-        for my $element (@{$abstract}) {
-            $ret .= "\n" if length $ret;
-            $ret .= $element->{Label}.': '.$element->{content};
-        }
-        return $ret;
-    } else {
-        return $abstract;
-    }
-}
-
-
-sub _fix_medline_authors($){
-     my $author_list = shift;
-     $author_list = $author_list->{Author};
-     my @authors;
-     $author_list = [$author_list] if ref($author_list) ne 'ARRAY';
-     foreach my $author (@{$author_list}) {
-         my %au;
-         $au{first} = $author->{ForeName} if exists $author->{ForeName};
-         $au{last}  = $author->{LastName} if exists $author->{LastName};
-         $au{initials} = $author->{Initials} if exists $author->{Initials};
-         $au{full};
-         push @authors,\%au;
-     }
-     return (author=>\@authors);
-}
-
-=head2 _fix_medline_journal
-
-=head3 Usage
-
-     $reference->journal(_fix_medline_journal($ref->{MedlineCitation}->{Article}->{Journal},
-                                             $ref->{MedlineCitation}->{Article}->{MedlineJournalInfo},
-                                             $ua,));
-
-=head3 Function
-
-From the medline citation informatino returns a properly formatted
-list of information for the journal reference listing.
-
-=head3 Args
-
-Journal information hashref
-
-medline journal information hashref
-
-user agent
-
-=cut
-
-sub _fix_medline_journal($$$;){
-     my ($journal,$medline_journal,$ua) = @_;
-     # journal takes fullname, issn, medlineabbr, pmid, and nlmuid
-     # Try to supply as much as possible.
-     # Use esearch to get pmjournalid
-     # http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=journals&term=0021-9258
-     # use esummary to retreive the journalid
-     # <?xml version="1.0"?>
-     # <!DOCTYPE eSearchResult PUBLIC "-//NLM//DTD eSearchResult, 11 May 2002//EN" "http://www.ncbi.nlm.nih.gov/entrez/query/DTD/eSearch_020511.dtd">
-     # <eSearchResult>
-     #         <Count>1</Count>
-     #         <RetMax>1</RetMax>
-     #         <RetStart>0</RetStart>
-     #         <IdList>
-     #                 <Id>4559</Id>
-     #
-     #         </IdList>
-     #         <TranslationSet>
-     #         </TranslationSet>
-     #         <TranslationStack>
-     #                 <TermSet>
-     #                         <Term>0021-9258[All Fields]</Term>
-     #                         <Field>All Fields</Field>
-     #                         <Count>1</Count>
-     #
-     #                         <Explode>Y</Explode>
-     #                 </TermSet>
-     #         </TranslationStack>
-     # </eSearchResult>
-
-     my $ISSN = $journal->{ISSN};
-     if (ref $ISSN) {
-         $ISSN = $ISSN->{content};
-     }
-     my $url = qq(http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=journals&term=$ISSN);
-     print STDERR "url: $url" if $DEBUG;
-     my $request = HTTP::Request->new('GET', $url);
-     my $response = $ua->request($request);
-     $response = $response->content;
-     print STDERR "response: $response" if $DEBUG;
-
-     my ($journal_id) = $response =~ m#<Id>\s*(\d+)\s*</Id>#i;
-
-     # http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=journals&id=4559
-     #      <?xml version="1.0"?>
-     # <!DOCTYPE eSummaryResult PUBLIC "-//NLM//DTD eSummaryResult, 11 May 2002//EN" "http://www.ncbi.nlm.nih.gov/entrez/query/DTD/eSummary_020511.dtd">
-     # <eSummaryResult>
-     # <DocSum>
-     #         <Id>4559</Id>
-     #         <Item Name="Title" Type="String">The Journal of biological chemistry.</Item>
-     #         <Item Name="MedAbbr" Type="String">J Biol Chem</Item>
-     #         <Item Name="IsoAbbr" Type="String">J. Biol. Chem.</Item>
-     #         <Item Name="NlmId" Type="String">2985121R</Item>
-     #
-     #         <Item Name="pISSN" Type="String">0021-9258</Item>
-     #         <Item Name="eISSN" Type="String">1083-351X</Item>
-     #         <Item Name="PublicationStartYear" Type="String">1905</Item>
-     #         <Item Name="PublicationEndYear" Type="String"></Item>
-     #         <Item Name="Publisher" Type="String">American Society for Biochemistry and Molecular Biology</Item>
-     #         <Item Name="Language" Type="String">eng</Item>
-     #
-     #         <Item Name="Country" Type="String">United States</Item>
-     # </DocSum>
-     #
-     # </eSummaryResult>
-     $url = qq(http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=journals&id=$journal_id);
-     print STDERR "url: $url" if $DEBUG;
-     $request = HTTP::Request->new('GET', $url);
-     $response = $ua->request($request);
-     $response = $response->content;
-     print STDERR "response: $response" if $DEBUG;
-
-     my %journal;
-     while ($response =~ m{^\s*(?:(?:<id>\s*(\d+)</id>)| # Match ids
-                          (?:<item\s+name=\"([^\"]+)\"\s+Type=\"String\">\s*([^<]+?)</item>))\s* # Match item Name clauses
-                          $}ixmg) {
-         if (not defined $2) {
-              $journal{id} = $1;
-         }
-         else {
-              $journal{lc($2)} = $3;
-         }
-     }
-     my %journal_mapping = (title       => q(title),
-                           medlineabbr => q(medabbr),
-                           isoabbr     => q(isoabbr),
-                           nlmid       => q(nlmid),
-                           issn        => q(pissn),
-                           eissn       => q(eissn),
-                           publisher   => q(publisher),
-                           pmid    => q(id)
-                          );
-     my @journal_entry;
-     foreach my $key (keys %journal_mapping) {
-         push @journal_entry,($key=>$journal{$journal_mapping{$key}});
-     }
-     return @journal_entry;
-}
-
-=head2 
-
-=head3 Usage
-
-     $reference->date(_fix_medline_pubdate($ref->{MedlineCitation}->{Article}->{Journal}->{JournalIssue}->{PubDate}));
-
-=head3 Function
-
-=head3 Returns
-
-=head3 Args
-
-=cut
-
-sub _fix_medline_pubdate($){
-     my ($date) = shift;
-     return (year=>$date->{Year},month=>$date->{Month},day=>$date->{Day}) if exists $date->{Year};
-     # Ok... punt.
-     if (exists $date->{MedlineDate}) {
-         my ($year,$month,$day) = split /\s+/,$date->{MedlineDate};
-         return (year=>$year,month=>$month,day=>$day)
-     }
-}
-
-=head2 _fix_medline_pages
-
-=head3 Usage
-
-     pages      => [_fix_medline_pages($ref->{MedlineCitation}->{Article}->{Pagination}->{MedlinePgn})],
-
-=head3 Function
-
-Returns output with a list of pages appropriate for an Article type of
-reference.
-
-=cut
-
-sub _fix_medline_pages($){
-     my ($pagination) = @_;
-     my ($start,$stop) = $pagination =~ /(\d*)\s*\-\s*(\d*)/;
-     if (not defined $start) {
-         ($start) = $pagination =~ /(\d+)/
-     }
-     if ($start > $stop and defined $stop) {
-         # this must be a reduced page listing; fix it up
-         $stop+=$start - $start % 10 ** (int(log($stop)/log(10))+1);
-     }
-     my @return;
-     push @return, (start=>$start) if defined $start and $start ne '';
-     push @return, (stop=>$stop) if defined $stop and $stop ne '';
-     return @return;
-}
-
-sub _find_pubmed_links($$){
-     my ($pmid,$ua) = @_;
-     return ();
-     #http://eutils.ncbi.nlm.nih.gov/entrez/query/static/elink_help.html
-     my $url = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/elink.fcgi?dbfrom=pubmed&cmd=llinks&id=" . (ref($pmid) ? (join('&id=',@{$pmid})) : $pmid);
-     print STDERR "url: $url" if $DEBUG;
-     my $request = HTTP::Request->new('GET', $url);
-     my $response = $ua->request($request);
-     $response = $response->content;
-     print STDERR "response: $response" if $DEBUG;
-
-     # Response should be in XML. Parse it.
-     my $xa = new XML::Simple;
-
-     my $ref_struct = $xa->XMLin($response, ForceArray => ['IdUrlSet'], KeyAttr=>[]);
-
-     use Data::Dumper;
-     print STDERR Dumper($ref_struct);# if $DEBUG;
-     # Rearange data around Id.
-     my $links = {};
-     map {$links->{$_->{Id}}=$_->{ObjUrl}} @{$ref_struct->{LinkSet}->{IdUrlList}->{IdUrlSet}};
-     foreach my $obj_url (@{$links->{$pmid}->{ObjUrl}}) {
-         next unless $obj_url->{SubjectType} = 'publishers/providers';
-         #@links = _find_links_from_url($obj_url->{Url},$ua);
-     }
-     # Find publisher link
-     # If no publisher link, use the first aggregator link.
-}
-
-=head2 _fix_ids
-
-     _fix_ids
-
-
-
-=cut
-
-sub _fix_ids {
-     my ($ref) = @_;
-
-     my %ids_known = (medline => 'medline_id',
-                     pubmed  => 'pmid',
-                     doi     => 'doi',
-                    );
-     my %ids;
-     if (exists $ref->{PubmedData}{ArticleIdList}{ArticleId}) {
-         for my $art_id (ref($ref->{PubmedData}{ArticleIdList}{ArticleId}) eq 'ARRAY' ?
-              @{$ref->{PubmedData}{ArticleIdList}{ArticleId}}:
-                   ($ref->{PubmedData}{ArticleIdList}{ArticleId})) {
-              if (exists $ids_known{$art_id->{IdType}}) {
-                   $ids{$ids_known{$art_id->{IdType}}} = $art_id->{content};
-              }
-         }
-     }
-     if (not exists $ids{pmid}) {
-         $ids{pmid} = $ref->{MedlineCitation}->{PMID} if defined $ref->{MedlineCitation}->{PMID};
-     }
-     if (not exists $ids{medline_id}) {
-         $ids{medline_id} = $ref->{MedlineCitation}->{MedlineID} if defined $ref->{MedlineCitation}->{MedlineID};
-     }
-     return %ids;
-}
-
-
-=head2 _find_links_from_url
-
-=head3 Usage
-
-=head3 Function
-
-=head3 Returns
-
-=head3 Args
-
-=cut
-
-sub _find_links_from_url($$){
-     my ($link,$ua) = @_;
-
-     
-     
-}
-
-sub _fix_medline_ditch_empty($){
-     my ($value) = @_;
-
-     if (ref($value)) {
-         if (ref($value) eq 'HASH') {
-              if (scalar keys %{$value} > 0) {
-                   return $value;
-              }
-              else {
-                   return ();
-              }
-         }
-         elsif (ref($value) eq 'ARRAY') {
-              if (scalar @{$value} > 0) {
-                   return $value;
-              }
-              else {
-                   return ();
-              }
-         }
-         else {
-              return ();
-         }
-     }
-     else {
-         return $value if defined $value;
-         return ();
-     }
-}
-
-
-1;
-
-
-__END__
-
-
-
-
-
-