# This module is part of da_reference, and is released under the terms # of the GPL version 2, or any later version, at your option. See the # file README and COPYING for more information. # Copyright 2004 by Don Armstrong . # $Id$ package Reference::Output::Bibtex; =head1 NAME Reference::Output::Bibtex -- Output references in BibTeX format =head1 SYNOPSIS print bibtex($reference); Returns a reference formatted in bibtex format. =head1 DESCRIPTION Knows how to handle the reference-> bibtex field mapping for many reference types, but overridden types may need to provide their own mapping. =head1 BUGS None known. =cut use warnings; use strict; use vars qw($REVISION $DEBUG @EXPORT @EXPORT_OK %EXPORT_TAGS); use base qw(Exporter); use Encode qw(encode_utf8 is_utf8 decode decode_utf8); use Text::Iconv; BEGIN{ ($REVISION) = q$LastChangedRevision$ =~ /\$LastChangedRevision:\s+([^\s+])/; $DEBUG = 0 unless defined $DEBUG; @EXPORT = qw(bibtex); @EXPORT_OK = qw(); %EXPORT_TAGS = (output => [qw(bibtex)], ); Exporter::export_ok_tags(qw(output)); $EXPORT_TAGS{all} = [@EXPORT_OK]; } # Assigned and discussed at the end of this file my %bibtex_mapping; use Carp; use Params::Validate qw(:types validate_with); use Text::Wrap; =head2 bibtex print bibtex $reference; %bibtex = bibtex $reference; print bibtex($reference,mapping=>{...}) In scalar context, returns a formatted bibtex entry, suitable for printing. In list context, returns a hash of key, value pairs which can be used to print a formatted bibtex entry. You can also pass an optional mapping to be used for making the bibtex entry. See B for the details. The mappings are obeyed in the following order, the first taking precedence over the last. =over =item Passed mapping =item Object's bibtex_mapping =item Internal bibtex_mapping (%Reference::Output::Bibtex::bibtex_mapping) =back Returns a SCALAR bibtex reference in scalar context, a HASH bibtex reference in list context =cut sub bibtex{ my $reference = shift; # Parse options if any my %param = validate_with(params => \@_, spec => {mapping => {type => HASHREF, optional => 1, }, }, ); my $mapping = undef; # Use our mapping by default if it exists $mapping = $bibtex_mapping{lc($reference->{type})} if exists $bibtex_mapping{lc($reference->{type})}; # Override that with the module's mapping $mapping = $reference->{bibtex_mapping} if exists $reference->{bibtex_mapping}; # Finally, override everything with passed mapping $mapping = $param{mapping} if exists $param{mapping}; if (not defined $mapping) { carp "This reference type doesn't support bibtex output."; return undef; } my %bibtex_entry; foreach my $bibtex_field (keys %{$mapping->{mapping}}) { my $params = []; if (ref $bibtex_field) { $params = $$bibtex_field{params} if exists $$bibtex_field{params}; $bibtex_field = $$bibtex_field{field}; } my $function = $reference->can($mapping->{mapping}->{$bibtex_field}); next unless $function; $bibtex_entry{$bibtex_field} = &{$function}($reference,output=>'bibtex',@$params); # dereference the entries if necessesary. next unless wantarray; # Make new copies of the entries if necessary so we can # mogrify to our hearts content. if (ref($bibtex_entry{$bibtex_field}) eq 'HASH') { $bibtex_entry{$bibtex_field} = {%{$bibtex_entry{$bibtex_field}}}; } elsif (ref($bibtex_entry{$bibtex_field}) eq 'ARRAY') { $bibtex_entry{$bibtex_field} = [@{$bibtex_entry{$bibtex_field}}]; } } # Return the entries in hash form if desired. return %bibtex_entry if wantarray; # Ok, stich the bibtex entry together... my $bibtex_entry; $bibtex_entry = '@'.$mapping->{order}[0].'{'.$bibtex_entry{$mapping->{order}[0]}.",\n"; foreach my $bibtex_field (@{$mapping->{order}}[1..$#{$mapping->{order}}]) { next unless defined $bibtex_entry{$bibtex_field}; if (ref $bibtex_entry{$bibtex_field} eq 'ARRAY') { if (ref $mapping->{mapping}{$bibtex_field}) { if (exists $mapping->{mapping}{$bibtex_field}{code}) { local $_ = $bibtex_entry{$bibtex_field}; eval $mapping->{mapping}{$bibtex_field}{code}; carp "Error while executing code to assemble bibtex entry: $@" if $@; } elsif (exists $mapping->{mapping}{$bibtex_field}{join}) { $bibtex_entry{$bibtex_field} = join($mapping->{mapping}{$bibtex_field}{join}, @{$bibtex_entry{$bibtex_field}}); } else { carp "$bibtex_field is an ARRAYREF, joining using commas"; $bibtex_entry{$bibtex_field} = join(', ', @{$bibtex_entry{$bibtex_field}}); } } else { carp "$bibtex_field is an ARRAYREF, joining using commas"; $bibtex_entry{$bibtex_field} = join(', ', @{$bibtex_entry{$bibtex_field}}); } } my $entry = $bibtex_entry{$bibtex_field}; $entry =~ s/%/\\%/g; $entry = encode_utf8(convert_to_utf8($entry)); my $start = "{"; my $stop = "}"; if ($bibtex_field eq 'journal') { $start = ""; $stop = ""; } $bibtex_entry .= wrap(' ' x 4,' ' x 8 . ' ' x length($bibtex_field),"$bibtex_field = $start".$entry."$stop,\n"); } $bibtex_entry .= "}\n"; return $bibtex_entry; } =head2 bibtex_mapping $Reference::Output::Bibtex::bibtex_mapping{Article} = {mapping => {author => {field => 'author', join => ' and ', params => [], }, volume => 'volume', Articlce => 'name', foo => 'bar', }, order => [qw(name author volume foo)], }; This variable holds the mapping to bibtex output. Each type of reference has its own keys. Currently the following types are supported by the Bibtex output method: =over =item article =item collection =item book =back If you wish to add support for your own custom reference type, you merely need to add a bibtex_mapping element to your class's hashref, or add to this variable. [Preferbly the former, as the latter should only be used by the end user.] The mapping key in the reference type hashref is a hashref containing key value pairs according to the following metric: =over =item If the mapping key value is not a reference, the value is used as the name function to call via C<$reference->field>. [In the example above, the volume mapping is built by a call to C<$reference->volume>]. =item If the mapping key value is a hashref, the hashref contains two keys. The C key contains the name of the function to call. The C key contains the parameters =back The order key in the reference type hashref is an arrayref which defines the order in which keys are listed in the BibTeX output. Values in the arrayref should be the keys of the mapping hashref. [The first value listed is the type of reference/reference name pair.] =cut %bibtex_mapping = (article => {mapping => {Article => 'name', author => 'author', title => 'title', journal => 'journal', year => 'year', key => 'keywords', volume => 'volume', number => 'number', pages => 'pages', month => 'month', abstract => 'abstract', pmid => 'pmid', mlid => 'medline_id', doi => 'doi', html => 'html', pdf => 'pdf', }, order => [qw(Article author title journal year key volume number pages month abstract pmid mlid doi html pdf), ], }, book => {mapping => {Book => 'name', author => 'author', title => 'title', year => 'year', key => 'keywords', volume => 'volume', number => 'number', pages => 'pages', month => 'month', abstract => 'abstract', doi => 'doi', # html => 'html', # pdf => 'pdf', }, order => [qw(Article author title journal year key volume number pages month abstract doi html pdf), ], }, ); =head2 convert_to_utf8 $utf8 = convert_to_utf8("text","charset"); =cut sub convert_to_utf8 { my ($data,$charset,$internal_call) = @_; $internal_call //= 0; if (is_utf8($data)) { # cluck("utf8 flag is set when calling convert_to_utf8"); return $data; } $charset = uc($charset//'UTF-8'); if ($charset eq 'RAW') { # croak("Charset must not be raw when calling convert_to_utf8"); } my $iconv_converter; eval { $iconv_converter = Text::Iconv->new($charset,"UTF-8") or die "Unable to create converter for '$charset'"; }; if ($@) { return undef if $internal_call; warn $@; # We weren't able to create the converter, so use Encode # instead return __fallback_convert_to_utf8($data,$charset); } my $converted_data = $iconv_converter->convert($data); # if the conversion failed, retval will be undefined or perhaps # -1. my $retval = $iconv_converter->retval(); if (not defined $retval or $retval < 0 ) { # try iso8559-1 first if (not $internal_call) { my $call_back_data = convert_to_utf8($data,'ISO8859-1',1); # if there's an à (0xC3), it's probably something # horrible, and we shouldn't try to convert it. if (defined $call_back_data and $call_back_data !~ /\x{C3}/) { # warn "failed to convert to utf8 (charset: $charset, data: $data), but succeeded with ISO8859-1: ".encode_utf8($call_back_data); return $call_back_data; } } warn "failed to convert to utf8 (charset: $charset, data: $data)"; # Fallback to encode, which will probably also fail. return __fallback_convert_to_utf8($data,$charset); } return decode("UTF-8",$converted_data); } # this returns data in perl's internal encoding sub __fallback_convert_to_utf8 { my ($data, $charset) = @_; # raw data just gets returned (that's the charset WordDecorder # uses when it doesn't know what to do) return $data if $charset eq 'raw'; if (not defined $charset and not is_utf8($data)) { warn ("Undefined charset, and string '$data' is not in perl's internal encoding"); return $data; } # lets assume everything that doesn't have a charset is utf8 $charset //= 'utf8'; my $result; eval { $result = decode($charset,$data,0); }; if ($@) { warn "Unable to decode charset; '$charset' and '$data': $@"; return $data; } return $result; } 1; __END__