#!/usr/bin/perl
# bibtex_to_paper opens the paper corresponding to a bibtex key
# and is released under the terms of the GNU GPL version 3, or any
# later version, at your option. See the file README and COPYING for
# more information.
# Copyright 2014 by Don Armstrong.

use warnings;
use strict;

use Getopt::Long;
use Pod::Usage;

use File::Find;
use File::Basename qw(basename);
use File::Spec;

use Text::BibTeX;
use User;
use Data::Printer;
use POSIX;
use DBI;

=head1 NAME

bibtex_to_paper - opens the paper corresponding to a bibtex key

=head1 SYNOPSIS

bibtex_to_paper [options] bibtexkey

 Options:
   --bibtex, -b         bibtex file to look up key in
   --bibtex-cache, -c   bibtex cache file
   --build-cache, -B    build cache using bibtex files
   --clear-cache        empty the cache before doing anything else
   --papers-directory   directory containing PDFs to index
   --use-git            list PDFs using git (default; --no-use-git to disable)
   --search-by-pmid     Search term is a pmid instead of a bibtex key
   --search-by-file     Search term is (part of) a file name
   --pdfviewer, -p      pdf viewer to use
   --only-print         Only print PDF file name
   --debug, -d          debugging level (Default 0)
   --help, -h           display this help
   --man, -m            display manual

=head1 OPTIONS

=over

=item B<--bibtex, -b>

Bibtex file to look key up in. May be given more than once.

=item B<--bibtex-cache, -c>

Bibtex cache file (an SQLite database). Defaults to
F<.bibtex_to_paper_cache> in the user's home directory. It is created
if it does not exist.

=item B<--build-cache>

Treat the remaining command line arguments as bibtex files and load
them (together with any B<--bibtex> files) into the cache instead of
looking them up as keys.

=item B<--clear-cache>

Delete every cached paper and bibtex entry before loading or looking
anything up.

=item B<--papers-directory>

Directory to index for PDF files. May be given more than once.

=item B<--use-git>, B<--no-use-git>

When enabled (the default), the list of PDF files is taken from
C<git ls-tree HEAD> run in the current working directory, so only files
committed below the current directory are indexed and the directories
given to B<--papers-directory> are not traversed. With B<--no-use-git>
the directories given to B<--papers-directory> are walked instead.

=item B<--search-by-pmid>

The search term is a PubMed ID instead of a bibtex key.

=item B<--search-by-file>

The search term is a substring of the file name recorded for the
bibtex entry.

=item B<--pdfviewer, -p>

PDF viewer to use; defaults to xournal.

=item B<--only-print>

Only print the PDF file name (or the DOI/HTML location if no PDF is
known), don't open it.

=item B<--debug, -d>

Debug verbosity. (Default 0)

=item B<--help, -h>

Display brief usage information.

=item B<--man, -m>

Display this manual.

=back

=head1 EXAMPLES

 # index the committed PDFs below the current directory and a bibtex file
 bibtex_to_paper --papers-directory . --build-cache references.bib

 # print the location of the paper for a key
 bibtex_to_paper --only-print somekey

=cut


our $DEBUG;

my %options = (debug           => 0,
               help            => 0,
               man             => 0,
               only_print      => 0,
               search_by_pmid  => 0,
               search_by_file  => 0,
               use_git         => 1,
               'bibtex_cache'  => File::Spec->catfile(User->Home,'.bibtex_to_paper_cache'),
              );

GetOptions(\%options,
           'build_cache|build-cache!',
           'bibtex|b=s@',
           'bibtex_cache|bibtex-cache|c=s',
           'pdfviewer|p=s',
           'use_git|use-git!',
           'only_print|only-print!',
           'search_by_pmid|search-by-pmid!',
           'search_by_file|search-by-file!',
           'clear_cache|clear-cache!',
           'papers_directory|papers-directory=s@',
           'debug|d+','help|h|?','man|m');

pod2usage() if $options{help};
pod2usage({verbose=>2}) if $options{man};

$DEBUG = $options{debug};

my @USAGE_ERRORS;
if (not exists $options{bibtex} and
    not exists $options{bibtex_cache}) {
    push @USAGE_ERRORS,
        "You must give at least one of --bibtex or --bibtex-cache";
}

pod2usage(join("\n",@USAGE_ERRORS)) if @USAGE_ERRORS;

main();

# Entry point: opens (or creates) the cache, optionally clears and
# refills it from bibtex files and paper directories, then opens every
# remaining command line argument as a search term.
sub main {
    my $dbh;
    my $sth;
    if (exists $options{bibtex_cache}) {
        if (-e $options{bibtex_cache}) {
            ($dbh,$sth) = open_cache($options{bibtex_cache});
        }
        else {
            ($dbh,$sth) = initialize_database($options{bibtex_cache});
        }
    }
    # Test the value, not the key: --no-clear-cache stores a false
    # value under the key and must not clear anything.
    if ($options{clear_cache} and defined $dbh) {
        clear_cache($dbh,$sth);
    }

    my %entries;
    # Likewise --no-build-cache must not swallow the search terms.
    if ($options{build_cache}) {
        $options{bibtex} //= [];
        $options{bibtex} =
            [@ARGV,
             @{ref $options{bibtex} ? $options{bibtex} : [$options{bibtex}]},
            ];
        @ARGV = ();
    }
    if (exists $options{bibtex}) {
        my @bibtex_files =
            @{ref $options{bibtex} ? $options{bibtex} : [$options{bibtex}]};
        for my $bibtex_file (@bibtex_files) {
            parse_bibtex_file($bibtex_file,\%entries);
        }
    }

    if (exists $options{papers_directory} and defined $dbh) {
        $dbh->begin_work;
        load_papers_into_database($dbh,$sth,$options{papers_directory});
        $dbh->commit;
    }

    p %entries if $DEBUG;
    if (keys %entries and defined $dbh) {
        $dbh->begin_work;
        load_bibtex_entries_into_database($dbh,$sth,\%entries);
        $dbh->commit;
    }

    p @ARGV if $DEBUG;
    for my $bibtex_key (@ARGV) {
        open_bibtex_key(\%options,$dbh,$sth,\%entries,$bibtex_key);
    }
}

# Deletes every row from the papers and bibtex tables.
sub clear_cache {
    my ($dbh,$sth) = @_;
    $sth->{clear_papers_cache}->execute();
    $sth->{clear_bibtex_cache}->execute();
}

# Records every PDF found in the cache, together with whether a
# matching "<pdf>.xoj" file exists next to it.
#
# $dir is a directory or an array reference of directories. It is only
# traversed when $options{use_git} is false; with use_git the file list
# comes from git, run in the current working directory.
sub load_papers_into_database {
    my ($dbh,$sth,$dir) = @_;
    my @dirs = ref($dir) ? @{$dir} : $dir;
    if ($options{use_git}) {
        # -z makes git print the names unquoted and NUL-terminated, so
        # unusual file names need no unquoting (the previous unquoting
        # evaluated the file name as Perl code). Without --full-name the
        # paths are relative to the current directory, which is what
        # rel2abs() and the -e test below resolve them against.
        my $listing = qx(git ls-tree HEAD -r --name-only -z);
        if (not defined $listing or $? != 0) {
            warn "git ls-tree failed (status $?); no papers loaded from git\n";
            return;
        }
        my @files = grep { /\.pdf$/ } split /\0/, $listing;
        for my $file (@files) {
            insert_or_replace_papers($dbh,$sth,
                                     basename($file),
                                     File::Spec->rel2abs($file),
                                     (-e "${file}.xoj") ? 1 : 0,
                                    );
        }
    }
    else {
        my $actually_load_it = sub {
            # find() has chdir'ed into the directory; $_ is the bare name.
            # Only skip git's own directory, not every name that merely
            # contains ".git".
            if ($_ eq '.git') {
                $File::Find::prune = 1;
                return;
            }
            return unless /\.pdf$/;
            my $xoj = (-e "${_}.xoj") ? 1 : 0;
            insert_or_replace_papers($dbh,$sth,
                                     basename($File::Find::name),
                                     File::Spec->rel2abs($_),
                                     $xoj,
                                    );
        };
        find($actually_load_it,@dirs);
    }
}

# Inserts (or replaces) one row of the papers table.
sub insert_or_replace_papers {
    my ($dbh,$sth,$file_name,$file_loc,$has_xoj) = @_;
    $sth->{insert_papers}->execute($file_name,$file_loc,$has_xoj);
    $sth->{insert_papers}->finish();
}

# Inserts (or replaces) one bibtex row per key of %{$entries}; each
# value is a hash reference with file_name, pmid, doi and html keys
# (any of which may be missing).
sub load_bibtex_entries_into_database {
    my ($dbh,$sth,$entries) = @_;
    for my $entry (keys %{$entries}) {
        next unless defined $entries->{$entry};
        $sth->{insert_bibtex}->execute($entry,
                                       @{$entries->{$entry}}{qw(file_name pmid doi html)});
        $sth->{insert_bibtex}->finish();
        print STDERR "inserted $entry {".
            join(',',map {defined $_?"'$_'":"'undef'"} %{$entries->{$entry}}).
            "}\n" if $DEBUG;
    }
}

# Looks up one search term and opens the matching entry.
#
# Without a database handle the term is used as a key into %{$entries};
# otherwise it is looked up in the cache as a pmid, a file name or a
# bibtex key, depending on the search_by_* options.
sub open_bibtex_key {
    my ($options,$dbh,$sth,$entries,$bibtex_key) = @_;
    if (not defined $dbh) {
        open_entry($dbh,$sth,$entries->{$bibtex_key},$options);
        return;
    }
    my $entry;
    if ($options->{search_by_pmid}) {
        $entry = select_entry_from_pmid($dbh,$sth,$bibtex_key);
    }
    elsif ($options->{search_by_file}) {
        $entry = select_entry_from_file($dbh,$sth,$bibtex_key);
    }
    else {
        $entry = select_entry_from_bibtex_key($dbh,$sth,$bibtex_key);
    }
    p $entry if $DEBUG;
    open_entry($dbh,$sth,$entry,$options);
}

# Runs @cmd in a detached child process and returns the child's pid to
# the parent. Dies if fork fails.
sub fork_exec {
    my (@cmd) = @_;
    my $child = fork();
    if (not defined $child) {
        die "Unable to fork for some reason: $!";
    }
    return $child if $child != 0;

    # In the child: drop every inherited descriptor, then give the
    # program valid standard streams. STDOUT has to be reopened before
    # STDERR is duplicated from it; otherwise both stay closed and the
    # program's own files could end up on descriptors 1 and 2.
    foreach (0 .. (POSIX::sysconf(POSIX::_SC_OPEN_MAX()) || 1024)) {
        POSIX::close $_;
    }
    open(STDIN,  '<',  '/dev/null');
    open(STDOUT, '>',  '/dev/null');
    open(STDERR, '>&', \*STDOUT);
    # If exec fails the child must not return into the caller's loop (it
    # would carry on as a second copy of this script), and must not run
    # destructors on handles shared with the parent.
    exec(@cmd) or POSIX::_exit(127);
}

# Opens $file_name in the viewer given by $options->{pdfviewer}, or in
# xournal if none was given. $has_xoj is accepted but does not
# currently influence which viewer is chosen.
sub open_pdf {
    my ($file_name,$options,$has_xoj) = @_;
    print STDERR "opening $file_name\n" if $DEBUG;
    my $pdf_viewer = 'xournal';
    if (exists $options->{pdfviewer} and defined $options->{pdfviewer}) {
        $pdf_viewer = $options->{pdfviewer};
    }
    fork_exec($pdf_viewer,$file_name);
}

# Opens $file (a URL) using sensible-browser.
sub open_browser {
    my ($file) = @_;
    fork_exec('sensible-browser',$file);
}

# Opens (or, with only_print, prints) the best location known for
# $entry: the cached PDF if one can be found, else the DOI, else the
# HTML link. Does nothing for an undefined or empty entry.
sub open_entry {
    my ($dbh,$sth,$entry,$options) = @_;
    return unless defined $entry and ref $entry and keys %{$entry};
    if ($DEBUG) {
        print STDERR "Entry: \n";
        p $entry;
    }
    # The papers table can only be consulted when there is a cache.
    if (defined $dbh and
        defined $entry->{file_name} and length $entry->{file_name}) {
        my $paper = select_one($dbh,$sth->{select_papers_by_name},
                               $entry->{file_name});
        if (not defined $paper) {
            # Fall back to any paper whose file name carries the same
            # pmid. The bind value is a LIKE pattern, so it has to go to
            # the LIKE query; select_papers_by_pmid compares with "=".
            my ($pmid) = $entry->{file_name} =~ /pmid_(\d+)/;
            if (defined $pmid and length $pmid) {
                $paper = select_one($dbh,$sth->{select_papers_by_name_like},
                                    '%pmid_'.$pmid.'.%');
            }
        }
        p $paper if $DEBUG;
        print STDERR $entry->{file_name} if $DEBUG;
        if (defined $paper) {
            if ($options->{only_print}) {
                print $paper->{path};
                return;
            }
            open_pdf($paper->{path},$options,$paper->{has_xoj});
            return;
        }
        else {
            print STDERR "Unable to find paper\n" if $DEBUG;
        }
    }
    if (defined $entry->{doi}) {
        if ($options->{only_print}) {
            print $entry->{doi};
            return;
        }
        my $url = $entry->{doi};
        $url =~ s{^doi://}{http://dx.doi.org/};
        open_browser($url);
        return;
    }
    if (defined $entry->{html}) {
        if ($options->{only_print}) {
            print $entry->{html};
            return;
        }
        open_browser($entry->{html});
        return;
    }
}

# Returns the first bibtex row with the given pmid, or undef.
sub select_entry_from_pmid {
    my ($dbh,$sth,$pmid) = @_;
    return select_one($dbh,$sth->{select_bibtex_by_pmid},$pmid);
}

# Returns the first bibtex row whose file name contains $filename, or
# undef.
sub select_entry_from_file {
    my ($dbh,$sth,$filename) = @_;
    return select_one($dbh,$sth->{select_bibtex_by_file_name_like},
                      '%'.$filename.'%');
}

# Returns the bibtex row for $bibtex_key, or undef. If there is no
# exact match, everything from the first ':' of the key onwards is
# dropped and the first row whose key starts with the remainder is
# returned.
sub select_entry_from_bibtex_key {
    my ($dbh,$sth,$bibtex_key) = @_;
    my $entry = select_one($dbh,$sth->{select_bibtex_by_key},$bibtex_key);
    if (not defined $entry) {
        print STDERR "Unable to find entry by exact search\n" if $DEBUG;
        $bibtex_key =~ s/:.*$//;
        $entry = select_one($dbh,$sth->{select_bibtex_by_approximate_key},
                            $bibtex_key.'%');
    }
    print STDERR "Found entry\n" if $DEBUG and defined $entry;
    return $entry;
}

# Executes the prepared statement $sth with @bind_vals and returns the
# first result row as a hash reference, or undef if there is none.
# Dies if the statement cannot be executed.
sub select_one {
    my ($dbh,$sth,@bind_vals) = @_;
    $sth->execute(@bind_vals) or
        die "Unable to select one: ".$dbh->errstr();
    my $results = $sth->fetchall_arrayref({});
    $sth->finish();
    return ref($results) ? $results->[0] : undef;
}

# Parses the bibtex file $file and merges what it finds into
# %{$entries}, keyed by bibtex key; each value is a hash reference with
# any of file_name, doi, html and pmid. Returns $entries.
#
# A comment entry of the form "file: name" (or "filename: name") that
# precedes a regular entry supplies that entry's file name ("name.pdf")
# and takes precedence over the entry's own file field.
sub parse_bibtex_file {
    my ($file,$entries) = @_;

    my $bibfile = Text::BibTeX::File->new($file) or
        die "Unable to open $file for reading: $!";

    my %field_prefix = (doi  => 'doi://',
                        html => 'http://',
                        file => '',
                        pmid => '',
                       );
    my %field_name = (doi  => 'doi',
                      html => 'html',
                      pmid => 'pmid',
                      file => 'file_name',
                     );

    my @entry_comments;
    my $entry;
    while ($entry = Text::BibTeX::Entry->new($bibfile)) {
        print STDERR "In Entry ".$entry->metatype() if $DEBUG;
        if ($entry->metatype() == BTE_COMMENT) {
            push @entry_comments,$entry->value();
        }
        elsif ($entry->metatype() == BTE_REGULAR) {
            my $entry_key = $entry->key();
            if (not defined $entry_key) {
                @entry_comments = ();
                next;
            }
            my %entry_data;
            # if there is a file comment, use it as the file name
            for my $comment (@entry_comments) {
                next unless $comment =~ /^\s*file(?:name)?:?\s*(.+?)\s*$/i;
                next unless length $1;
                $entry_data{file_name} = $1.'.pdf';
                last;
            }
            for my $field (qw(file doi html pmid)) {
                my $field_value = $entry->get($field);
                next unless defined $field_value and $field_value =~ /\S+/;
                # do not override a value found earlier (the file comment)
                $entry_data{$field_name{$field}} //=
                    $field_prefix{$field}.$field_value;
            }
            $entries->{$entry_key} = {} if not defined $entries->{$entry_key};
            for my $field (keys %entry_data) {
                $entries->{$entry_key}{$field} = $entry_data{$field}
                    if defined $entry_data{$field};
            }
            # reset the entry comments
            @entry_comments = ();
        }
        else {
            # do nothing
        }
        print STDERR "\n" if $DEBUG;
    }
    return $entries;
}

# Creates (or recreates) the tables in the cache file $cache and
# returns the same ($dbh,$statements) pair as open_cache.
sub initialize_database {
    my ($cache) = @_;
    return open_cache($cache,1);
}

# Connects to the SQLite cache $cache and prepares every statement the
# script uses. With a true $initialize the tables are dropped and
# created afresh first.
#
# Returns ($dbh,$statements), where $statements is a hash reference of
# prepared statement handles keyed by name. Dies if the connection or a
# prepare fails.
sub open_cache {
    my ($cache,$initialize) = @_;
    my $dbh = DBI->connect("dbi:SQLite:dbname=$cache","","") or
        die "Unable to open/create database $cache";
    if ($initialize) {
        $dbh->do("DROP TABLE IF EXISTS bibtex;");
        $dbh->do("DROP TABLE IF EXISTS papers;");
        # NOTE(review): the seven schema statements that belong here were
        # unreadable in the reviewed copy of this file. The ones below
        # are reconstructed from the columns and lookups used by the
        # statements further down; confirm them against the original
        # before relying on a newly initialized cache. INSERT OR REPLACE
        # needs the unique indexes to replace rather than duplicate rows.
        $dbh->do(<<'EOF');
CREATE TABLE papers (file_name TEXT, path TEXT, has_xoj INT DEFAULT 0);
EOF
        $dbh->do(<<'EOF');
CREATE TABLE bibtex (bibtex_key TEXT, file_name TEXT, pmid TEXT, doi TEXT, html TEXT);
EOF
        $dbh->do(<<'EOF');
CREATE UNIQUE INDEX papers_path_idx ON papers(path);
EOF
        $dbh->do(<<'EOF');
CREATE INDEX papers_file_name_idx ON papers(file_name);
EOF
        $dbh->do(<<'EOF');
CREATE UNIQUE INDEX bibtex_bibtex_key_idx ON bibtex(bibtex_key);
EOF
        $dbh->do(<<'EOF');
CREATE INDEX bibtex_file_name_idx ON bibtex(file_name);
EOF
        $dbh->do(<<'EOF');
CREATE INDEX bibtex_pmid_idx ON bibtex(pmid);
EOF
    }
    my %sql =
        (insert_papers => <<'EOF',
INSERT OR REPLACE INTO papers(file_name,path,has_xoj) VALUES (?,?,?);
EOF
         insert_bibtex => <<'EOF',
INSERT OR REPLACE INTO bibtex (bibtex_key,file_name,pmid,doi,html) VALUES (?,?,?,?,?);
EOF
         select_papers_by_name => <<'EOF',
SELECT * FROM papers WHERE file_name = ?;
EOF
         select_papers_by_pmid => <<'EOF',
SELECT * FROM papers JOIN bibtex ON papers.file_name = bibtex.file_name WHERE bibtex.pmid = ?;
EOF
         select_papers_by_name_like => <<'EOF',
SELECT * FROM papers WHERE file_name LIKE ?;
EOF
         select_papers_by_path => <<'EOF',
SELECT * FROM papers WHERE path = ?;
EOF
         select_bibtex_by_key => <<'EOF',
SELECT * FROM bibtex WHERE bibtex_key = ?;
EOF
         select_bibtex_by_approximate_key => <<'EOF',
SELECT * FROM bibtex WHERE bibtex_key LIKE ?;
EOF
         select_bibtex_by_file_name => <<'EOF',
SELECT * FROM bibtex WHERE file_name = ?;
EOF
         select_bibtex_by_file_name_like => <<'EOF',
SELECT * FROM bibtex WHERE file_name LIKE ?;
EOF
         select_bibtex_by_pmid => <<'EOF',
SELECT * FROM bibtex WHERE pmid = ?;
EOF
         clear_papers_cache => <<'EOF',
DELETE FROM papers;
EOF
         clear_bibtex_cache => <<'EOF',
DELETE FROM bibtex;
EOF
        );
    my $st;
    for my $key (keys %sql) {
        $st->{$key} = $dbh->prepare($sql{$key}) //
            die "Unable to prepare sql statement: ".$dbh->errstr;
    }
    return ($dbh,$st);
}

__END__