X-Git-Url: https://git.donarmstrong.com/?p=bin.git;a=blobdiff_plain;f=bibtex_to_paper;h=a6350eb98caedfecffcac5d9b248c5ac41760660;hp=7474ccbe6e252dc3e2a55caa7012395e456d1a48;hb=HEAD;hpb=b538bd95ec63124725f440cdbd49475e8e307b15 diff --git a/bibtex_to_paper b/bibtex_to_paper index 7474ccb..a6350eb 100755 --- a/bibtex_to_paper +++ b/bibtex_to_paper @@ -13,7 +13,14 @@ use Getopt::Long; use Pod::Usage; use File::Find; -use Text::Bibtex; +use File::Basename qw(basename); +use File::Spec qw(rel2abs); +use Text::BibTeX; +use User; +use Data::Printer; +use POSIX; + +use DBI; =head1 NAME @@ -27,8 +34,10 @@ bibtex_to_paper [options] bibtexkey --bibtex, -b bibtex file to look up key in --bibtex-cache, -c bibtex cache file --build-cache, -B build cache using bibtex files + --search-by-pmid Search term is a pmid instead of a bibtex key --pdf-dir pdf directory --pdfviewer, -p pdf viewer to use + --only-print Only print PDF file name --debug, -d debugging level (Default 0) --help, -h display this help --man, -m display manual @@ -50,6 +59,10 @@ Bibtex cache file; rebuilt if bibtex file changes PDF viewer to use; defaults to evince unless a .xoj exists, in which case xournal is used. +=item B<--only-print> + +Only print the PDF file name, don't open it. + =item B<--debug, -d> Debug verbosity. (Default 0) @@ -76,13 +89,24 @@ use vars qw($DEBUG); my %options = (debug => 0, help => 0, man => 0, + only_print => 0, + search_by_pmid => 0, + search_by_file => 0, + use_git => 1, + 'bibtex_cache' => File::Spec->catfile(User->Home,'.bibtex_to_paper_cache'), ); GetOptions(\%options, - 'build_cache|build-cache|B!', - 'bibtex|b=s', + 'build_cache|build-cache!', + 'bibtex|b=s@', 'bibtex_cache|bibtex-cache|c=s', 'pdfviewer|p=s', + 'use_git|use-git!', + 'only_print|only-print!', + 'search_by_pmid|search-by-pmid!', + 'search_by_file|search-by-file!', + 'clear_cache|clear-cache!', + 'papers_directory|papers-directory=s@', 'debug|d+','help|h|?','man|m'); pod2usage() if $options{help}; @@ -100,13 +124,398 @@ if (not exists $options{bibtex} and pod2usage(join("\n",@USAGE_ERRORS)) if @USAGE_ERRORS; +main(); + +sub main{ + + my $dbh; + my $sth; + if (exists $options{bibtex_cache}) { + my $initialize = 0; + if (-e $options{bibtex_cache}) { + ($dbh,$sth) = open_cache($options{bibtex_cache}); + } else { + ($dbh,$sth) = initialize_database($options{bibtex_cache}); + } + } + + if (exists $options{clear_cache}) { + clear_cache($dbh,$sth); + } + my %entries; + if (exists $options{build_cache}) { + $options{bibtex} //= []; + $options{bibtex} = + [@ARGV, + @{ref $options{bibtex}?$options{bibtex}:[$options{bibtex}]}, + ]; + @ARGV = (); + } + if (exists $options{bibtex}) { + for my $bibtex_file (@{ref $options{bibtex}?$options{bibtex}:[$options{bibtex}]}) { + parse_bibtex_file($bibtex_file,\%entries); + } + } + + if (exists $options{papers_directory} and + defined $dbh + ) { + $dbh->begin_work; + load_papers_into_database($dbh,$sth,$options{papers_directory}); + $dbh->commit; + } + + p %entries if $DEBUG; + if (keys %entries and + defined $dbh) { + $dbh->begin_work; + load_bibtex_entries_into_database($dbh,$sth,\%entries); + $dbh->commit; + } + + p @ARGV if $DEBUG; + for my $bibtex_key (@ARGV) { + open_bibtex_key(\%options,$dbh,$sth,\%entries,$bibtex_key); + } + +} + +sub clear_cache { + my ($dbh,$sth) = @_; + $sth->{clear_papers_cache}->execute(); + $sth->{clear_bibtex_cache}->execute(); +} + +sub load_papers_into_database { + my ($dbh,$sth,$dir) = @_; + + my @dirs = ref($dir)?@{$dir}:$dir; + + if ($options{use_git}) { + my @files = grep /\.pdf\"?$/, split /\n/, qx(git ls-tree HEAD -r --full-name --name-only); + for my $file (@files) { + $file =~ s/^\"(.+)\"$/"qq($1)"/gee; + insert_or_replace_papers($dbh,$sth,basename($file),File::Spec->rel2abs($file), -e "${file}.xoj"); + } + } else { + my $actually_load_it = sub { + if (/\.git/) { + $File::Find::prune = 1; + return; + } + return unless /\.pdf$/; + my $xoj = 0; + if (-e "${_}.xoj") { + $xoj = 1; + } + insert_or_replace_papers($dbh,$sth,basename($File::Find::name),File::Spec->rel2abs($_),$xoj); + }; + find($actually_load_it,@dirs); + } +} + +sub insert_or_replace_papers { + my ($dbh,$sth,$file_name,$file_loc,$has_xoj) = @_; + $sth->{insert_papers}->execute($file_name,$file_loc,$has_xoj); + $sth->{insert_papers}->finish(); +} + +sub load_bibtex_entries_into_database { + my ($dbh,$sth,$entries) = @_; + for my $entry (keys %{$entries}) { + next unless defined $entries->{$entry}; + $sth->{insert_bibtex}->execute($entry,@{$entries->{$entry}}{qw(file_name pmid doi html)}); + $sth->{insert_bibtex}->finish(); + print STDERR "inserted $entry {".join(',',map {defined $_?"'$_'":"'undef'"} %{$entries->{$entry}})."}\n" if $DEBUG; + } +} + +sub open_bibtex_key { + my ($options,$dbh,$sth,$entries,$bibtex_key) = @_; + if (not defined $dbh) { + open_entry($dbh,$sth,$entries->{$bibtex_key},$options); + } else { + my $entry; + if ($options->{search_by_pmid}) { + $entry = select_entry_from_pmid($dbh,$sth,$bibtex_key); + } elsif ($options->{search_by_file}) { + $entry = select_entry_from_file($dbh,$sth,$bibtex_key); + } else { + $entry = select_entry_from_bibtex_key($dbh,$sth,$bibtex_key); + } + p $entry if $DEBUG; + open_entry($dbh,$sth,$entry,$options); + } +} + +sub fork_exec { + my (@cmd) = @_; + my $child = fork(); + if (not defined $child) { + die "Unable to fork for some reason: $!"; + } + if ($child == 0) { + foreach (0 .. (POSIX::sysconf (&POSIX::_SC_OPEN_MAX) || 1024)) + { POSIX::close $_ } + open (STDIN, "/dev/null"); + open (STDERR, ">&STDOUT"); + exec(@cmd); + } else { + return $child; + } + +} + +sub open_pdf { + my ($file_name,$options,$has_xoj) = @_; + print STDERR "opening $file_name\n" if $DEBUG; + my $pdf_viewer = 'xournal'; + if (exists $options->{pdfviewer} and defined $options->{pdfviewer}) { + $pdf_viewer = $options->{pdfviewer}; + } + fork_exec($pdf_viewer,$file_name); +} + +sub open_browser{ + my ($file) = @_; + fork_exec('sensible-browser',$file); +} + +sub open_entry{ + my ($dbh,$sth,$entry,$options) = @_; + + return unless defined $entry and ref $entry and keys %{$entry}; + if ($DEBUG) { + print STDERR "Entry: \n"; + p $entry; + } + if (defined $entry->{file_name} and length $entry->{file_name}) { + my $paper = select_one($dbh,$sth->{select_papers_by_name},$entry->{file_name}); + if (not defined $paper) { + my ($pmid) = $entry->{file_name} =~ /pmid_(\d+)/; + if (defined $pmid and length $pmid) { + $paper = select_one($dbh,$sth->{select_papers_by_pmid},'%pmid_'.$pmid.'.%'); + } + } + p $paper if $DEBUG; + print STDERR $entry->{file_name} if $DEBUG; + if (defined $paper) { + if ($options->{only_print}) { + print $paper->{path}; + return; + } + open_pdf($paper->{path},$options,$paper->{has_xoj}); + return; + } else { + print STDERR "Unable to find paper\n" if $DEBUG; + } + } + if (defined $entry->{doi}) { + if ($options->{only_print}) { + print $entry->{doi}; + return; + } + my $url = $entry->{doi}; + $url =~ s{^doi://}{http://dx.doi.org/}; + open_browser($url,$options); + return; + } + if (defined $entry->{html}) { + if ($options->{only_print}) { + print $entry->{html}; + return; + } + open_browser($entry->{html},$options); + return; + } +} + +sub select_entry_from_pmid{ + my ($dbh,$sth,$pmid) = @_; + + return select_one($dbh,$sth->{select_bibtex_by_pmid},$pmid); +} + +sub select_entry_from_file{ + my ($dbh,$sth,$filename) = @_; + + return select_one($dbh,$sth->{select_bibtex_by_file_name_like},'%'.$filename.'%'); +} + + +sub select_entry_from_bibtex_key{ + my ($dbh,$sth,$bibtex_key) = @_; + + my $entry = select_one($dbh,$sth->{select_bibtex_by_key},$bibtex_key); + if (not defined $entry) { + print STDERR "Unable to find entry by exact search\n" if $DEBUG; + $bibtex_key =~ s/:.*$//; + $entry = select_one($dbh,$sth->{select_bibtex_by_approximate_key},$bibtex_key.'%'); + } + print STDERR "Found entry\n" if $DEBUG and defined $entry; + return $entry; +} + +sub select_one{ + my ($dbh,$sth,@bind_vals) = @_; + $sth->execute(@bind_vals) or + die "Unable to select one: ".$dbh->errstr(); + my $results = $sth->fetchall_arrayref({}); + $sth->finish(); + return ref($results)?$results->[0]:undef; +} + sub parse_bibtex_file { - + my ($file,$entries) = @_; + + my $bibfile = Text::BibTeX::File->new($file) + or die "Unable to open $file for reading: $!"; + my @entry_comments; + my $entry; + while ($entry = Text::BibTeX::Entry->new($bibfile)) { + print STDERR "In Entry ".$entry->metatype() if $DEBUG; + if ($entry->metatype() == BTE_COMMENT) { + push @entry_comments,$entry->value(); + } elsif ($entry->metatype() == BTE_REGULAR) { + my $entry_key = $entry->key(); + if (not defined $entry_key) { + @entry_comments = (); + next; + } + my %entry_data; + # if there is a file comment, use it as the file name + for my $comment (@entry_comments) { + next unless $comment =~ /^\s*file(?:name)?:?\s*(.+?)\s*$/i; + next unless length $1; + $entry_data{file_name} = $1.'.pdf'; + last; + } + my %field_prefix = (doi => 'doi://', + html => 'http://', + file => '', + pmid => '', + ); + my %field_name = (doi => 'doi', + html => 'html', + pmid => 'pmid', + file => 'file_name',); + for my $field (qw(file doi html pmid)) { + my $field_value = $entry->get($field); + if (defined $field_value and $field_value =~ /\S+/) { + $entry_data{$field_name{$field}} = + $field_prefix{$field}.$field_value if + not defined $entry_data{$field_name{$field}}; + } + } + $entries->{$entry_key} = {} if not defined $entries->{$entry_key}; + for my $field (keys %entry_data) { + $entries->{$entry_key}{$field} = $entry_data{$field} if + defined $entry_data{$field}; + } + # reset the entry comments + @entry_comments = (); + } else { + # do nothing + } + print STDERR "\n" if $DEBUG; + } + return $entries; } sub initialize_database { - my + my ($cache) = @_; + return open_cache($cache,1); } +sub open_cache { + my ($cache,$initialize) = @_; + my $dbh = DBI->connect("dbi:SQLite:dbname=$cache","","") or + die "Unable to open/create database $cache"; + if ($initialize) { + $dbh->do("DROP TABLE IF EXISTS bibtex;"); + $dbh->do("DROP TABLE IF EXISTS papers;"); + $dbh->do(<do(<do(<do(<do(<do(<do(< <<'EOF', +INSERT OR REPLACE INTO papers(file_name,path,has_xoj) VALUES (?,?,?); +EOF + insert_bibtex => <<'EOF', +INSERT OR REPLACE INTO bibtex (bibtex_key,file_name,pmid,doi,html) VALUES (?,?,?,?,?); +EOF + select_papers_by_name => <<'EOF', +SELECT * FROM papers WHERE file_name = ?; +EOF + select_papers_by_pmid => <<'EOF', +SELECT * FROM papers JOIN bibtex ON papers.file_name = bibtex.file_name WHERE bibtex.pmid = ?; +EOF + select_papers_by_name_like => <<'EOF', +SELECT * FROM papers WHERE file_name LIKE ?; +EOF + select_papers_by_path => <<'EOF', +SELECT * FROM papers WHERE path = ?; +EOF + select_bibtex_by_key => <<'EOF', +SELECT * FROM bibtex WHERE bibtex_key = ?; +EOF + select_bibtex_by_approximate_key => <<'EOF', +SELECT * FROM bibtex WHERE bibtex_key LIKE ?; +EOF + select_bibtex_by_file_name => <<'EOF', +SELECT * FROM bibtex WHERE file_name = ?; +EOF + select_bibtex_by_file_name_like => <<'EOF', +SELECT * FROM bibtex WHERE file_name LIKE ?; +EOF + select_bibtex_by_pmid => <<'EOF', +SELECT * FROM bibtex WHERE pmid = ?; +EOF + clear_papers_cache => <<'EOF', +DELETE FROM papers; +EOF + clear_bibtex_cache => <<'EOF', +DELETE FROM bibtex; +EOF + ); + my $st; + for my $key (keys %s) { + $st->{$key}=$dbh->prepare($s{$key}) // + die "Unable to prepare sql statement: ".$dbh->errstr; + } + return ($dbh,$st); +} + + __END__