From bf24bf9e31a3688bc5c8dc35a8a627eea9570664 Mon Sep 17 00:00:00 2001 From: Don Armstrong Date: Mon, 27 Jan 2014 11:12:52 -0800 Subject: [PATCH] switch to sqlite for bibtex_to_paper; default cache location; actually kind of works now --- bibtex_to_paper | 309 +++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 267 insertions(+), 42 deletions(-) diff --git a/bibtex_to_paper b/bibtex_to_paper index f6ac661..9d9cf7c 100755 --- a/bibtex_to_paper +++ b/bibtex_to_paper @@ -13,11 +13,14 @@ use Getopt::Long; use Pod::Usage; use File::Find; -use Text::BibTex; +use File::Basename qw(basename); +use File::Spec qw(rel2abs); +use Text::BibTeX; +use User; +use Data::Printer; -use DB_File; -use MLDBM qw(DB_FILE Storable); -use Fcntl qw/O_RDWR O_CREAT O_TRUNC/; +use DBI; +use Tie::DBI; =head1 NAME @@ -80,13 +83,15 @@ use vars qw($DEBUG); my %options = (debug => 0, help => 0, man => 0, + 'bibtex_cache' => File::Spec->catfile(User->Home,'.bibtex_to_paper_cache'), ); GetOptions(\%options, 'build_cache|build-cache|B!', - 'bibtex|b=s', + 'bibtex|b=s@', 'bibtex_cache|bibtex-cache|c=s', 'pdfviewer|p=s', + 'papers_directory|papers-directory=s@', 'debug|d+','help|h|?','man|m'); pod2usage() if $options{help}; @@ -104,50 +109,221 @@ if (not exists $options{bibtex} and pod2usage(join("\n",@USAGE_ERRORS)) if @USAGE_ERRORS; +main(); + +sub main{ + + my $dbh; + my $sth; + if (exists $options{bibtex_cache}) { + my $initialize = 0; + if (-e $options{bibtex_cache}) { + ($dbh,$sth) = open_cache($options{bibtex_cache}); + } else { + ($dbh,$sth) = initialize_database($options{bibtex_cache}); + } + } + + my %entries; + if (exists $options{build_cache}) { + $options{bibtex} //= []; + $options{bibtex} = + [@ARGV, + @{ref $options{bibtex}?$options{bibtex}:[$options{bibtex}]}, + ]; + } + if (exists $options{bibtex}) { + for my $bibtex_file (@{ref $options{bibtex}?$options{bibtex}:[$options{bibtex}]}) { + parse_bibtex_file($bibtex_file,\%entries); + } + } + + if (exists $options{papers_directory} and + defined $dbh + ) { + load_papers_into_database($dbh,$sth,$options{papers_directory}); + } + + p %entries if $DEBUG; + if (keys %entries and + defined $dbh) { + load_bibtex_entries_into_database($dbh,$sth,\%entries); + } + + for my $bibtex_key (@ARGV) { + open_bibtex_key(\%options,$dbh,$sth,\%entries,$bibtex_key); + } + +} + +sub load_papers_into_database { + my ($dbh,$sth,$dir) = @_; + + my @dirs = ref($dir)?@{$dir}:$dir; + + my $actually_load_it = sub { + return unless /\.pdf$/; + my $xoj = 0; + if (-e "${_}.xoj") { + $xoj = 1; + } + insert_or_replace_papers($dbh,$sth,basename($File::Find::name),File::Spec->rel2abs($File::Find::name),$xoj); + }; + + my @pdfs; + find($actually_load_it,@dirs); +} + +sub insert_or_replace_papers { + my ($dbh,$sth,$file_name,$file_loc,$has_xoj) = @_; + $sth->{insert_papers}->execute($file_name,$file_loc,$has_xoj); + $sth->{insert_papers}->finish(); +} + +sub load_bibtex_entries_into_database { + my ($dbh,$sth,$entries) = @_; + for my $entry (keys %{$entries}) { + next unless defined $entries->{$entry}; + $sth->{insert_bibtex}->execute($entry,@{$entries->{$entry}}{qw(file_name doi html)}); + $sth->{insert_bibtex}->finish(); + print STDERR "inserted $entry $entries->{$entry}\n" if $DEBUG; + } +} + +sub open_bibtex_key { + my ($options,$dbh,$sth,$entries,$bibtex_key) = @_; + if (not defined $dbh) { + open_entry($dbh,$sth,$entries->{$bibtex_key},$options); + } else { + my $entry = select_entry_from_bibtex_key($dbh,$sth,$bibtex_key); + p $entry if $DEBUG; + open_entry($dbh,$sth,$entry,$options); + } +} + +sub fork_exec { + my (@cmd) = @_; + my $child = fork(); + if (not defined $child) { + die "Unable to fork for some reason: $!"; + } + if ($child == 0) { + exec(@cmd); + } else { + return $child; + } + +} + +sub open_pdf { + my ($file_name,$options,$has_xoj) = @_; + print STDERR "opening $file_name\n" if $DEBUG; + if ($has_xoj) { + fork_exec('xournal',$file_name); + } else { + fork_exec('evince',$file_name) + } +} + +sub open_browser{ + my ($file) = @_; + fork_exec('sensible-browser',$file); +} + +sub open_entry{ + my ($dbh,$sth,$entry,$options) = @_; + + return unless defined $entry and ref $entry and keys %{$entry}; + if (defined $entry->{file_name} and length $entry->{file_name}) { + my $paper = select_one($dbh,$sth->{select_papers_by_name},$entry->{file_name}); + p $paper if $DEBUG; + print STDERR $entry->{file_name} if $DEBUG; + if (defined $paper) { + open_pdf($paper->{file_name},$options,$paper->{xoj}); + return; + } + } + if (defined $entry->{doi}) { + my $url = $entry->{doi}; + $url =~ s{^doi://}{http://dx.doi.org/}; + open_browser($url,$options); + return; + } + if (defined $entry->{html}) { + open_browser($entry->{html},$options); + return; + } +} + +sub select_entry_from_bibtex_key{ + my ($dbh,$sth,$bibtex_key) = @_; + + my $entry = select_one($dbh,$sth->{select_bibtex_by_key},$bibtex_key); + return $entry; +} + +sub select_one{ + my ($dbh,$sth,@bind_vals) = @_; + $sth->execute(@bind_vals) or + die "Unable to select one: ".$dbh->errstr(); + my $results = $sth->fetchall_arrayref({}); + $sth->finish(); + return ref($results)?$results->[0]:undef; +} + sub parse_bibtex_file { my ($file,$entries) = @_; - my $bibfile = Text::BibTex::File->new($file) - or die "Unable to open $bibfile for reading: $!"; + my $bibfile = Text::BibTeX::File->new($file) + or die "Unable to open $file for reading: $!"; my @entry_comments; my $entry; - while ($entry = Text::BibTex::Entry->new($bibfile)) { - if ($entry->metatype() eq 'BTE_COMMENT') { + while ($entry = Text::BibTeX::Entry->new($bibfile)) { + print STDERR "In Entry ".$entry->metatype() if $DEBUG; + if ($entry->metatype() == BTE_COMMENT) { push @entry_comments,$entry->value(); - } elsif ($entry->metatype() eq 'BTE_REGULAR') { + } elsif ($entry->metatype() == BTE_REGULAR) { my $entry_key = $entry->key(); - my $link_name; - if (defined $entry_key) { - # if there is a file comment, use it as the file name - for my $comment (@entry_comments) { - next unless $comment =~ /^\s*file(?:name)?:?\s*(.+?)\s*$/i; - $link_name = $1; - last; - } - # if there is a file key, use that as the file name - # if there is a doi, then use that - # if there is a html, then use that - if (not defined $link_name) { - my @possible_fields = $entry->get(qw(file doi html)); - for my $possible_field (@possible_fields) { - if (defined $possible_field and length $possible_field) { - $link_name = $possible_field; - last; - } - } + if (not defined $entry_key) { + @entry_comments = (); + next; + } + my %entry_data; + # if there is a file comment, use it as the file name + for my $comment (@entry_comments) { + next unless $comment =~ /^\s*file(?:name)?:?\s*(.+?)\s*$/i; + next unless length $1; + $entry_data{file_name} = $1.'.pdf'; + last; + } + my %field_prefix = (doi => 'doi://', + html => 'http://', + file => '', + ); + my %field_name = (doi => 'doi', + html => 'html', + file => 'file_name',); + for my $field (qw(file doi html)) { + my $field_value = $entry->get($field); + if (defined $field_value and $field_value =~ /\S+/) { + $entry_data{$field_name{$field}} = + $field_prefix{$field}.$field_value if + not defined $entry_data{$field_name{$field}}; } } - if (not exists $entries->{$entry_key} or - defined $link_name; - ) { - $entries->{$entry_key} = $link_name + $entries->{$entry_key} = {} if not defined $entries->{$entry_key}; + for my $field (keys %entry_data) { + $entries->{$entry_key}{$field} = $entry_data{$field} if + defined $entry_data{$field}; } # reset the entry comments @entry_comments = (); } else { # do nothing } + print STDERR "\n" if $DEBUG; } + return $entries; } @@ -157,17 +333,66 @@ sub initialize_database { } sub open_cache { - my ($cache,$initialize) @_; - my $open_flags = O_RDWR|O_CREAT; + my ($cache,$initialize) = @_; + my $dbh = DBI->connect("dbi:SQLite:dbname=$cache","","") or + die "Unable to open/create database $cache"; if ($initialize) { - $open_flags = $open_flags | O_TRUNC; + $dbh->do("DROP TABLE IF EXISTS bibtex;"); + $dbh->do("DROP TABLE IF EXISTS papers;"); + $dbh->do(<do(<do(<do(<do(<do(< $cache - $open_flags, 0666 - or die "Unable to create create/truncate $cache: $!"; - return \%entries; + my %s = + (insert_papers => <<'EOF', +INSERT OR REPLACE INTO papers(file_name,path,has_xoj) VALUES (?,?,?); +EOF + insert_bibtex => <<'EOF', +INSERT OR REPLACE INTO bibtex (bibtex_key,file_name,doi,html) VALUES (?,?,?,?); +EOF + select_papers_by_name => <<'EOF', +SELECT * FROM papers WHERE file_name = ?; +EOF + select_papers_by_path => <<'EOF', +SELECT * FROM papers WHERE path = ?; +EOF + select_bibtex_by_key => <<'EOF', +SELECT * FROM bibtex WHERE bibtex_key = ?; +EOF + select_bibtex_by_file_name => <<'EOF', +SELECT * FROM bibtex WHERE file_name = ?; +EOF + ); + my $st; + for my $key (keys %s) { + $st->{$key}=$dbh->prepare($s{$key}) // + die "Unable to prepare sql statement: ".$dbh->errstr; + } + return ($dbh,$st); } -- 2.39.5