;;; doi-utils.el --- DOI utilities for making bibtex entries

;; Copyright (C) 2015 John Kitchin

;; Author: John Kitchin
;; Keywords: convenience
;; Version: 0.1
;; Package-Requires: ((org-ref))

;; This program is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.

;; This program is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with this program.  If not, see <http://www.gnu.org/licenses/>.

;;; Commentary:

;; This package provides functionality to download PDFs and bibtex
;; entries from a DOI, as well as to update a bibtex entry from a DOI.
;; It depends slightly on org-ref, to determine where to save pdf files
;; to, and where to insert bibtex entries in the default bibliography.

;; The principal commands you will use from here are:

;; - doi-utils-get-bibtex-entry-pdf with the cursor in a bibtex entry.
;; - doi-utils-insert-bibtex-entry-from-doi to insert a bibtex entry at
;;   your cursor, clean it and try to get a pdf.
;; - doi-utils-add-bibtex-entry-from-doi to add an entry to your default
;;   bibliography (cleaned with pdf if possible).
;; - doi-utils-add-bibtex-entry-from-region to add an entry from a
;;   highlighted doi to your default bibliography.
;; - doi-utils-update-bibtex-entry-from-doi with cursor in an entry to
;;   update its fields.

(require 'json)

;;; Code:

;; * Getting pdf files from a DOI

;; The idea here is simple.  When you visit http://dx.doi.org/doi, you
;; get redirected to the journal site.  Once you have the url for the
;; article, you can usually compute the url to the pdf, or find it in
;; the page.  Then you simply download it.
;; There are some subtleties in doing this that are described here.  To
;; get the redirect, we have to use url-retrieve, and a callback
;; function.  The callback does not return anything, so we communicate
;; through global variables.  url-retrieve is asynchronous, so we have to
;; make sure to wait for it to finish.

(defvar *doi-utils-waiting* t
  "Non-nil while a url retrieval started by this file is still pending.
`doi-utils-get-redirect' sets it to t before starting a retrieval and
`doi-utils-redirect-callback' resets it to nil when it has run.")

(defvar *doi-utils-redirect* nil
  "Redirect url stored by `doi-utils-redirect-callback'.
It is nil when no redirect has been recorded.")

(defun doi-utils-redirect-callback (&optional status)
  "Callback for `url-retrieve' that records the redirect url.
STATUS is the status plist handed to the callback by `url-retrieve'.

If STATUS has an :error entry, that error is re-signaled.  If STATUS
has a :redirect entry, its value is stored in `*doi-utils-redirect*'
and reported with `message'.

`*doi-utils-waiting*' is always reset to nil on the way out, even when
the error is signaled, so that the polling loop in
`doi-utils-get-redirect' is released after a failed retrieval too."
  (unwind-protect
      (progn
        (when (plist-get status :error)
          (signal (car (plist-get status :error))
                  (cdr (plist-get status :error))))
        (when (plist-get status :redirect) ; is nil if there is none
          (message "redirects = %s" (plist-get status :redirect))
          (message "*doi-utils-redirect* set to %s"
                   (setq *doi-utils-redirect*
                         (plist-get status :redirect)))))
    ;; We have done our job (or failed trying), so we are not waiting
    ;; any more.  This lives in the cleanup form because the `signal'
    ;; above would otherwise skip it and leave the flag stuck at t.
    (setq *doi-utils-waiting* nil)))

;; To actually get the redirect we use url-retrieve like this.

(defun doi-utils-get-redirect (doi)
  "Get the redirect url from dx.doi.org/DOI.
DOI is inserted into \"http://dx.doi.org/%s\" with `format'.

The retrieval is asynchronous; this function blocks, polling every 0.1
seconds, until `doi-utils-redirect-callback' clears
`*doi-utils-waiting*'.  The result is not returned: it is left in
`*doi-utils-redirect*', which stays nil if no redirect was reported.
The function itself returns nil."
  ;; We are going to wait until the url-retrieve is done.
  (setq *doi-utils-waiting* t)
  ;; Start with no redirect.  It will be set in the callback.
  (setq *doi-utils-redirect* nil)
  (url-retrieve
   (format "http://dx.doi.org/%s" doi)
   'doi-utils-redirect-callback)
  ;; We need to wait here for the asynchronous process to finish, so we
  ;; loop and sleep until the callback says it is done via
  ;; `*doi-utils-waiting*'.  Without this wait the function had to be
  ;; run a few times before it worked, presumably because the first
  ;; call had not been given enough time to finish.
  (while *doi-utils-waiting* (sleep-for 0.1)))

;; Once we have a redirect for a particular doi, we need to compute the
;; url to the pdf.  We do this with a series of functions.  Each function
;; takes a single argument, the redirect url.  If it knows how to compute
;; the pdf url it does, and returns it.
;; We store the functions in a variable:

(defvar doi-utils-pdf-url-functions nil
  "List of functions that return a url to a pdf from a redirect url.
Each function takes one argument, the redirect url.  The function must
return a pdf-url, or nil.")

;; The functions below keep `*doi-utils-redirect*' as their parameter
;; name.  That symbol is also declared with `defvar' in this file, so the
;; argument is bound dynamically for the duration of each call.

;; ** APS journals

(defun aps-pdf-url (*doi-utils-redirect*)
  "Return the pdf url for an APS journal redirect url, or nil.
*DOI-UTILS-REDIRECT* is the redirect url as a string.  When it starts
with \"http://journals.aps.org\", every \"/abstract/\" in it is replaced
by \"/pdf/\"; otherwise nil is returned."
  ;; Dots are escaped so they only match a literal \".\" in the host.
  (when (string-match "^http://journals\\.aps\\.org" *doi-utils-redirect*)
    (replace-regexp-in-string "/abstract/" "/pdf/" *doi-utils-redirect*)))

;; ** Science

(defun science-pdf-url (*doi-utils-redirect*)
  "Return the pdf url for a Science redirect url, or nil.
*DOI-UTILS-REDIRECT* is the redirect url as a string.  When it starts
with \"http://www.sciencemag.org\", \".full.pdf\" is appended to it;
otherwise nil is returned."
  (when (string-match "^http://www\\.sciencemag\\.org" *doi-utils-redirect*)
    (concat *doi-utils-redirect* ".full.pdf")))

;; ** Nature

(defun nature-pdf-url (*doi-utils-redirect*)
  "Return the pdf url for a Nature redirect url, or nil.
*DOI-UTILS-REDIRECT* is the redirect url as a string.  When it starts
with \"http://www.nature.com\", \"/full/\" is replaced by \"/pdf/\" and a
trailing \".html\" by \".pdf\"; otherwise nil is returned."
  (when (string-match "^http://www\\.nature\\.com" *doi-utils-redirect*)
    (replace-regexp-in-string
     ;; The backslash must be doubled inside a Lisp string: \"\\.html$\"
     ;; reads as the regexp `.html$', whose dot matches any character.
     "\\.html$" ".pdf"
     (replace-regexp-in-string "/full/" "/pdf/" *doi-utils-redirect*))))

;; ** Wiley
;; http://onlinelibrary.wiley.com/doi/10.1002/anie.201402680/abstract
;; http://onlinelibrary.wiley.com/doi/10.1002/anie.201402680/pdf

;; It appears that it is not enough to use the pdf url above.  That takes
;; you to an html page.  The actual link to the pdf is embedded in that
;; page.  This is how ScienceDirect does things too.

;; NOTE(review): the definition on the last line below continues past
;; the end of this section, so that line is left exactly as it was found.

;; This is where the link is hidden:
;; (defun doi-utils-get-wiley-pdf-url (redirect-url) "Wileyscience direct hides the pdf url in html. We get it out here by parsing the html." (setq *doi-utils-waiting* t) (url-retrieve redirect-url (lambda (status) (goto-char (point-min)) (re-search-forward "