2 ;;; doi-utils.el --- get bibtex entries and pdfs from a DOI
4 ;; Copyright(C) 2014 John Kitchin
6 ;; Author: John Kitchin <jkitchin@andrew.cmu.edu>
7 ;; This file is not currently part of GNU Emacs.
9 ;; This program is free software; you can redistribute it and/or
10 ;; modify it under the terms of the GNU General Public License as
11 ;; published by the Free Software Foundation; either version 2, or (at
12 ;; your option) any later version.
14 ;; This program is distributed in the hope that it will be useful, but
15 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
16 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 ;; General Public License for more details.
19 ;; You should have received a copy of the GNU General Public License
20 ;; along with this program ; see the file COPYING. If not, write to
21 ;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
22 ;; Boston, MA 02111-1307, USA.
26 ;; Lisp code to generate and update bibtex entries from a DOI, and to
27 ;; download pdfs from publisher websites from a DOI.
29 ;; Package-Requires: ((org-ref))
(defvar *doi-utils-waiting* t
  "Non-nil while an asynchronous url retrieval is still pending.
Callbacks set this to nil when they finish; callers poll it.")
(defvar *doi-utils-redirect* nil
  "Redirect url recorded by `doi-utils-redirect-callback'.
Nil until a retrieval reports a redirect.")
(defun doi-utils-redirect-callback (&optional status)
  "Callback for `url-retrieve' that records the redirect url.

STATUS is the status plist passed to the callback.  When it has a
:redirect entry, that value is stored in `*doi-utils-redirect*'.
When it has an :error entry, that error is re-signaled.

`*doi-utils-waiting*' is always set to nil on the way out, even
when an error is signaled, so that code polling that variable is
never left waiting forever.  Returns nil."
  (unwind-protect
      (let ((err (plist-get status :error))
            (redirect (plist-get status :redirect)))
        (when err
          (signal (car err) (cdr err)))
        (when redirect                  ; nil if there was no redirect
          (message "redirects = %s" redirect)
          (message "*doi-utils-redirect* set to %s"
                   (setq *doi-utils-redirect* redirect)))
        nil)
    ;; we have done our job (or failed), so we are not waiting any more.
    (setq *doi-utils-waiting* nil)))
(defun doi-utils-get-redirect (doi)
  "Ask dx.doi.org where DOI redirects to.

The answer is left in `*doi-utils-redirect*' by
`doi-utils-redirect-callback'; it stays nil when no redirect was
reported.  This function blocks until the callback clears
`*doi-utils-waiting*'.  Returns nil."
  ;; we are going to wait until the url-retrieve is done
  (setq *doi-utils-waiting* t)
  ;; start with no redirect. it will be set in the callback.
  (setq *doi-utils-redirect* nil)
  (url-retrieve
   (format "http://dx.doi.org/%s" doi)
   'doi-utils-redirect-callback)
  ;; `url-retrieve' is asynchronous, so loop and sleep until the
  ;; callback says it is done via `*doi-utils-waiting*'.
  (while *doi-utils-waiting* (sleep-for 0.1)))
(defvar doi-utils-pdf-url-functions nil
  "List of functions that map a redirect url to a pdf url.
Each function takes one argument, the redirect url, and must
return a pdf url, or nil when it does not handle that url.")
(defun aps-pdf-url (*doi-utils-redirect*)
  "Return the pdf url for a journals.aps.org redirect, or nil.
*DOI-UTILS-REDIRECT* is the url the doi redirected to; every
\"/abstract/\" in it is replaced by \"/pdf/\"."
  (and (string-match "^http://journals.aps.org" *doi-utils-redirect*)
       (replace-regexp-in-string "/abstract/" "/pdf/" *doi-utils-redirect*)))
(defun science-pdf-url (*doi-utils-redirect*)
  "Return the pdf url for a www.sciencemag.org redirect, or nil.
The pdf url is *DOI-UTILS-REDIRECT* with \".full.pdf\" appended."
  (if (string-match "^http://www.sciencemag.org" *doi-utils-redirect*)
      (concat *doi-utils-redirect* ".full.pdf")
    nil))
(defun nature-pdf-url (*doi-utils-redirect*)
  "Return the pdf url for a www.nature.com redirect, or nil.
In *DOI-UTILS-REDIRECT*, \"/full/\" becomes \"/pdf/\" and a
trailing \".html\" becomes \".pdf\"."
  (when (string-match "^http://www.nature.com" *doi-utils-redirect*)
    ;; The dot must be escaped with a doubled backslash: in a Lisp
    ;; string "\." is just ".", which would match any character.
    (replace-regexp-in-string
     "\\.html$" ".pdf"
     (replace-regexp-in-string "/full/" "/pdf/" *doi-utils-redirect*))))
(defun doi-utils-get-wiley-pdf-url (redirect-url)
  "Fetch REDIRECT-URL and extract the pdf url Wiley hides in the html.

The page is searched for the src attribute of the iframe whose id
is \"pdfDocument\".  The result is stored in `*doi-utils-pdf-url*'
and returned; it is nil when no such iframe is found.  Blocks
until the retrieval callback has run."
  (setq *doi-utils-waiting* t)
  (url-retrieve
   redirect-url
   (lambda (_status)
     ;; Always clear the waiting flag, even if something below
     ;; fails; otherwise the polling loop would never end.
     (unwind-protect
         (progn
           (goto-char (point-min))
           (setq *doi-utils-pdf-url*
                 (when (re-search-forward
                        "<iframe id=\"pdfDocument\" src=\"\\([^\"]*\\)\"" nil t)
                   (match-string 1))))
       (setq *doi-utils-waiting* nil))))
  (while *doi-utils-waiting* (sleep-for 0.1))
  *doi-utils-pdf-url*)
(defun wiley-pdf-url (*doi-utils-redirect*)
  "Return the pdf url for an onlinelibrary.wiley.com redirect, or nil.
\"/abstract\" in *DOI-UTILS-REDIRECT* is replaced by \"/pdf\" and
that page is passed to `doi-utils-get-wiley-pdf-url', which
leaves the real pdf url in `*doi-utils-pdf-url*'."
  (when (string-match "^http://onlinelibrary.wiley.com" *doi-utils-redirect*)
    (doi-utils-get-wiley-pdf-url
     (replace-regexp-in-string "/abstract" "/pdf" *doi-utils-redirect*))
    *doi-utils-pdf-url*))
(defun springer-pdf-url (*doi-utils-redirect*)
  "Return the pdf url for a link.springer.com redirect, or nil.
\".pdf\" is appended to *DOI-UTILS-REDIRECT* and \"/article/\" is
replaced by \"/content/pdf/\"."
  (if (string-match "^http://link.springer.com" *doi-utils-redirect*)
      (let ((with-extension (concat *doi-utils-redirect* ".pdf")))
        (replace-regexp-in-string "/article/" "/content/pdf/" with-extension))
    nil))
(defun acs-pdf-url (*doi-utils-redirect*)
  "Return the pdf url for a pubs.acs.org redirect, or nil.
Every \"/abs/\" in *DOI-UTILS-REDIRECT* is replaced by \"/pdf/\"."
  (and (string-match "^http://pubs.acs.org" *doi-utils-redirect*)
       (replace-regexp-in-string "/abs/" "/pdf/" *doi-utils-redirect*)))
(defun iop-pdf-url (*doi-utils-redirect*)
  "Return the pdf url for an iopscience.iop.org redirect, or nil.
The url is built from the host, the path of *DOI-UTILS-REDIRECT*,
\"/pdf\", that same path with every \"/\" turned into \"_\", and
\".pdf\"."
  (if (not (string-match "^http://iopscience.iop.org" *doi-utils-redirect*))
      nil
    (let* ((path (replace-regexp-in-string
                  "^http://iopscience.iop.org" "" *doi-utils-redirect*))
           (flattened (replace-regexp-in-string "/" "_" path)))
      (concat "http://iopscience.iop.org" path "/pdf" flattened ".pdf"))))
(defun jstor-pdf-url (*doi-utils-redirect*)
  "Return the pdf url for a www.jstor.org redirect, or nil.
\"/stable/\" in *DOI-UTILS-REDIRECT* becomes \"/stable/pdfplus/\"
and \".pdf\" is appended."
  (if (string-match "^http://www.jstor.org" *doi-utils-redirect*)
      (let ((pdfplus (replace-regexp-in-string
                      "/stable/" "/stable/pdfplus/" *doi-utils-redirect*)))
        (concat pdfplus ".pdf"))
    nil))
(defun aip-pdf-url (*doi-utils-redirect*)
  "Return the pdf url for a scitation.aip.org redirect, or nil.

The path of *DOI-UTILS-REDIRECT* (everything after the host) is
split on \"/\".  The delivery url is then formatted from three
pieces: the path without its components 0 and 6, the full path,
and a container id built from components 0 to 3."
  (when (string-match "^http://scitation.aip.org" *doi-utils-redirect*)
    ;; get stuff after content
    (let* ((p2 (replace-regexp-in-string
                "^http://scitation.aip.org/" "" *doi-utils-redirect*))
           (s (split-string p2 "/"))
           (index 0)
           (kept nil)
           p1 p3)
      ;; Drop path components 0 and 6 without needing dash.el.
      (dolist (part s)
        (unless (memq index '(0 6))
          (push part kept))
        (setq index (1+ index)))
      (setq p1 (mapconcat 'identity (nreverse kept) "/"))
      (setq p3 (concat "/" (nth 0 s) (nth 1 s) "/" (nth 2 s) "/" (nth 3 s)))
      (format "http://scitation.aip.org/deliver/fulltext/%s.pdf?itemId=/%s&mimeType=pdf&containerItemId=%s"
              p1 p2 p3))))
(defun tandfonline-pdf-url (*doi-utils-redirect*)
  "Return the pdf url for a www.tandfonline.com redirect, or nil.
Every \"/abs/\" or \"/full/\" in *DOI-UTILS-REDIRECT* is replaced
by \"/pdf/\"."
  (and (string-match "^http://www.tandfonline.com" *doi-utils-redirect*)
       (replace-regexp-in-string "/abs/\\|/full/" "/pdf/" *doi-utils-redirect*)))
(defun ecs-pdf-url (*doi-utils-redirect*)
  "Return the pdf url for a jes.ecsdl.org redirect, or nil.
A trailing \".abstract\" in *DOI-UTILS-REDIRECT* is replaced by
\".full.pdf\"."
  (when (string-match "^http://jes.ecsdl.org" *doi-utils-redirect*)
    ;; The dot must be escaped with a doubled backslash: in a Lisp
    ;; string "\." is just ".", which would match any character.
    (replace-regexp-in-string "\\.abstract$" ".full.pdf" *doi-utils-redirect*)))
(defun ecst-pdf-url (*doi-utils-redirect*)
  "Return the pdf url for an ecst.ecsdl.org redirect, or nil.
The pdf url is *DOI-UTILS-REDIRECT* with \".full.pdf\" appended."
  (if (string-match "^http://ecst.ecsdl.org" *doi-utils-redirect*)
      (concat *doi-utils-redirect* ".full.pdf")
    nil))
(defun rsc-pdf-url (*doi-utils-redirect*)
  "Return the pdf url for a pubs.rsc.org redirect, or nil.
*DOI-UTILS-REDIRECT* is downcased and \"articlelanding\" in it is
replaced by \"articlepdf\"."
  (when (string-match "^http://pubs.rsc.org" *doi-utils-redirect*)
    (replace-regexp-in-string
     "articlelanding" "articlepdf" (downcase *doi-utils-redirect*))))
(defvar *doi-utils-pdf-url* nil
  "Pdf download url recorded by a url retrieval callback.")
(defun doi-utils-get-science-direct-pdf-url (redirect-url)
  "Fetch REDIRECT-URL and extract the pdf url Science Direct hides in the html.

The page is searched for a pdfurl=\"...\" attribute.  Its value
is stored in `*doi-utils-pdf-url*' and returned; the value is nil
when the attribute is not found, rather than whatever a previous
search left in the match data.  Blocks until the retrieval
callback has run."
  (setq *doi-utils-waiting* t)
  (url-retrieve
   redirect-url
   (lambda (_status)
     ;; Always clear the waiting flag, even if something below
     ;; fails; otherwise the polling loop would never end.
     (unwind-protect
         (progn
           (goto-char (point-min))
           (setq *doi-utils-pdf-url*
                 (when (re-search-forward "pdfurl=\"\\([^\"]*\\)\"" nil t)
                   (match-string 1))))
       (setq *doi-utils-waiting* nil))))
  (while *doi-utils-waiting* (sleep-for 0.1))
  *doi-utils-pdf-url*)
(defun science-direct-pdf-url (*doi-utils-redirect*)
  "Return the pdf url for a www.sciencedirect.com redirect, or nil.
The page at *DOI-UTILS-REDIRECT* is handed to
`doi-utils-get-science-direct-pdf-url'; the value then found in
`*doi-utils-pdf-url*' is returned."
  (if (not (string-match "^http://www.sciencedirect.com" *doi-utils-redirect*))
      nil
    (doi-utils-get-science-direct-pdf-url *doi-utils-redirect*)
    *doi-utils-pdf-url*))
;; Some dois resolve to a url such as
;; http://linkinghub.elsevier.com/retrieve/pii/S0927025609004558
;; which actually redirects to
;; http://www.sciencedirect.com/science/article/pii/S0927025609004558
(defun linkinghub-elsevier-pdf-url (*doi-utils-redirect*)
  "Return the pdf url for a linkinghub.elsevier.com redirect, or nil.

The linkinghub url in *DOI-UTILS-REDIRECT* is rewritten to the
corresponding www.sciencedirect.com article url, and that page is
handed to `doi-utils-get-science-direct-pdf-url'.  The value then
found in `*doi-utils-pdf-url*' is returned."
  (when (string-match "^http://linkinghub.elsevier.com/retrieve" *doi-utils-redirect*)
    (let ((second-redirect (replace-regexp-in-string
                            "http://linkinghub.elsevier.com/retrieve"
                            "http://www.sciencedirect.com/science/article"
                            *doi-utils-redirect*)))
      (message "getting pdf url from %s" second-redirect)
      ;; Without this call the function returned whatever an earlier,
      ;; unrelated lookup had left in `*doi-utils-pdf-url*'.
      (doi-utils-get-science-direct-pdf-url second-redirect)
      *doi-utils-pdf-url*)))
(defun pnas-pdf-url (*doi-utils-redirect*)
  "Return the pdf url for a www.pnas.org redirect, or nil.
The pdf url is *DOI-UTILS-REDIRECT* with
\".full.pdf?with-ds=yes\" appended."
  (if (string-match "^http://www.pnas.org" *doi-utils-redirect*)
      (concat *doi-utils-redirect* ".full.pdf?with-ds=yes")
    nil))
;; Register every publisher handler defined above.  They are tried
;; in this order by `doi-utils-get-pdf-url'.
(setq doi-utils-pdf-url-functions
      (list
       'aps-pdf-url
       'science-pdf-url
       'nature-pdf-url
       'wiley-pdf-url
       'springer-pdf-url
       'acs-pdf-url
       'iop-pdf-url
       'jstor-pdf-url
       'aip-pdf-url
       'science-direct-pdf-url
       'linkinghub-elsevier-pdf-url
       'tandfonline-pdf-url
       'ecs-pdf-url
       'ecst-pdf-url
       'rsc-pdf-url
       'pnas-pdf-url))
(defun doi-utils-get-pdf-url (doi)
  "Return a url to a pdf for DOI if one can be calculated.

DOI is first resolved with `doi-utils-get-redirect'; an error is
signaled when that yields no redirect.  The functions in
`doi-utils-pdf-url-functions' are then called in order with the
redirect url until one returns non-nil, and that value is
returned.  Returns nil when no function recognizes the url."
  (doi-utils-get-redirect doi)

  (unless *doi-utils-redirect*
    (error "No redirect found for %s" doi))
  (message "applying functions")
  (catch 'pdf-url
    (dolist (func doi-utils-pdf-url-functions)
      (message "calling %s" func)
      (let ((this-pdf-url (funcall func *doi-utils-redirect*)))
        (message "t: %s" this-pdf-url)
        (when this-pdf-url
          (message "found pdf url: %s" this-pdf-url)
          (throw 'pdf-url this-pdf-url))))))
(defun doi-utils-get-bibtex-entry-pdf ()
  "Download the pdf for the bibtex entry at point if it is not already local.

The entry must have a doi.  The pdf is saved in
`org-ref-pdf-directory' as KEY.pdf, where KEY is the bibtex
label.  Files are never overwritten.  The download is checked to
make sure it is a pdf and not some html failure page; if it is
not, its contents are shown with `message' and the file is
deleted.  You must have permission to access the pdf.  The pdf
is opened at the end when it exists.

Returns the pdf file name when a download was attempted, and nil
when the entry has no doi or the file already existed."
  (interactive)
  (save-excursion
    (bibtex-beginning-of-entry)
    (let (;; get doi, removing http://dx.doi.org/ if it is there.
          (doi (replace-regexp-in-string
                "http://dx.doi.org/" ""
                (bibtex-autokey-get-field "doi")))
          key pdf-url pdf-file)
      ;; get the key and build pdf filename.
      (re-search-forward bibtex-entry-maybe-empty-head)
      (setq key (match-string bibtex-key-in-head))
      (setq pdf-file (concat org-ref-pdf-directory key ".pdf"))

      ;; now get file if needed.  A missing doi field shows up as
      ;; an empty string, which must not be sent to the resolver.
      (when (and doi
                 (not (string= doi ""))
                 (not (file-exists-p pdf-file)))
        (setq pdf-url (doi-utils-get-pdf-url doi))
        (if pdf-url
            (progn
              (url-copy-file pdf-url pdf-file)
              ;; now check if we got a pdf
              (with-temp-buffer
                (insert-file-contents pdf-file)
                ;; PDFS start with %PDF-1.x as the first few characters.
                ;; Clamp the end so a very short file cannot signal
                ;; args-out-of-range.
                (if (not (string= (buffer-substring
                                   (point-min) (min (point-max) 6))
                                  "%PDF-"))
                    (progn
                      (message "%s" (buffer-string))
                      (delete-file pdf-file))
                  (message "%s saved" pdf-file)))

              (when (file-exists-p pdf-file)
                (org-open-file pdf-file)))
          (message "No pdf-url found for %s at %s" doi *doi-utils-redirect* ))
        pdf-file))))
(defun doi-utils-get-json-metadata (doi)
  "Return the citeproc JSON metadata for DOI as a plist.

The metadata is requested synchronously from dx.doi.org with an
Accept header of \"application/citeproc+json\" and parsed with
`json-object-type' bound to `plist'.  Signals an error when no
response buffer is returned.  The response buffer is killed
before returning."
  (let* ((url-request-method "GET")
         (url-mime-accept-string "application/citeproc+json")
         (json-object-type 'plist)
         (response (url-retrieve-synchronously
                    (concat "http://dx.doi.org/" doi))))
    (unless response
      (error "No response from http://dx.doi.org/ for %s" doi))
    (unwind-protect
        (with-current-buffer response
          (json-read-from-string
           (buffer-substring url-http-end-of-headers (point-max))))
      ;; do not leave one hidden buffer behind per lookup
      (kill-buffer response))))
(defun doi-utils-expand-template (s)
  "Expand every %{EXPR} in the template S and return the result.
The text between the braces is read as a Lisp expression,
evaluated with `eval', and its value formatted with \"%s\".  The
value is inserted literally, so backslashes in it are kept as is
and its case is not adjusted."
  (replace-regexp-in-string
   "%{\\([^}]+\\)}"
   (lambda (arg)
     ;; ARG is the whole match; strip the leading "%{" and the
     ;; trailing "}" to get the expression text.
     (let ((sexp (substring arg 2 -1)))
       (format "%s" (eval (read sexp)))))
   s
   t    ; FIXEDCASE: do not adapt the case of the replacement
   t))  ; LITERAL: do not interpret backslashes in the replacement
(defun doi-utils-doi-to-bibtex-string (doi)
  "Return a bibtex entry as a string for DOI.

The metadata comes from `doi-utils-get-json-metadata'.  Only the
type \"journal-article\" is currently supported; for any other
type an error is signaled, so that callers do not insert a
message text into a bibtex file.  The entry is returned with an
empty key."
  (let* ((results (doi-utils-get-json-metadata doi))
         (type (plist-get results :type))
         (author (mapconcat
                  (lambda (x)
                    (concat (plist-get x :given) " " (plist-get x :family)))
                  (plist-get results :author) " and "))
         (title (plist-get results :title))
         (journal (plist-get results :container-title))
         (volume (plist-get results :volume))
         (issue (plist-get results :issue))
         (year (elt (elt (plist-get (plist-get results :issued) :date-parts) 0) 0))
         (pages (plist-get results :page))
         (entry-doi (plist-get results :DOI))
         (url (plist-get results :URL)))
    (cond
     ((string= type "journal-article")
      ;; Each value is formatted with %s, as the %{...} template did.
      (format "@article{,
  author = {%s},
  title = {%s},
  journal = {%s},
  year = {%s},
  volume = {%s},
  number = {%s},
  pages = {%s},
  doi = {%s},
  url = {%s},
}"
              author title journal year volume issue pages entry-doi url))
     (t (error "%s not supported yet." type)))))
(defun doi-utils-insert-bibtex-entry-from-doi (doi)
  "Insert the bibtex entry for DOI at point.
The entry is then cleaned with `org-ref-clean-bibtex-entry'
\(keeping the key when `bibtex-key-in-head' reports one\), the
corresponding pdf is fetched with
`doi-utils-get-bibtex-entry-pdf', and the notes are opened with
`org-ref-open-bibtex-notes' without changing the selected window."
  (interactive "sDOI: ")
  (let ((entry (doi-utils-doi-to-bibtex-string doi)))
    (insert entry))
  ;; Pass t to keep an existing key, nil otherwise.
  (org-ref-clean-bibtex-entry (and (bibtex-key-in-head nil) t))
  (doi-utils-get-bibtex-entry-pdf)
  (save-selected-window
    (org-ref-open-bibtex-notes)))
(defun doi-utils-add-bibtex-entry-from-doi (doi)
  "Add the bibtex entry for DOI to the end of the first file in
`org-ref-default-bibliography'.

The file is visited, point is moved to the end of the buffer, a
blank line is inserted as a separator, and the entry is inserted
with `doi-utils-insert-bibtex-entry-from-doi'."
  (interactive "sDOI: ")
  (find-file (car org-ref-default-bibliography))
  ;; Without this the entry lands wherever point happens to be,
  ;; possibly in the middle of an existing entry.
  (goto-char (point-max))
  (insert "\n\n")
  (doi-utils-insert-bibtex-entry-from-doi doi))
(defun doi-utils-add-bibtex-entry-from-region (start end)
  "Add the bibtex entry for the doi in the region to the end of
the first file in `org-ref-default-bibliography'.

START and END delimit the region, which is assumed to contain a
doi; surrounding whitespace is ignored.  When called
interactively the current region is used."
  (interactive "r")
  (let ((doi (replace-regexp-in-string
              "\\`[ \t\n\r]+\\|[ \t\n\r]+\\'" ""
              (buffer-substring-no-properties start end))))
    (find-file (car org-ref-default-bibliography))
    ;; Without this the entry lands wherever point happens to be,
    ;; possibly in the middle of an existing entry.
    (goto-char (point-max))
    (insert "\n\n")
    (doi-utils-insert-bibtex-entry-from-doi doi)))
(defun bibtex-set-field (field value)
  "Set FIELD to VALUE in the bibtex entry at point.
The field is created if it does not exist.  When FIELD exists
and VALUE is nil, the field is left untouched."
  (interactive "sfield: \nsvalue: ")
  (bibtex-beginning-of-entry)
  (let ((found (bibtex-search-forward-field field t)))
    (if found
        ;; we found a field: replace it with a fresh one holding VALUE
        (progn
          (goto-char (car (cdr found)))
          (when value
            (bibtex-kill-field)
            (bibtex-make-field field)
            ;; NOTE(review): assumes `bibtex-make-field' leaves point
            ;; right after the closing delimiter, so stepping back one
            ;; character puts point inside the braces -- confirm.
            (backward-char)
            (insert value)))
      ;; make a new field
      (message "new field being made")
      (bibtex-beginning-of-entry)
      (forward-line) (beginning-of-line)
      (bibtex-next-field nil)
      (forward-char)
      (bibtex-make-field field)
      (backward-char)
      ;; `insert' rejects nil, so a nil VALUE leaves the field empty.
      (when value
        (insert value)))))
(defun plist-get-keys (plist)
  "Return the keys of PLIST as a list, in order.
PLIST is a property list (KEY1 VALUE1 KEY2 VALUE2 ...).  Returns
nil for an empty list."
  ;; Walk PLIST itself two cells at a time.  The previous code
  ;; iterated over the free variable `results' and only worked when
  ;; a caller happened to have that name dynamically bound.
  (let ((keys nil))
    (while plist
      (push (car plist) keys)
      (setq plist (cddr plist)))
    (nreverse keys)))
(defun doi-utils-update-bibtex-entry-from-doi (doi)
  "Update the fields of the bibtex entry at point from DOI.
Every field for which the metadata has a value is overwritten,
so previous changes to those fields are lost.

Interactively, DOI is taken from the entry's doi field (with a
leading http://dx.doi.org/ removed); when that field is empty the
doi is read from the minibuffer.  Afterwards the entry is cleaned
with `org-ref-clean-bibtex-entry', keeping the key if it exists."
  (interactive
   (list
    (let ((field-doi (replace-regexp-in-string
                      "http://dx.doi.org/" ""
                      (bibtex-autokey-get-field "doi"))))
      ;; A missing field yields "", which is non-nil, so test for it
      ;; explicitly before falling back to prompting.
      (if (string= field-doi "")
          (read-string "DOI: ")
        field-doi))))
  (let* ((results (doi-utils-get-json-metadata doi))
         (author (mapconcat
                  (lambda (x) (concat (plist-get x :given)
                                      " " (plist-get x :family)))
                  (plist-get results :author) " and "))
         (year (format "%s"
                       (elt
                        (elt
                         (plist-get
                          (plist-get results :issued) :date-parts) 0) 0)))
         ;; map the json fields to bibtex fields: (JSON-KEY FIELD VALUE)
         (mapping
          (list (list :author "author" author)
                (list :title "title" (plist-get results :title))
                (list :container-title "journal"
                      (plist-get results :container-title))
                (list :issued "year" year)
                (list :volume "volume" (plist-get results :volume))
                (list :issue "number" (or (plist-get results :issue) ""))
                (list :page "pages" (or (plist-get results :page) ""))
                (list :DOI "doi" (plist-get results :DOI))
                (list :URL "url" (or (plist-get results :URL) ""))))
         (tail results))
    ;; Visit the keys present in the metadata, in order, and set the
    ;; matching bibtex field; keys without a mapping are ignored.
    (while tail
      (let ((spec (assq (car tail) mapping)))
        (when spec
          (bibtex-set-field (nth 1 spec) (nth 2 spec))))
      (setq tail (cddr tail))))

  ;; reclean entry, but keep key if it exists.
  (if (bibtex-key-in-head)
      (org-ref-clean-bibtex-entry t)
    (org-ref-clean-bibtex-entry)))