config

Personal configuration.
git clone git://code.dwrz.net/config
Log | Files | Refs

pdf-info.el (65103B)


      1 ;;; pdf-info.el --- Extract info from pdf-files via a helper process. -*- lexical-binding: t -*-
      2 
      3 ;; Copyright (C) 2013, 2014  Andreas Politz
      4 
      5 ;; Author: Andreas Politz <politza@fh-trier.de>
      6 ;; Keywords: files, multimedia
      7 
      8 ;; This program is free software; you can redistribute it and/or modify
      9 ;; it under the terms of the GNU General Public License as published by
     10 ;; the Free Software Foundation, either version 3 of the License, or
     11 ;; (at your option) any later version.
     12 
     13 ;; This program is distributed in the hope that it will be useful,
     14 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
     15 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     16 ;; GNU General Public License for more details.
     17 
     18 ;; You should have received a copy of the GNU General Public License
     19 ;; along with this program.  If not, see <http://www.gnu.org/licenses/>.
     20 
     21 ;;; Commentary:
     22 ;;
     23 ;; This library represents the Lisp side of the epdfinfo server.  This
     24 ;; program works on a command/response basis, but there should be no
     25 ;; need to understand the protocol, since every command has a
     26 ;; corresponding Lisp-function (see below under `High level
     27 ;; interface').
     28 ;;
     29 ;; Most of these functions receive a file-or-buffer argument, which
     30 ;; may be what it says and defaults to the current buffer.  Also, most
     31 ;; functions return some sort of alist, with, in most cases,
     32 ;; straight-forward key-value-pairs.  Though some may be only
     33 ;; understandable in the context of Adobe's PDF spec \(Adobe
     34 ;; PDF32000\) or the poppler documentation (e.g. annotation flags).
     35 ;;
     36 ;; If the poppler library is fairly recent (>= 0.19.4, older versions
     37 ;; have a bug, which may corrupt the document), annotations maybe
     38 ;; modified to a certain degree, deleted and text-annotations created.
     39 ;; The state of these modifications is held in the server.  In order
     40 ;; to realize, annotations retrieved or created are referenced by a
     41 ;; unique symbol.  Saving these changes creates a new file, the
     42 ;; original document is never touched.
     43 
     44 ;;; Todo:
     45 ;;
     46 ;; + Close documents at some time (e.g. when the buffer is killed)
     47 ;;
     48 
     49 ;;; Code:
     50 
     51 (require 'tq)
     52 (require 'cl-lib)
     53 
     54 
     55 
     56 ;; * ================================================================== *
     57 ;; * Customizations
     58 ;; * ================================================================== *
     59 
     60 (defgroup pdf-info nil
     61   "Extract infos from pdf-files via a helper process."
     62   :group 'pdf-tools)
     63 
     64 (defcustom pdf-info-epdfinfo-program
     65   (let ((executable (if (eq system-type 'windows-nt)
     66                         "epdfinfo.exe" "epdfinfo"))
     67         (default-directory
     68           (or (and load-file-name
     69                    (file-name-directory load-file-name))
     70               default-directory)))
     71     (cl-labels ((try-directory (directory)
     72                   (and (file-directory-p directory)
     73                        (file-executable-p (expand-file-name executable directory))
     74                        (expand-file-name executable directory))))
     75       (or (executable-find executable)
     76           ;; This works if epdfinfo is in the same place as emacs and
     77           ;; the editor was started with an absolute path, i.e. it is
     78           ;; meant for Windows/Msys2.
     79           (and (stringp (car-safe command-line-args))
     80                (file-name-directory (car command-line-args))
     81                (try-directory
     82                 (file-name-directory (car command-line-args))))
     83           ;; If we are running directly from the git repo.
     84           (try-directory (expand-file-name "../server"))
     85           ;; Fall back to epdfinfo in the directory of this file.
     86           (expand-file-name executable))))
     87   "Filename of the epdfinfo executable."
     88   :type 'file)
     89 
     90 (defcustom pdf-info-epdfinfo-error-filename nil
     91   "Filename for error output of the epdfinfo executable.
     92 
     93 If nil, discard any error messages.  Useful for debugging."
     94   :type `(choice (const :tag "None" nil)
     95                  ,@(when (file-directory-p "/tmp/")
     96                      '((const "/tmp/epdfinfo.log")))
     97                  (file)))
     98 
     99 (defcustom pdf-info-log nil
    100   "Whether to log the communication with the server.
    101 
    102 If this is non-nil, all communication with the epdfinfo program
    103 will be logged to the buffer \"*pdf-info-log*\"."
    104   :type 'boolean)
    105 
    106 (defcustom pdf-info-log-entry-max 512
    107   "Maximum number of characters in a single log entry.
    108 
    109 This variable has no effect if `pdf-info-log' is nil."
    110   :type 'integer)
    111 
    112 (defcustom pdf-info-restart-process-p 'ask
    113   "What to do when the epdfinfo server died.
    114 
    115 This should be one of
    116 nil -- do nothing,
    117 t   -- automatically restart it or
    118 ask -- ask whether to restart or not.
    119 
    120 If it is `ask', the server quits and you answer no, this variable
    121 is set to nil."
    122   :type '(choice (const :tag "Do nothing" nil)
    123                  (const :tag "Restart silently" t)
    124                  (const :tag "Always ask" ask)))
    125 
    126 (defcustom pdf-info-close-document-hook nil
    127   "A hook ran after a document was closed in the server.
    128 
    129 The hook is run in the documents buffer, if it exists. Otherwise
    130 in a `with-temp-buffer' form."
    131   :type 'hook)
    132 
    133 
    134 
    135 ;; * ================================================================== *
    136 ;; * Variables
    137 ;; * ================================================================== *
    138 
    139 (defvar pdf-info-asynchronous nil
    140   "If non-nil process queries asynchronously.
    141 
    142 More specifically the value should be a function of at 2
    143 arguments \(fn STATUS RESPONSE\), where STATUS is either nil, for
    144 a successful query, or the symbol error.  RESPONSE is either the
    145 command's response or the error message.  This does not work
    146 recursive, i.e. if function wants to make another asynchronous
    147 query it has to rebind this variable.
    148 
    149 Alternatively it may be a list \(FN . ARGS\), in which case FN
    150 will be invoked like \(apply FN STATUS RESPONSE ARGS\).
    151 
    152 Also, all pdf-info functions normally returning a response return
    153 nil.
    154 
    155 This variable should only be let-bound.")
    156 
    157 (defconst pdf-info-pdf-date-regexp
    158   ;; Adobe PDF32000.book, 7.9.4 Dates
    159   (eval-when-compile
    160     (concat
    161      ;; allow for preceding garbage
    162      ;;"\\`"
    163      "[dD]:"
    164      "\\([0-9]\\{4\\}\\)"          ;year
    165      "\\(?:"
    166      "\\([0-9]\\{2\\}\\)"          ;month
    167      "\\(?:"
    168      "\\([0-9]\\{2\\}\\)"          ;day
    169      "\\(?:"
    170      "\\([0-9]\\{2\\}\\)"          ;hour
    171      "\\(?:"
    172      "\\([0-9]\\{2\\}\\)"          ;minutes
    173      "\\(?:"
    174      "\\([0-9]\\{2\\}\\)"          ;seconds
    175      "\\)?\\)?\\)?\\)?\\)?"
    176      "\\(?:"
    177      "\\([+-Zz]\\)"                ;UT delta char
    178      "\\(?:"
    179      "\\([0-9]\\{2\\}\\)"          ;UT delta hours
    180      "\\(?:"
    181      "'"
    182      "\\([0-9]\\{2\\}\\)"          ;UT delta minutes
    183      "\\)?\\)?\\)?"
    184      ;; "\\'"
    185      ;; allow for trailing garbage
    186      )))
    187 
    188 (defvar pdf-info--queue t
    189   "Internally used transmission-queue for the server.
    190 
    191 This variable is initially t, telling the code starting the
    192 server, that it never ran.")
    193 
    194 
    195 ;; * ================================================================== *
    196 ;; * Process handling
    197 ;; * ================================================================== *
    198 
    199 (defconst pdf-info-empty-page-data
    200   (eval-when-compile
    201     (concat
    202      "%PDF-1.0\n1 0 obj<</Type/Catalog/Pages 2 0 R>>endobj 2 0"
    203      " obj<</Type/Pages/Kids[3 0 R]/Count 1>>endobj 3 0 obj<</"
    204      "Type/Page/MediaBox[0 0 3 3]>>endobj\nxref\n0 4\n00000000"
    205      "0065535 f\n0000000010 00000 n\n0000000053 00000 n\n00000"
    206      "00102 00000 n\ntrailer<</Size 4/Root 1 0 R>>\nstartxref\n149\n%EOF"))
    207   "PDF data of an empty page.")
    208 
    209 (defun pdf-info-process ()
    210   "Return the process object or nil."
    211   (and pdf-info--queue
    212        (not (eq t pdf-info--queue))
    213        (tq-process pdf-info--queue)))
    214 
    215 (defun pdf-info-check-epdfinfo (&optional interactive-p)
    216   "Check if the server should be working properly.
    217 
    218 Signal an error if some problem was found.  Message a
    219 confirmation, if INTERACTIVE-P is non-nil and no problems were
    220 found.
    221 
    222 Returns nil."
    223   (interactive "p")
    224   (let ((executable pdf-info-epdfinfo-program))
    225     (unless (stringp executable)
    226       (error "pdf-info-epdfinfo-program is unset or not a string"))
    227     (unless (file-executable-p executable)
    228       (error "pdf-info-epdfinfo-program is not executable"))
    229     (when pdf-info-epdfinfo-error-filename
    230       (unless (and (stringp pdf-info-epdfinfo-error-filename)
    231                    (file-writable-p pdf-info-epdfinfo-error-filename))
    232         (error "pdf-info-epdfinfo-error-filename should contain writable filename")))
    233     (let* ((default-directory (expand-file-name "~/"))
    234            (cmdfile (make-temp-file "commands"))
    235            (pdffile (make-temp-file "empty.pdf"))
    236            (tempdir (make-temp-file "tmpdir" t))
    237            (process-environment (cons (concat "TMPDIR=" tempdir)
    238                                       process-environment)))
    239       (unwind-protect
    240           (with-temp-buffer
    241             (with-temp-file pdffile
    242               (set-buffer-multibyte nil)
    243               (insert pdf-info-empty-page-data))
    244             (with-temp-file cmdfile
    245               (insert (format "renderpage:%s:1:100\nquit\n"
    246                               (pdf-info-query--escape pdffile))))
    247             (unless (= 0 (apply #'call-process
    248                                 executable cmdfile (current-buffer)
    249                                 nil (when pdf-info-epdfinfo-error-filename
    250                                       (list pdf-info-epdfinfo-error-filename))))
    251               (error "Error running `%s': %s"
    252                      pdf-info-epdfinfo-program
    253                      (buffer-string))))
    254         (when (file-exists-p cmdfile)
    255           (delete-file cmdfile))
    256         (when (file-exists-p pdffile)
    257           (delete-file pdffile))
    258         (when (file-exists-p tempdir)
    259           (delete-directory tempdir t)))))
    260   (when interactive-p
    261     (message "The epdfinfo program appears to be working."))
    262   nil)
    263 
    264 (defun pdf-info-process-assert-running (&optional force)
    265   "Assert a running process.
    266 
    267 If it never ran, i.e. `pdf-info-process' is t, start it
    268 unconditionally.
    269 
    270 Otherwise, if FORCE is non-nil start it, if it is not running.
    271 Else restart it with respect to the variable
    272 `pdf-info-restart-process-p', which see.
    273 
    274 If getting the process to run fails, this function throws an
    275 error."
    276   (interactive "P")
    277   (unless (and (processp (pdf-info-process))
    278                (eq (process-status (pdf-info-process))
    279                    'run))
    280     (when (pdf-info-process)
    281       (tq-close pdf-info--queue)
    282       (setq pdf-info--queue nil))
    283     (unless (or force
    284                 (eq pdf-info--queue t)
    285                 (and (eq pdf-info-restart-process-p 'ask)
    286                      (not noninteractive)
    287                      (y-or-n-p "The epdfinfo server quit, restart it ? "))
    288                 (and pdf-info-restart-process-p
    289                      (not (eq pdf-info-restart-process-p 'ask))))
    290 
    291       (when (eq pdf-info-restart-process-p 'ask)
    292         (setq pdf-info-restart-process-p nil))
    293       (error "The epdfinfo server quit"))
    294     (pdf-info-check-epdfinfo)
    295     (let* ((process-connection-type)    ;Avoid 4096 Byte bug #12440.
    296            (default-directory "~")
    297            (proc (apply #'start-process
    298                         "epdfinfo" " *epdfinfo*" pdf-info-epdfinfo-program
    299                         (when pdf-info-epdfinfo-error-filename
    300                           (list pdf-info-epdfinfo-error-filename)))))
    301       (with-current-buffer " *epdfinfo*"
    302         (erase-buffer))
    303       (set-process-query-on-exit-flag proc nil)
    304       (set-process-coding-system proc 'utf-8-unix 'utf-8-unix)
    305       (setq pdf-info--queue (tq-create proc))))
    306   pdf-info--queue)
    307 
    308 (when (< emacs-major-version 27)
    309   (advice-add 'tq-process-buffer :around #'pdf-info--tq-workaround))
    310 
    311 (defun pdf-info--tq-workaround (orig-fun tq &rest args)
    312   "Fix a bug in trunk where the wrong callback gets called.
    313 
    314 ORIG-FUN is the callback that should be called. TQ and ARGS are
    315 the transmission-queue and arguments to the callback."
    316   ;; FIXME: Make me iterative.
    317   (if (not (equal (car (process-command (tq-process tq)))
    318                   pdf-info-epdfinfo-program))
    319       (apply orig-fun tq args)
    320     (let ((buffer (tq-buffer tq))
    321           done)
    322       (when (buffer-live-p buffer)
    323         (set-buffer buffer)
    324         (while (and (not done)
    325                     (> (buffer-size) 0))
    326           (setq done t)
    327           (if (tq-queue-empty tq)
    328               (let ((buf (generate-new-buffer "*spurious*")))
    329                 (copy-to-buffer buf (point-min) (point-max))
    330                 (delete-region (point-min) (point))
    331                 (pop-to-buffer buf nil)
    332                 (error "Spurious communication from process %s, see buffer %s"
    333                        (process-name (tq-process tq))
    334                        (buffer-name buf)))
    335             (goto-char (point-min))
    336             (when (re-search-forward (tq-queue-head-regexp tq) nil t)
    337               (setq done nil)
    338               (let ((answer (buffer-substring (point-min) (point)))
    339                     (fn (tq-queue-head-fn tq))
    340                     (closure (tq-queue-head-closure tq)))
    341                 (delete-region (point-min) (point))
    342                 (tq-queue-pop tq)
    343                 (condition-case-unless-debug err
    344                     (funcall fn closure answer)
    345                   (error
    346                    (message "Error while processing tq callback: %s"
    347                             (error-message-string err))))))))))))
    348 
    349 
    350 ;; * ================================================================== *
    351 ;; * Sending and receiving
    352 ;; * ================================================================== *
    353 
    354 (defun pdf-info-query (cmd &rest args)
    355   "Query the server using CMD and ARGS."
    356   (pdf-info-process-assert-running)
    357   (unless (symbolp cmd)
    358     (setq cmd (intern cmd)))
    359   (let* ((query (concat (mapconcat #'pdf-info-query--escape
    360                                    (cons cmd args) ":")
    361                         "\n"))
    362          (callback
    363           (lambda (closure response)
    364             (cl-destructuring-bind (status &rest result)
    365                 (pdf-info-query--parse-response cmd response)
    366               (pdf-info-query--log response)
    367               (let* (pdf-info-asynchronous)
    368                 (if (functionp closure)
    369                     (funcall closure status result)
    370                   (apply (car closure) status result (cdr closure)))))))
    371          response status done
    372          (closure (or pdf-info-asynchronous
    373                       (lambda (s r)
    374                         (setq status s response r done t)))))
    375     (pdf-info-query--log query t)
    376     (tq-enqueue
    377      pdf-info--queue query "^\\.\n" closure callback)
    378     (unless pdf-info-asynchronous
    379       (while (and (not done)
    380                   (eq (process-status (pdf-info-process))
    381                       'run))
    382         (accept-process-output (pdf-info-process) 0.01))
    383       (when (and (not done)
    384                  (not (eq (process-status (pdf-info-process))
    385                           'run))
    386                  (not (eq cmd 'quit)))
    387         (error "The epdfinfo server quit unexpectedly"))
    388       (cond
    389        ((null status) response)
    390        ((eq status 'error)
    391         (error "epdfinfo: %s" response))
    392        ((eq status 'interrupted)
    393         (error "epdfinfo: Command was interrupted"))
    394        (t
    395         (error "Internal error: invalid response status"))))))
    396 
    397 (defun pdf-info-interrupt ()
    398   "FIXME: This command does currently nothing."
    399   (when (and (processp (pdf-info-process))
    400              (eq (process-status (pdf-info-process))
    401                  'run))
    402     (signal-process (pdf-info-process) 'SIGUSR1)))
    403 
    404 (defun pdf-info-query--escape (arg)
    405   "Escape ARG for transmission to the server."
    406   (if (null arg)
    407       (string)
    408     (with-current-buffer (get-buffer-create " *pdf-info-query--escape*")
    409       (erase-buffer)
    410       (insert (format "%s" arg))
    411       (goto-char 1)
    412       (while (not (eobp))
    413         (cond
    414          ((memq (char-after) '(?\\ ?:))
    415           (insert ?\\))
    416          ((eq (char-after) ?\n)
    417           (delete-char 1)
    418           (insert ?\\ ?n)
    419           (backward-char)))
    420         (forward-char))
    421       (buffer-substring-no-properties 1 (point-max)))))
    422 
    423 (defmacro pdf-info-query--read-record ()
    424   "Read a single record of the response in current buffer."
    425   `(let (records done (beg (point)))
    426      (while (not done)
    427        (cl-case (char-after)
    428          (?\\
    429           (delete-char 1)
    430           (if (not (eq (char-after) ?n))
    431               (forward-char)
    432             (delete-char 1)
    433             (insert ?\n)))
    434          ((?: ?\n)
    435           (push (buffer-substring-no-properties
    436                  beg (point)) records)
    437           (forward-char)
    438           (setq beg (point)
    439                 done (bolp)))
    440          (t (forward-char))))
    441      (nreverse records)))
    442 
    443 (defun pdf-info-query--parse-response (cmd response)
    444   "Parse one epdfinfo RESPONSE to CMD.
    445 
    446 Returns a cons \(STATUS . RESULT\), where STATUS is one of nil
    447 for a regular response, error for an error \(RESULT contains the
    448 error message\) or interrupted, i.e. the command was
    449 interrupted."
    450   (with-current-buffer
    451       (get-buffer-create " *pdf-info-query--parse-response*")
    452     (erase-buffer)
    453     (insert response)
    454     (goto-char 1)
    455     (cond
    456      ((looking-at "ERR\n")
    457       (forward-line)
    458       (cons 'error (buffer-substring-no-properties
    459                     (point)
    460                     (progn
    461                       (re-search-forward "^\\.\n")
    462                       (1- (match-beginning 0))))))
    463      ((looking-at "OK\n")
    464       (let (result)
    465         (forward-line)
    466         (while (not (and (= (char-after) ?.)
    467                          (= (char-after (1+ (point))) ?\n)))
    468           (push (pdf-info-query--read-record) result))
    469         (cons nil (pdf-info-query--transform-response
    470                    cmd (nreverse result)))))
    471      ((looking-at "INT\n")
    472       (cons 'interrupted nil))
    473      (t
    474       (cons 'error "Invalid server response")))))
    475 
    476 (defun pdf-info-query--transform-response (cmd response)
    477   "Transform a RESPONSE to CMD into a Lisp form."
    478   (cl-case cmd
    479     (open nil)
    480     (close (equal "1" (caar response)))
    481     (number-of-pages (string-to-number (caar response)))
    482     (charlayout
    483      (mapcar (lambda (elt)
    484                (cl-assert (= 1 (length (cadr elt))) t)
    485                `(,(aref (cadr elt) 0)
    486                  ,(mapcar #'string-to-number
    487                           (split-string (car elt) " " t))))
    488              response))
    489     (regexp-flags
    490      (mapcar (lambda (elt)
    491                (cons (intern (car elt))
    492                      (string-to-number (cadr elt))))
    493              response))
    494     ((search-string search-regexp)
    495      (mapcar
    496       (lambda (r)
    497         `((page . ,(string-to-number (nth 0 r)))
    498           (text . ,(let (case-fold-search)
    499                      (pdf-util-highlight-regexp-in-string
    500                       (regexp-quote (nth 1 r)) (nth 2 r))))
    501           (edges . ,(mapcar (lambda (m)
    502                               (mapcar #'string-to-number
    503                                       (split-string m " " t)))
    504                             (cddr (cdr r))))))
    505       response))
    506     (outline
    507      (mapcar (lambda (r)
    508                `((depth . ,(string-to-number (pop r)))
    509                  ,@(pdf-info-query--transform-action r)))
    510              response))
    511     (pagelinks
    512      (mapcar (lambda (r)
    513                `((edges .
    514                         ,(mapcar #'string-to-number ;area
    515                                  (split-string (pop r) " " t)))
    516                  ,@(pdf-info-query--transform-action r)))
    517              response))
    518     (metadata
    519      (let ((md (car response)))
    520        (if (= 1 (length md))
    521            (list (cons 'title (car md)))
    522          (list
    523           (cons 'title (pop md))
    524           (cons 'author (pop md))
    525           (cons 'subject (pop md))
    526           (cons 'keywords-raw (car md))
    527           (cons 'keywords (split-string (pop md) "[\t\n ]*,[\t\n ]*" t))
    528           (cons 'creator (pop md))
    529           (cons 'producer (pop md))
    530           (cons 'format (pop md))
    531           (cons 'created (pop md))
    532           (cons 'modified (pop md))))))
    533     (gettext
    534      (or (caar response) ""))
    535     (getselection
    536      (mapcar (lambda (line)
    537                (mapcar #'string-to-number
    538                        (split-string (car line) " " t)))
    539              response))
    540     (features (mapcar #'intern (car response)))
    541     (pagesize
    542      (setq response (car response))
    543      (cons (round (string-to-number (car response)))
    544            (round (string-to-number (cadr response)))))
    545     ((getannot editannot addannot)
    546      (pdf-info-query--transform-annotation (car response)))
    547     (getannots
    548      (mapcar #'pdf-info-query--transform-annotation response))
    549     (getattachments
    550      (mapcar #'pdf-info-query--transform-attachment response))
    551     ((getattachment-from-annot)
    552      (pdf-info-query--transform-attachment (car response)))
    553     (boundingbox
    554      (mapcar #'string-to-number (car response)))
    555     (synctex-forward-search
    556      (let ((list (mapcar #'string-to-number (car response))))
    557        `((page . ,(car list))
    558          (edges . ,(cdr list)))))
    559     (synctex-backward-search
    560      `((filename . ,(caar response))
    561        (line . ,(string-to-number (cadr (car response))))
    562        (column . ,(string-to-number (cadr (cdar response))))))
    563     (delannot nil)
    564     ((save) (caar response))
    565     ((renderpage renderpage-text-regions renderpage-highlight)
    566      (pdf-util-munch-file (caar response)))
    567     ((setoptions getoptions)
    568      (let (options)
    569        (dolist (key-value response)
    570          (let ((key (intern (car key-value)))
    571                (value (cadr key-value)))
    572            (cl-case key
    573              ((:render/printed)
    574               (setq value (equal value "1")))
    575              ((:render/usecolors)
    576               (setq value (ignore-errors
    577                             (let ((int-val (cl-parse-integer value)))
    578                               (if (> 3 int-val 0)
    579                                   int-val
    580                                 0))))))
    581            (push value options)
    582            (push key options)))
    583        options))
    584     (pagelabels (mapcar #'car response))
    585     (ping (caar response))
    586     (t response)))
    587 
    588 
    589 (defun pdf-info-query--transform-action (action)
    590   "Transform ACTION response into a Lisp form."
    591   (let ((type (intern (pop action))))
    592     `((type . ,type)
    593       (title . ,(pop action))
    594       ,@(cl-case type
    595           (goto-dest
    596            `((page . ,(string-to-number (pop action)))
    597              (top . ,(and (> (length (car action)) 0)
    598                           (string-to-number (pop action))))))
    599           (goto-remote
    600            `((filename . ,(pop action))
    601              (page . ,(string-to-number (pop action)))
    602              (top . ,(and (> (length (car action)) 0)
    603                           (string-to-number (pop action))))))
    604           (t `((uri . ,(pop action))))))))
    605 
    606 (defun pdf-info-query--transform-annotation (a)
    607   (cl-labels ((not-empty (s)
    608                 (if (not (equal s "")) s)))
    609     (let (a1 a2 a3)
    610       (cl-destructuring-bind (page edges type id flags color contents modified &rest rest)
    611           a
    612         (setq a1 `((page . ,(string-to-number page))
    613                    (edges . ,(mapcar #'string-to-number
    614                                      (split-string edges " " t)))
    615                    (type . ,(intern type))
    616                    (id . ,(intern id))
    617                    (flags . ,(string-to-number flags))
    618                    (color . ,(not-empty color))
    619                    (contents . ,contents)
    620                    (modified . ,(pdf-info-parse-pdf-date modified))))
    621         (when rest
    622           (cl-destructuring-bind (label subject opacity popup-edges popup-is-open created
    623                                         &rest rest)
    624               rest
    625             (setq a2
    626                   `((label . ,(not-empty label))
    627                     (subject . ,(not-empty subject))
    628                     (opacity . ,(let ((o (not-empty opacity)))
    629                                   (and o (string-to-number o))))
    630                     (popup-edges . ,(let ((p (not-empty popup-edges)))
    631                                       (when p
    632                                         (mapcar #'string-to-number
    633                                                 (split-string p " " t)))))
    634                     (popup-is-open . ,(equal popup-is-open "1"))
    635                     (created . ,(pdf-info-parse-pdf-date (not-empty created)))))
    636             (cond
    637              ((eq (cdr (assoc 'type a1)) 'text)
    638               (cl-destructuring-bind (icon state is-open)
    639                   rest
    640                 (setq a3
    641                       `((icon . ,(not-empty icon))
    642                         (state . ,(not-empty state))
    643                         (is-open . ,(equal is-open "1"))))))
    644              ((memq (cdr (assoc 'type a1))
    645                     '(squiggly highlight underline strike-out))
    646               (setq a3 `((markup-edges
    647                           . ,(mapcar (lambda (r)
    648                                        (mapcar #'string-to-number
    649                                                (split-string r " " t)))
    650                                      rest)))))))))
    651       (append a1 a2 a3))))
    652 
    653 (defun pdf-info-query--transform-attachment (a)
    654   (cl-labels ((not-empty (s)
    655                 (if (not (equal s "")) s)))
    656     (cl-destructuring-bind (id filename description size modified
    657                                created checksum file)
    658         a
    659       `((id . ,(intern id))
    660         (filename . ,(not-empty filename))
    661         (description . ,(not-empty description))
    662         (size . ,(let ((n (string-to-number size)))
    663                    (and (>= n 0) n)))
    664         (modified . ,(not-empty modified))
    665         (created . ,(not-empty created))
    666         (checksum . ,(not-empty checksum))
    667         (file . ,(not-empty file))))))
    668 
    669 (defun pdf-info-query--log (string &optional query-p)
    670   "Log STRING as query/response, depending on QUERY-P.
    671 
    672 This is a no-op, if `pdf-info-log' is nil."
    673   (when pdf-info-log
    674     (with-current-buffer (get-buffer-create "*pdf-info-log*")
    675       (buffer-disable-undo)
    676       (let ((pos (point-max))
    677             (window (get-buffer-window)))
    678         (save-excursion
    679           (goto-char (point-max))
    680           (unless (bolp)
    681             (insert ?\n))
    682           (insert
    683            (propertize
    684             (format-time-string "%H:%M:%S ")
    685             'face
    686             (if query-p
    687                 'font-lock-keyword-face
    688               'font-lock-function-name-face))
    689            (if (and (numberp pdf-info-log-entry-max)
    690                     (> (length string)
    691                        pdf-info-log-entry-max))
    692                (concat (substring string 0 pdf-info-log-entry-max)
    693                        "...[truncated]\n")
    694              string)))
    695         (when (and (window-live-p window)
    696                    (= pos (window-point window)))
    697           (set-window-point window (point-max)))))))
    698 
    699 
    700 
    701 ;; * ================================================================== *
    702 ;; * Utility functions
    703 ;; * ================================================================== *
    704 
    705 (defvar doc-view-buffer-file-name)
    706 (defvar doc-view--buffer-file-name)
    707 
    708 (defun pdf-info--normalize-file-or-buffer (file-or-buffer)
    709   "Return the PDF file corresponding to FILE-OR-BUFFER.
    710 
    711 FILE-OR-BUFFER may be nil, a PDF buffer, the name of a PDF buffer
    712 or a PDF file."
    713   (unless file-or-buffer
    714     (setq file-or-buffer
    715           (current-buffer)))
    716   (when (bufferp file-or-buffer)
    717     (unless (buffer-live-p file-or-buffer)
    718       (error "Buffer is not live :%s" file-or-buffer))
    719     (with-current-buffer file-or-buffer
    720       (unless (setq file-or-buffer
    721                     (cl-case major-mode
    722                       (doc-view-mode
    723                        (cond ((boundp 'doc-view-buffer-file-name)
    724                               doc-view-buffer-file-name)
    725                              ((boundp 'doc-view--buffer-file-name)
    726                               doc-view--buffer-file-name)))
    727                       (pdf-view-mode (pdf-view-buffer-file-name))
    728                       (t (buffer-file-name))))
    729         (error "Buffer is not associated with any file :%s" (buffer-name)))))
    730   (unless (stringp file-or-buffer)
    731     (signal 'wrong-type-argument
    732             (list 'stringp 'bufferp 'null file-or-buffer)))
    733   ;; is file
    734   (when (file-remote-p file-or-buffer)
    735     (error "Processing remote files not supported :%s"
    736            file-or-buffer))
    737   ;; (unless (file-readable-p file-or-buffer)
    738   ;;   (error "File not readable :%s" file-or-buffer))
    739   (expand-file-name file-or-buffer))
    740 
    741 (defun pdf-info-valid-page-spec-p (pages)
    742   "The type predicate for a valid page-spec."
    743   (not (not (ignore-errors (pdf-info-normalize-page-range pages)))))
    744 
    745 (defun pdf-info-normalize-page-range (pages)
    746   "Normalize PAGES for sending to the server.
    747 
    748 PAGES may be a single page number, a cons \(FIRST . LAST\), or
    749 nil, which stands for all pages.
    750 
    751 The result is a cons \(FIRST . LAST\), where LAST may be 0
    752 representing the final page."
    753   (cond
    754    ((natnump pages)
    755     (cons pages pages))
    756    ((null pages)
    757     (cons 1 0))
    758    ((and (natnump (car pages))
    759          (natnump (cdr pages)))
    760     pages)
    761    (t
    762     (signal 'wrong-type-argument
    763             (list 'pdf-info-valid-page-spec-p pages)))))
    764 
    765 (defun pdf-info-parse-pdf-date (date)
    766   (when (and date
    767              (string-match pdf-info-pdf-date-regexp date))
    768     (let ((year (match-string 1 date))
    769           (month (match-string 2 date))
    770           (day (match-string 3 date))
    771           (hour (match-string 4 date))
    772           (min (match-string 5 date))
    773           (sec (match-string 6 date))
    774           (ut-char (match-string 7 date))
    775           (ut-hour (match-string 8 date))
    776           (ut-min (match-string 9 date))
    777           (tz 0))
    778       (when (or (equal ut-char "+")
    779                 (equal ut-char "-"))
    780         (when ut-hour
    781           (setq tz (* 3600 (string-to-number ut-hour))))
    782         (when ut-min
    783           (setq tz (+ tz (* 60 (string-to-number ut-min)))))
    784         (when (equal ut-char "-")
    785           (setq tz (- tz))))
    786       (encode-time
    787        (if sec (string-to-number sec) 0)
    788        (if min (string-to-number min) 0)
    789        (if hour (string-to-number hour) 0)
    790        (if day (string-to-number day) 1)
    791        (if month (string-to-number month) 1)
    792        (string-to-number year)
    793        tz))))
    794 
    795 (defmacro pdf-info-compose-queries (let-forms &rest body)
    796   "Let-bind each VAR to QUERIES results and evaluate BODY.
    797 
    798 All queries in each QUERIES form are run by the server in the
    799 order they appear and the results collected in a list, which is
    800 bound to VAR.  Then BODY is evaluated and its value becomes the
    801 final result of all queries, unless at least one of them provoked
    802 an error.  In this case BODY is ignored and the error is the
    803 result.
    804 
    805 This macro handles synchronous and asynchronous calls,
    806 i.e. `pdf-info-asynchronous' is non-nil, transparently.
    807 
    808 \(FN \(\(VAR QUERIES\)...\) BODY\)"
    809   (declare (indent 1)
    810            (debug ((&rest &or
    811                           (symbolp &optional form)
    812                           symbolp)
    813                    body)))
    814   (unless (cl-every (lambda (form)
    815                       (when (symbolp form)
    816                         (setq form (list form)))
    817                       (and (consp form)
    818                            (symbolp (car form))
    819                            (listp (cdr form))))
    820                     let-forms)
    821     (error "Invalid let-form: %s" let-forms))
    822 
    823   (setq let-forms (mapcar (lambda (form)
    824                             (if (symbolp form)
    825                                 (list form)
    826                               form))
    827                           let-forms))
    828   (let* ((status (make-symbol "status"))
    829          (response (make-symbol "response"))
    830          (first-error (make-symbol "first-error"))
    831          (done (make-symbol "done"))
    832          (callback (make-symbol "callback"))
    833          (results (make-symbol "results"))
    834          (push-fn (make-symbol "push-fn"))
    835          (terminal-fn (make-symbol "terminal-fn"))
    836          (buffer (make-symbol "buffer")))
    837     `(let* (,status
    838             ,response ,first-error ,done
    839             (,buffer (current-buffer))
    840             (,callback pdf-info-asynchronous)
    841             ;; Ensure a new alist on every invocation.
    842             (,results (mapcar 'copy-sequence
    843                               ',(cl-mapcar (lambda (form)
    844                                              (list (car form)))
    845                                            let-forms)))
    846             (,push-fn (lambda (status result var)
    847                         ;; Store result in alist RESULTS under key
    848                         ;; VAR.
    849                         (if status
    850                             (unless ,first-error
    851                               (setq ,first-error result))
    852                           (let ((elt (assq var ,results)))
    853                             (setcdr elt (append (cdr elt)
    854                                                 (list result)))))))
    855             (,terminal-fn
    856              (lambda (&rest _)
    857                ;; Let-bind responses corresponding to their variables,
    858                ;; i.e. keys in alist RESULTS.
    859                (let (,@(mapcar (lambda (var)
    860                                  `(,var (cdr (assq ',var ,results))))
    861                                (mapcar #'car let-forms)))
    862                  (setq ,status (not (not ,first-error))
    863                        ,response (or ,first-error
    864                                      (with-current-buffer ,buffer
    865                                        ,@body))
    866                        ,done t)
    867                  ;; Maybe invoke the CALLBACK (which was bound to
    868                  ;; pdf-info-asynchronous).
    869                  (when ,callback
    870                    (if (functionp ,callback)
    871                        (funcall ,callback ,status ,response)
    872                      (apply (car ,callback)
    873                        ,status ,response (cdr ,callback))))))))
    874        ;; Wrap each query in an asynchronous call, with its VAR as
    875        ;; callback argument, so the PUSH-FN can put it in the alist
    876        ;; RESULTS.
    877        ,@(mapcar (lambda (form)
    878                    (list 'let (list
    879                                (list 'pdf-info-asynchronous
    880                                      (list 'list push-fn (list 'quote (car form)))))
    881                          (cadr form)))
    882                  let-forms)
    883        ;; Request a no-op, just so we know that we are finished.
    884        (let ((pdf-info-asynchronous ,terminal-fn))
    885          (pdf-info-ping))
    886        ;; CALLBACK is the original value of pdf-info-asynchronous.  If
    887        ;; nil, this is a synchronous query.
    888        (unless ,callback
    889          (while (and (not ,done)
    890                      (eq (process-status (pdf-info-process))
    891                          'run))
    892            (accept-process-output (pdf-info-process) 0.01))
    893          (when (and (not ,done)
    894                     (not (eq (process-status (pdf-info-process))
    895                              'run)))
    896            (error "The epdfinfo server quit unexpectedly"))
    897          (when ,status
    898            (error "epdfinfo: %s" ,response))
    899          ,response))))
    900 
    901 
    902 ;; * ================================================================== *
    903 ;; * Buffer local server instances
    904 ;; * ================================================================== *
    905 
    906 (put 'pdf-info--queue 'permanent-local t)
    907 
    908 (defun pdf-info-make-local-server (&optional buffer force-restart-p)
    909   "Create a server instance local to BUFFER.
    910 
    911 Does nothing if BUFFER already has a local instance.  Unless
    912 FORCE-RESTART-P is non-nil, then quit a potential process and
    913 restart it."
    914   (unless buffer
    915     (setq buffer (current-buffer)))
    916   (with-current-buffer buffer
    917     (unless (and
    918              (not force-restart-p)
    919              (local-variable-p 'pdf-info--queue)
    920              (processp (pdf-info-process))
    921              (eq (process-status (pdf-info-process))
    922                  'run))
    923       (when (and (local-variable-p 'pdf-info--queue)
    924                  (processp (pdf-info-process)))
    925         (tq-close pdf-info--queue))
    926       (set (make-local-variable 'pdf-info--queue) nil)
    927       (pdf-info-process-assert-running t)
    928       (add-hook 'kill-buffer-hook #'pdf-info-kill-local-server nil t)
    929       pdf-info--queue)))
    930 
    931 (defun pdf-info-kill-local-server (&optional buffer)
    932   "Kill the local server in BUFFER.
    933 
    934 A No-op, if BUFFER has not running server instance."
    935   (save-current-buffer
    936     (when buffer
    937       (set-buffer buffer))
    938     (when (local-variable-p 'pdf-info--queue)
    939       (pdf-info-kill)
    940       (kill-local-variable 'pdf-info--queue)
    941       t)))
    942 
    943 (defun pdf-info-local-server-p (&optional buffer)
    944   "Return non-nil, if BUFFER has a running server instance."
    945   (unless buffer
    946     (setq buffer (current-buffer)))
    947   (setq buffer (get-buffer buffer))
    948   (and (buffer-live-p buffer)
    949        (local-variable-p 'pdf-info--queue buffer)))
    950 
    951 (defun pdf-info-local-batch-query (producer-fn
    952                                    consumer-fn
    953                                    sentinel-fn
    954                                    args)
    955   "Process a set of queries asynchronously in a local instance."
    956   (unless (pdf-info-local-server-p)
    957     (error "Create a local server first"))
    958   (let* ((buffer (current-buffer))
    959          (producer-symbol (make-symbol "producer"))
    960          (consumer-symbol (make-symbol "consumer"))
    961          (producer
    962           (lambda (args)
    963             (if (null args)
    964                 (funcall sentinel-fn 'finished buffer)
    965               (let ((pdf-info-asynchronous
    966                      (apply-partially
    967                       (symbol-function consumer-symbol)
    968                       args)))
    969                 (cond
    970                  ((pdf-info-local-server-p buffer)
    971                   (with-current-buffer buffer
    972                     (apply producer-fn (car args))))
    973                  (t
    974                   (funcall sentinel-fn 'error buffer)))))))
    975          (consumer (lambda (args status result)
    976                      (if (not (pdf-info-local-server-p buffer))
    977                          (funcall sentinel-fn 'error buffer)
    978                        (with-current-buffer buffer
    979                          (apply consumer-fn status result (car args)))
    980                        (funcall (symbol-function producer-symbol)
    981                                 (cdr args))))))
    982     (fset producer-symbol producer)
    983     (fset consumer-symbol consumer)
    984     (funcall producer args)))
    985 
    986 
    987 
    988 ;; * ================================================================== *
    989 ;; * High level interface
    990 ;; * ================================================================== *
    991 
    992 (defvar pdf-info-features nil)
    993 
    994 (defun pdf-info-features ()
    995   "Return a list of symbols describing compile-time features."
    996   (or pdf-info-features
    997       (setq pdf-info-features
    998             (let (pdf-info-asynchronous)
    999               (pdf-info-query 'features)))))
   1000 
   1001 (defun pdf-info-writable-annotations-p ()
   1002   (not (null (memq 'writable-annotations (pdf-info-features)))))
   1003 
   1004 (defun pdf-info-markup-annotations-p ()
   1005   (not (null (memq 'markup-annotations (pdf-info-features)))))
   1006 
   1007 (defmacro pdf-info-assert-writable-annotations ()
   1008   `(unless (memq 'writable-annotations (pdf-info-features))
   1009      (error "Writing annotations is not supported by this version of epdfinfo")))
   1010 
   1011 (defmacro pdf-info-assert-markup-annotations ()
   1012   `(unless (memq 'markup-annotations (pdf-info-features))
   1013      (error "Creating markup annotations is not supported by this version of epdfinfo")))
   1014 
   1015 (defun pdf-info-creatable-annotation-types ()
   1016   (let ((features (pdf-info-features)))
   1017     (cond
   1018      ((not (memq 'writable-annotations features)) nil)
   1019      ((memq 'markup-annotations features)
   1020       (list 'text 'squiggly 'underline 'strike-out 'highlight))
   1021      (t (list 'text)))))
   1022 
   1023 (defun pdf-info-open (&optional file-or-buffer password)
   1024   "Open the document FILE-OR-BUFFER using PASSWORD.
   1025 
   1026 Generally, documents are opened and closed automatically on
   1027 demand, so this function is rarely needed, unless a PASSWORD is
   1028 set on the document.
   1029 
   1030 Manually opened documents are never closed automatically."
   1031 
   1032   (pdf-info-query
   1033    'open (pdf-info--normalize-file-or-buffer file-or-buffer)
   1034    password))
   1035 
   1036 (defun pdf-info-close (&optional file-or-buffer)
   1037   "Close the document FILE-OR-BUFFER.
   1038 
   1039 Returns t, if the document was actually open, otherwise nil.
   1040 This command is rarely needed, see also `pdf-info-open'."
   1041   (let* ((pdf (pdf-info--normalize-file-or-buffer file-or-buffer))
   1042          (buffer (cond
   1043                   ((not file-or-buffer) (current-buffer))
   1044                   ((bufferp file-or-buffer) file-or-buffer)
   1045                   ((stringp file-or-buffer)
   1046                    (find-buffer-visiting file-or-buffer)))))
   1047     (prog1
   1048         (pdf-info-query 'close pdf)
   1049       (if (buffer-live-p buffer)
   1050           (with-current-buffer buffer
   1051             (run-hooks 'pdf-info-close-document-hook))
   1052         (with-temp-buffer
   1053           (run-hooks 'pdf-info-close-document-hook))))))
   1054 
   1055 (defun pdf-info-encrypted-p (&optional file-or-buffer)
   1056   "Return non-nil if FILE-OR-BUFFER requires a password.
   1057 
   1058 Note: This function returns nil, if the document is encrypted,
   1059 but was already opened (presumably using a password)."
   1060 
   1061   (condition-case err
   1062       (pdf-info-open
   1063        (pdf-info--normalize-file-or-buffer file-or-buffer))
   1064     (error (or (string-match-p
   1065                 ":Document is encrypted\\'" (cadr err))
   1066                (signal (car err) (cdr err))))))
   1067 
   1068 (defun pdf-info-metadata (&optional file-or-buffer)
   1069   "Extract the metadata from the document FILE-OR-BUFFER.
   1070 
   1071 This returns an alist containing some information about the
   1072 document."
   1073   (pdf-info-query
   1074    'metadata
   1075    (pdf-info--normalize-file-or-buffer file-or-buffer)))
   1076 
   1077 (defun pdf-info-search-string (string &optional pages file-or-buffer)
   1078   "Search for STRING in PAGES of document FILE-OR-BUFFER.
   1079 
   1080 See `pdf-info-normalize-page-range' for valid PAGES formats.
   1081 
   1082 This function returns a list of matches.  Each item is an alist
   1083 containing keys PAGE, TEXT and EDGES, where PAGE and TEXT are the
   1084 matched page resp. line. EDGES is a list containing a single
   1085 edges element \(LEFT TOP RIGHT BOTTOM\). This is for consistency
   1086 with `pdf-info-search-regexp', which may return matches with
   1087 multiple edges.
   1088 
   1089 The TEXT contains `match' face properties on the matched parts.
   1090 
   1091 Search is case-insensitive, unless `case-fold-search' is nil and
   1092 searching case-sensitive is supported by the server."
   1093 
   1094   (let ((pages (pdf-info-normalize-page-range pages)))
   1095     (pdf-info-query
   1096      'search-string
   1097      (pdf-info--normalize-file-or-buffer file-or-buffer)
   1098      (car pages)
   1099      (cdr pages)
   1100      string
   1101      (if case-fold-search 1 0))))
   1102 
   1103 (defvar pdf-info-regexp-compile-flags nil
   1104   "PCRE compile flags.
   1105 
   1106 Don't use this, but the equally named function.")
   1107 
   1108 (defvar pdf-info-regexp-match-flags nil
   1109   "PCRE match flags.
   1110 
   1111 Don't use this, but the equally named function.")
   1112 
   1113 (defun pdf-info-regexp-compile-flags ()
   1114   (or pdf-info-regexp-compile-flags
   1115       (let* (pdf-info-asynchronous
   1116              (flags (pdf-info-query 'regexp-flags))
   1117              (match (cl-remove-if-not
   1118                      (lambda (flag)
   1119                        (string-match-p
   1120                         "\\`match-" (symbol-name (car flag))))
   1121                      flags))
   1122              (compile (cl-set-difference flags match)))
   1123         (setq pdf-info-regexp-compile-flags compile
   1124               pdf-info-regexp-match-flags match)
   1125         pdf-info-regexp-compile-flags)))
   1126 
   1127 (defun pdf-info-regexp-match-flags ()
   1128   (or pdf-info-regexp-match-flags
   1129       (progn
   1130         (pdf-info-regexp-compile-flags)
   1131         pdf-info-regexp-match-flags)))
   1132 
   1133 (defvar pdf-info-regexp-flags '(multiline)
   1134   "Compile- and match-flags for the PCRE engine.
   1135 
   1136 This is a list of symbols denoting compile- and match-flags when
   1137 searching for regular expressions.
   1138 
   1139 You should not change this directly, but rather `let'-bind it
   1140 around a call to `pdf-info-search-regexp'.
   1141 
   1142 Valid compile-flags are:
   1143 
   1144 newline-crlf, newline-lf, newline-cr, dupnames, optimize,
   1145 no-auto-capture, raw, ungreedy, dollar-endonly, anchored,
   1146 extended, dotall, multiline and caseless.
   1147 
   1148 Note that the last one, caseless, is handled special, as it is
   1149 always added if `case-fold-search' is non-nil.
   1150 
   1151 And valid match-flags:
   1152 
   1153 match-anchored, match-notbol, match-noteol, match-notempty,
   1154 match-partial, match-newline-cr, match-newline-lf,
   1155 match-newline-crlf and match-newline-any.
   1156 
   1157 See the glib documentation at url
   1158 `https://developer.gnome.org/glib/stable/glib-Perl-compatible-regular-expressions.html'.")
   1159 
   1160 (defun pdf-info-search-regexp (pcre &optional pages
   1161                                     no-error
   1162                                     file-or-buffer)
   1163   "Search for a PCRE on PAGES of document FILE-OR-BUFFER.
   1164 
   1165 See `pdf-info-normalize-page-range' for valid PAGES formats and
   1166 `pdf-info-search-string' for its return value.
   1167 
   1168 Uses the flags in `pdf-info-regexp-flags', which see.  If
   1169 `case-fold-search' is non-nil, the caseless flag is added.
   1170 
   1171 If NO-ERROR is non-nil, catch errors due to invalid regexps and
   1172 return nil.  If it is the symbol `invalid-regexp', then re-signal
   1173 this kind of error as a `invalid-regexp' error."
   1174 
   1175   (cl-labels ((orflags (flags alist)
   1176                 (cl-reduce
   1177                  (lambda (v flag)
   1178                    (let ((n
   1179                           (cdr (assq flag alist))))
   1180                      (if n (logior n v) v)))
   1181                  (cons 0 flags))))
   1182     (let ((pages (pdf-info-normalize-page-range pages)))
   1183       (condition-case err
   1184           (pdf-info-query
   1185            'search-regexp
   1186            (pdf-info--normalize-file-or-buffer file-or-buffer)
   1187            (car pages)
   1188            (cdr pages)
   1189            pcre
   1190            (orflags `(,(if case-fold-search
   1191                            'caseless)
   1192                       ,@pdf-info-regexp-flags)
   1193                     (pdf-info-regexp-compile-flags))
   1194            (orflags pdf-info-regexp-flags
   1195                     (pdf-info-regexp-match-flags)))
   1196         (error
   1197          (let ((re
   1198                 (concat "\\`epdfinfo: *Invalid *regexp: *"
   1199                         ;; glib error
   1200                         "\\(?:Error while compiling regular expression"
   1201                         " *%s *\\)?\\(.*\\)")))
   1202            (if (or (null no-error)
   1203                    (not (string-match
   1204                          (format re (regexp-quote pcre))
   1205                          (cadr err))))
   1206                (signal (car err) (cdr err))
   1207              (if (eq no-error 'invalid-regexp)
   1208                  (signal 'invalid-regexp
   1209                          (list (match-string 1 (cadr err))))))))))))
   1210 
   1211 (defun pdf-info-pagelinks (page &optional file-or-buffer)
   1212   "Return a list of links on PAGE in document FILE-OR-BUFFER.
   1213 
   1214 This function returns a list of alists with the following keys.
   1215 EDGES represents the relative bounding-box of the link , TYPE is
   1216 the type of the action, TITLE is a, possibly empty, name for this
   1217 action.
   1218 
   1219 TYPE may be one of
   1220 
   1221 goto-dest -- This is a internal link to some page.  Each element
   1222 contains additional keys PAGE and TOP, where PAGE is the page of
   1223 the link and TOP its vertical position.
   1224 
   1225 goto-remote -- This a external link to some document.  Same as
   1226 goto-dest, with an additional FILENAME of the external PDF.
   1227 
   1228 uri -- A link in form of some URI. Alist contains additional key
   1229 URI.
   1230 
   1231 In the first two cases, PAGE may be 0 and TOP nil, which means
   1232 these data is unspecified."
   1233   (cl-check-type page natnum)
   1234   (pdf-info-query
   1235    'pagelinks
   1236    (pdf-info--normalize-file-or-buffer file-or-buffer)
   1237    page))
   1238 
   1239 (defun pdf-info-number-of-pages (&optional file-or-buffer)
   1240   "Return the number of pages in document FILE-OR-BUFFER."
   1241   (pdf-info-query 'number-of-pages
   1242                   (pdf-info--normalize-file-or-buffer
   1243                    file-or-buffer)))
   1244 
   1245 (defun pdf-info-outline (&optional file-or-buffer)
   1246   "Return the PDF outline of document FILE-OR-BUFFER.
   1247 
   1248 This function returns a list of alists like `pdf-info-pagelinks'.
   1249 Additionally every alist has a DEPTH (>= 1) entry with the depth
   1250 of this element in the tree."
   1251 
   1252   (pdf-info-query
   1253    'outline
   1254    (pdf-info--normalize-file-or-buffer file-or-buffer)))
   1255 
   1256 (defun pdf-info--selection-style (selection-style)
   1257   "SELECTION-STYLE is the smallest unit of the selected region.
   1258 
   1259 It must be one of glyph, word or line. If it is none of these, we
   1260 fallback to glyph."
   1261   (cl-case selection-style
   1262     (glyph 0)
   1263     (word 1)
   1264     (line 2)
   1265     (t 0)))
   1266 
   1267 (defun pdf-info-gettext (page edges &optional selection-style
   1268                               file-or-buffer)
   1269   "Get text on PAGE according to EDGES.
   1270 
   1271 EDGES should contain relative coordinates.  The selection may
   1272 extend over multiple lines, which works similar to a Emacs
   1273 region. SELECTION-STYLE may be one of glyph, word or line and
   1274 determines the smallest unit of the selected region.
   1275 
   1276 Return the text contained in the selection."
   1277 
   1278   (pdf-info-query
   1279    'gettext
   1280    (pdf-info--normalize-file-or-buffer file-or-buffer)
   1281    page
   1282    (mapconcat #'number-to-string edges " ")
   1283    (pdf-info--selection-style selection-style)))
   1284 
   1285 (defun pdf-info-getselection (page edges
   1286                                    &optional selection-style file-or-buffer)
   1287   "Return the edges of the selection EDGES on PAGE.
   1288 
   1289 Arguments are the same as for `pdf-info-gettext'.  Return a list
   1290 of edges corresponding to the text that would be returned by the
   1291 aforementioned function, when called with the same arguments."
   1292   (pdf-info-query
   1293    'getselection
   1294    (pdf-info--normalize-file-or-buffer file-or-buffer)
   1295    page
   1296    (mapconcat #'number-to-string edges " ")
   1297    (pdf-info--selection-style selection-style)))
   1298 
   1299 (defun pdf-info-textregions (page &optional file-or-buffer)
   1300   "Return a list of edges describing PAGE's text-layout."
   1301   (pdf-info-getselection
   1302    page '(0 0 1 1) 'glyph file-or-buffer))
   1303 
   1304 (defun pdf-info-charlayout (page &optional edges-or-pos file-or-buffer)
   1305   "Return the layout of characters of PAGE in/at EDGES-OR-POS.
   1306 
   1307 Returns a list of elements \(CHAR . \(LEFT TOP RIGHT BOT\)\) mapping
   1308 character to their corresponding relative bounding-boxes.
   1309 
   1310 EDGES-OR-POS may be a region \(LEFT TOP RIGHT BOT\) restricting
   1311 the returned value to include only characters fully contained in
   1312 it.  Or a cons \(LEFT . TOP\) which means to only include the
   1313 character at this position.  In this case the return value
   1314 contains at most one element."
   1315 
   1316   ;; FIXME: Actually returns \(CHAR . LEFT ...\).
   1317 
   1318   (unless edges-or-pos
   1319     (setq edges-or-pos '(0 0 1 1)))
   1320   (when (numberp (cdr edges-or-pos))
   1321     (setq edges-or-pos (list (car edges-or-pos)
   1322                              (cdr edges-or-pos)
   1323                              -1 -1)))
   1324   (pdf-info-query
   1325    'charlayout
   1326    (pdf-info--normalize-file-or-buffer file-or-buffer)
   1327    page
   1328    (mapconcat #'number-to-string edges-or-pos " ")))
   1329 
   1330 (defun pdf-info-pagesize (page &optional file-or-buffer)
   1331   "Return the size of PAGE as a cons \(WIDTH . HEIGHT\)
   1332 
   1333 The size is in PDF points."
   1334   (pdf-info-query
   1335    'pagesize
   1336    (pdf-info--normalize-file-or-buffer file-or-buffer)
   1337    page))
   1338 
   1339 (defun pdf-info-running-p ()
   1340   "Return non-nil, if the server is running."
   1341   (and (processp (pdf-info-process))
   1342        (eq (process-status (pdf-info-process))
   1343            'run)))
   1344 
   1345 (defun pdf-info-quit (&optional timeout)
   1346   "Quit the epdfinfo server.
   1347 
   1348 This blocks until all outstanding requests are answered.  Unless
   1349 TIMEOUT is non-nil, in which case we wait at most TIMEOUT seconds
   1350 before killing the server."
   1351   (cl-check-type timeout (or null number))
   1352   (when (pdf-info-running-p)
   1353     (let ((pdf-info-asynchronous
   1354            (if timeout (lambda (&rest _))
   1355              pdf-info-asynchronous)))
   1356       (pdf-info-query 'quit)
   1357       (when timeout
   1358         (setq timeout (+ (float-time) (max 0 timeout)))
   1359         (while (and (pdf-info-running-p)
   1360                     (> timeout (float-time)))
   1361           (accept-process-output (pdf-info-process) 0.5 nil t)))))
   1362   (when (processp (pdf-info-process))
   1363     (tq-close pdf-info--queue))
   1364   (setq pdf-info--queue nil))
   1365 
   1366 (defun pdf-info-kill ()
   1367   "Kill the epdfinfo server.
   1368 
   1369 Immediately delete the server process, see also `pdf-info-quit',
   1370 for a more sane way to exit the program."
   1371   (when (processp (pdf-info-process))
   1372     (tq-close pdf-info--queue))
   1373   (setq pdf-info--queue nil))
   1374 
   1375 (defun pdf-info-getannots (&optional pages file-or-buffer)
   1376   "Return the annotations on PAGE.
   1377 
   1378 See `pdf-info-normalize-page-range' for valid PAGES formats.
   1379 
   1380 This function returns the annotations for PAGES as a list of
   1381 alists.  Each element of this list describes one annotation and
   1382 contains the following keys.
   1383 
   1384 page     - Its page number.
   1385 edges    - Its area.
   1386 type     - A symbol describing the annotation's type.
   1387 id       - A document-wide unique symbol referencing this annotation.
   1388 flags    - Its flags, binary encoded.
   1389 color    - Its color in standard Emacs notation.
   1390 contents - The text of this annotation.
   1391 modified - The last modification date of this annotation.
   1392 
   1393 Additionally, if the annotation is a markup annotation, the
   1394 following keys are present.
   1395 
   1396 label        - The annotation's label.
   1397 subject      - The subject addressed.
   1398 opacity      - The level of relative opacity.
   1399 popup-edges  - The edges of a associated popup window or nil.
   1400 popup-is-open - Whether this window should be displayed open.
   1401 created      - The date this markup annotation was created.
   1402 
   1403 If the annotation is also a markup text annotation, the alist
   1404 contains the following keys.
   1405 
   1406 text-icon  - A string describing the purpose of this annotation.
   1407 text-state - A string, e.g. accepted or rejected." ;FIXME: Use symbols ?
   1408 
   1409   (let ((pages (pdf-info-normalize-page-range pages)))
   1410     (pdf-info-query
   1411      'getannots
   1412      (pdf-info--normalize-file-or-buffer file-or-buffer)
   1413      (car pages)
   1414      (cdr pages))))
   1415 
   1416 (defun pdf-info-getannot (id &optional file-or-buffer)
   1417   "Return the annotation for ID.
   1418 
   1419 ID should be a symbol, which was previously returned in a
   1420 `pdf-info-getannots' query.  Signal an error, if an annotation
   1421 with ID is not available.
   1422 
   1423 See `pdf-info-getannots' for the kind of return value of this
   1424 function."
   1425   (pdf-info-query
   1426    'getannot
   1427    (pdf-info--normalize-file-or-buffer file-or-buffer)
   1428    id))
   1429 
   1430 (defun pdf-info-addannot (page edges type
   1431                                &optional selection-style file-or-buffer
   1432                                &rest markup-edges)
   1433   "Add a new annotation to PAGE with EDGES of TYPE.
   1434 
   1435 FIXME: TYPE may be one of `text', `markup-highlight', ... .
   1436 FIXME: -1 = 24
   1437 See `pdf-info-getannots' for the kind of value of this function
   1438 returns."
   1439   (pdf-info-assert-writable-annotations)
   1440   (when (consp file-or-buffer)
   1441     (push file-or-buffer markup-edges)
   1442     (setq file-or-buffer nil))
   1443   (apply
   1444    #'pdf-info-query
   1445    'addannot
   1446    (pdf-info--normalize-file-or-buffer file-or-buffer)
   1447    page
   1448    type
   1449    (pdf-info--selection-style selection-style)
   1450    (mapconcat 'number-to-string edges " ")
   1451    (mapcar (lambda (me)
   1452              (mapconcat 'number-to-string me " "))
   1453            markup-edges)))
   1454 
   1455 (defun pdf-info-delannot (id &optional file-or-buffer)
   1456   "Delete the annotation with ID in FILE-OR-BUFFER.
   1457 
   1458 ID should be a symbol, which was previously returned in a
   1459 `pdf-info-getannots' query.  Signal an error, if annotation ID
   1460 does not exist."
   1461   (pdf-info-assert-writable-annotations)
   1462   (pdf-info-query
   1463    'delannot
   1464    (pdf-info--normalize-file-or-buffer file-or-buffer)
   1465    id))
   1466 
   1467 (defun pdf-info-mvannot (id edges &optional file-or-buffer)
   1468   "Move/Resize annotation ID to fit EDGES.
   1469 
   1470 ID should be a symbol, which was previously returned in a
   1471 `pdf-info-getannots' query.  Signal an error, if annotation ID
   1472 does not exist.
   1473 
   1474 EDGES should be a list \(LEFT TOP RIGHT BOT\).  RIGHT and/or BOT
   1475 may also be negative, which means to keep the width
   1476 resp. height."
   1477   (pdf-info-editannot id `((edges . ,edges)) file-or-buffer))
   1478 
   1479 (defun pdf-info-editannot (id modifications &optional file-or-buffer)
   1480   "Edit annotation ID, applying MODIFICATIONS.
   1481 
   1482 ID should be a symbol, which was previously returned in a
   1483 `pdf-info-getannots' query.
   1484 
   1485 MODIFICATIONS is an alist of properties and their new values.
   1486 
   1487 The server must support modifying annotations for this to work."
   1488 
   1489   (pdf-info-assert-writable-annotations)
   1490   (let ((edits
   1491          (mapcar
   1492           (lambda (elt)
   1493             (cl-case (car elt)
   1494               (color
   1495                (list (car elt)
   1496                      (pdf-util-hexcolor (cdr elt))))
   1497               (edges
   1498                (list (car elt)
   1499                      (mapconcat 'number-to-string (cdr elt) " ")))
   1500               ((popup-is-open is-open)
   1501                (list (car elt) (if (cdr elt) 1 0)))
   1502               (t
   1503                (list (car elt) (cdr elt)))))
   1504           modifications)))
   1505     (apply #'pdf-info-query
   1506            'editannot
   1507            (pdf-info--normalize-file-or-buffer file-or-buffer)
   1508            id
   1509            (apply #'append edits))))
   1510 
   1511 (defun pdf-info-save (&optional file-or-buffer)
   1512   "Save FILE-OR-BUFFER.
   1513 
   1514 This saves the document to a new temporary file, which is
   1515 returned and owned by the caller."
   1516   (pdf-info-assert-writable-annotations)
   1517   (pdf-info-query
   1518    'save
   1519    (pdf-info--normalize-file-or-buffer file-or-buffer)))
   1520 
   1521 (defun pdf-info-getattachment-from-annot (id &optional do-save file-or-buffer)
   1522   "Return the attachment associated with annotation ID.
   1523 
   1524 ID should be a symbol which was previously returned in a
   1525 `pdf-info-getannots' query, and referencing an attachment of type
   1526 `file', otherwise an error is signaled.
   1527 
   1528 See `pdf-info-getattachments' for the kind of return value of this
   1529 function and the meaning of DO-SAVE."
   1530 
   1531   (pdf-info-query
   1532    'getattachment-from-annot
   1533    (pdf-info--normalize-file-or-buffer file-or-buffer)
   1534    id
   1535    (if do-save 1 0)))
   1536 
   1537 (defun pdf-info-getattachments (&optional do-save file-or-buffer)
   1538   "Return all document level attachments.
   1539 
   1540 If DO-SAVE is non-nil, save the attachments data to a local file,
   1541 which is then owned by the caller, see below.
   1542 
   1543 This function returns a list of alists, where every element
   1544 contains the following keys.  All values, except for id, may be
   1545 nil, i.e. not present.
   1546 
   1547 id          - A symbol uniquely identifying this attachment.
   1548 filename    - The filename of this attachment.
   1549 description - A description of this attachment.
   1550 size        - The size in bytes.
   1551 modified    - The last modification date.
   1552 created     - The date of creation.
   1553 checksum    - A MD5 checksum of this attachment's data.
   1554 file        - The name of a tempfile containing the data (only present if
   1555               DO-SAVE is non-nil)."
   1556 
   1557   (pdf-info-query
   1558    'getattachments
   1559    (pdf-info--normalize-file-or-buffer file-or-buffer)
   1560    (if do-save 1 0)))
   1561 
   1562 (defun pdf-info-synctex-forward-search (source &optional line column file-or-buffer)
   1563   "Perform a forward search with synctex.
   1564 
   1565 SOURCE should be a LaTeX buffer or the absolute filename of a
   1566 corresponding file.  LINE and COLUMN represent the position in
   1567 the buffer or file.  Finally FILE-OR-BUFFER corresponds to the
   1568 PDF document.
   1569 
   1570 Returns an alist with entries PAGE and relative EDGES describing
   1571 the position in the PDF document corresponding to the SOURCE
   1572 location."
   1573 
   1574   (let ((source (if (buffer-live-p (get-buffer source))
   1575                     (buffer-file-name (get-buffer source))
   1576                   source)))
   1577     (pdf-info-query
   1578      'synctex-forward-search
   1579      (pdf-info--normalize-file-or-buffer file-or-buffer)
   1580      source
   1581      (or line 1)
   1582      (or column 1))))
   1583 
   1584 (defun pdf-info-synctex-backward-search (page &optional x y file-or-buffer)
   1585   "Perform a backward search with synctex.
   1586 
   1587 Find the source location corresponding to the coordinates
   1588 \(X . Y\) on PAGE in FILE-OR-BUFFER.
   1589 
   1590 Returns an alist with entries FILENAME, LINE and COLUMN."
   1591 
   1592 
   1593   (pdf-info-query
   1594    'synctex-backward-search
   1595    (pdf-info--normalize-file-or-buffer file-or-buffer)
   1596    page
   1597    (or x 0)
   1598    (or y 0)))
   1599 
   1600 (defun pdf-info-renderpage (page width &optional file-or-buffer &rest commands)
   1601   "Render PAGE with width WIDTH.
   1602 
   1603 Return the data of the corresponding PNG image."
   1604   (when (keywordp file-or-buffer)
   1605     (push file-or-buffer commands)
   1606     (setq file-or-buffer nil))
   1607   (apply #'pdf-info-query
   1608     'renderpage
   1609     (pdf-info--normalize-file-or-buffer file-or-buffer)
   1610     page
   1611     (* width (pdf-util-frame-scale-factor))
   1612     (let (transformed)
   1613       (while (cdr commands)
   1614         (let ((kw (pop commands))
   1615               (value (pop commands)))
   1616           (setq value
   1617                 (cl-case kw
   1618                   ((:crop-to :highlight-line :highlight-region :highlight-text)
   1619                    (mapconcat #'number-to-string value " "))
   1620                   ((:foreground :background)
   1621                    (pdf-util-hexcolor value))
   1622                   (:alpha
   1623                    (number-to-string value))
   1624                   (:selection-style
   1625                    (number-to-string (pdf-info--selection-style value)))
   1626                   (otherwise value)))
   1627           (push kw transformed)
   1628           (push value transformed)))
   1629       (when commands
   1630         (error "Keyword is missing a value: %s" (car commands)))
   1631       (nreverse transformed))))
   1632 
   1633 (defun pdf-info-renderpage-text-regions (page width single-line-p
   1634                                               &optional selection-style file-or-buffer
   1635                                               &rest regions)
   1636   "Highlight text on PAGE with width WIDTH using REGIONS.
   1637 
   1638 REGIONS is a list determining foreground and background color and
   1639 the regions to render. So each element should look like \(FG BG
   1640 \(LEFT TOP RIGHT BOT\) \(LEFT TOP RIGHT BOT\) ... \) . The
   1641 rendering is text-aware and is controlled by SELECTION-STYLE.
   1642 
   1643 If SINGLE-LINE-P is non-nil, the edges in REGIONS are each
   1644 supposed to be limited to a single line in the document.  Setting
   1645 this, if applicable, avoids rendering problems.
   1646 
   1647 For the other args see `pdf-info-renderpage'.
   1648 
   1649 Return the data of the corresponding PNG image."
   1650 
   1651   (when (consp file-or-buffer)
   1652     (push file-or-buffer regions)
   1653     (setq file-or-buffer nil))
   1654 
   1655   (apply #'pdf-info-renderpage
   1656     page width file-or-buffer
   1657     (apply #'append
   1658       `(:selection-style ,selection-style)
   1659       (mapcar (lambda (elt)
   1660                 `(:foreground ,(pop elt)
   1661                   :background ,(pop elt)
   1662                   ,@(cl-mapcan (lambda (edges)
   1663                                  `(,(if single-line-p
   1664                                         :highlight-line
   1665                                       :highlight-text)
   1666                                    ,edges))
   1667                                elt)))
   1668               regions))))
   1669 
   1670 (defun pdf-info-renderpage-highlight (page width
   1671                                            &optional file-or-buffer
   1672                                            &rest regions)
   1673   "Highlight regions on PAGE with width WIDTH using REGIONS.
   1674 
   1675 REGIONS is a list determining the background color, a alpha value
   1676 and the regions to render. So each element should look like \(FILL-COLOR
   1677 STROKE-COLOR ALPHA \(LEFT TOP RIGHT BOT\) \(LEFT TOP RIGHT BOT\) ... \)
   1678 .
   1679 
   1680 For the other args see `pdf-info-renderpage'.
   1681 
   1682 Return the data of the corresponding PNG image."
   1683 
   1684   (when (consp file-or-buffer)
   1685     (push file-or-buffer regions)
   1686     (setq file-or-buffer nil))
   1687 
   1688   (apply #'pdf-info-renderpage
   1689     page width file-or-buffer
   1690     (apply #'append
   1691       (mapcar (lambda (elt)
   1692                 `(:background ,(pop elt)
   1693                   :foreground ,(pop elt)
   1694                   :alpha ,(pop elt)
   1695                   ,@(cl-mapcan (lambda (edges)
   1696                                  `(:highlight-region ,edges))
   1697                                elt)))
   1698               regions))))
   1699 
   1700 (defun pdf-info-boundingbox (page &optional file-or-buffer)
   1701   "Return a bounding-box for PAGE.
   1702 
   1703 Returns a list \(LEFT TOP RIGHT BOT\)."
   1704 
   1705   (pdf-info-query
   1706    'boundingbox
   1707    (pdf-info--normalize-file-or-buffer file-or-buffer)
   1708    page))
   1709 
   1710 (defun pdf-info-getoptions (&optional file-or-buffer)
   1711   (pdf-info-query
   1712    'getoptions
   1713    (pdf-info--normalize-file-or-buffer file-or-buffer)))
   1714 
   1715 (defun pdf-info-setoptions (&optional file-or-buffer &rest options)
   1716   (when (symbolp file-or-buffer)
   1717     (push file-or-buffer options)
   1718     (setq file-or-buffer nil))
   1719   (unless (= (% (length options) 2) 0)
   1720     (error "Missing a option value"))
   1721   (apply #'pdf-info-query
   1722     'setoptions
   1723     (pdf-info--normalize-file-or-buffer file-or-buffer)
   1724     (let (soptions)
   1725       (while options
   1726         (let ((key (pop options))
   1727               (value (pop options)))
   1728           (unless (and (keywordp key)
   1729                        (not (eq key :)))
   1730             (error "Keyword expected: %s" key))
   1731           (cl-case key
   1732             ((:render/foreground :render/background)
   1733              (push (pdf-util-hexcolor value)
   1734                    soptions))
   1735             ((:render/printed)
   1736              (push (if value 1 0) soptions))
   1737             ((:render/usecolors)
   1738              ;; 0 -> original color
   1739              ;; 1 -> recolor document to grayscale mapping black to
   1740              ;;      :render/foreground and white to :render/background
   1741              ;; 2 -> recolor document by inverting the perceived lightness
   1742              ;;      preserving hue
   1743              (push (if (and (integerp value) (> 3 value 0)) value 0)
   1744                    soptions))
   1745             (t (push value soptions)))
   1746           (push key soptions)))
   1747       soptions)))
   1748 
   1749 
   1750 
   1751 (defun pdf-info-pagelabels (&optional file-or-buffer)
   1752   "Return a list of pagelabels.
   1753 
   1754 Returns a list of strings corresponding to the labels of the
   1755 pages in FILE-OR-BUFFER."
   1756 
   1757   (pdf-info-query
   1758    'pagelabels
   1759    (pdf-info--normalize-file-or-buffer file-or-buffer)))
   1760 
   1761 (defun pdf-info-ping (&optional message)
   1762   "Ping the server using MESSAGE.
   1763 
   1764 Returns MESSAGE, which defaults to \"pong\"."
   1765   (pdf-info-query 'ping (or message "pong")))
   1766 
   1767 (provide 'pdf-info)
   1768 
   1769 ;;; pdf-info.el ends here