pdf-info.el (65103B)
1 ;;; pdf-info.el --- Extract info from pdf-files via a helper process. -*- lexical-binding: t -*- 2 3 ;; Copyright (C) 2013, 2014 Andreas Politz 4 5 ;; Author: Andreas Politz <politza@fh-trier.de> 6 ;; Keywords: files, multimedia 7 8 ;; This program is free software; you can redistribute it and/or modify 9 ;; it under the terms of the GNU General Public License as published by 10 ;; the Free Software Foundation, either version 3 of the License, or 11 ;; (at your option) any later version. 12 13 ;; This program is distributed in the hope that it will be useful, 14 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of 15 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 ;; GNU General Public License for more details. 17 18 ;; You should have received a copy of the GNU General Public License 19 ;; along with this program. If not, see <http://www.gnu.org/licenses/>. 20 21 ;;; Commentary: 22 ;; 23 ;; This library represents the Lisp side of the epdfinfo server. This 24 ;; program works on a command/response basis, but there should be no 25 ;; need to understand the protocol, since every command has a 26 ;; corresponding Lisp-function (see below under `High level 27 ;; interface'). 28 ;; 29 ;; Most of these functions receive a file-or-buffer argument, which 30 ;; may be what it says and defaults to the current buffer. Also, most 31 ;; functions return some sort of alist, with, in most cases, 32 ;; straight-forward key-value-pairs. Though some may be only 33 ;; understandable in the context of Adobe's PDF spec \(Adobe 34 ;; PDF32000\) or the poppler documentation (e.g. annotation flags). 35 ;; 36 ;; If the poppler library is fairly recent (>= 0.19.4, older versions 37 ;; have a bug, which may corrupt the document), annotations maybe 38 ;; modified to a certain degree, deleted and text-annotations created. 39 ;; The state of these modifications is held in the server. In order 40 ;; to realize, annotations retrieved or created are referenced by a 41 ;; unique symbol. Saving these changes creates a new file, the 42 ;; original document is never touched. 43 44 ;;; Todo: 45 ;; 46 ;; + Close documents at some time (e.g. when the buffer is killed) 47 ;; 48 49 ;;; Code: 50 51 (require 'tq) 52 (require 'cl-lib) 53 54 55 56 ;; * ================================================================== * 57 ;; * Customizations 58 ;; * ================================================================== * 59 60 (defgroup pdf-info nil 61 "Extract infos from pdf-files via a helper process." 62 :group 'pdf-tools) 63 64 (defcustom pdf-info-epdfinfo-program 65 (let ((executable (if (eq system-type 'windows-nt) 66 "epdfinfo.exe" "epdfinfo")) 67 (default-directory 68 (or (and load-file-name 69 (file-name-directory load-file-name)) 70 default-directory))) 71 (cl-labels ((try-directory (directory) 72 (and (file-directory-p directory) 73 (file-executable-p (expand-file-name executable directory)) 74 (expand-file-name executable directory)))) 75 (or (executable-find executable) 76 ;; This works if epdfinfo is in the same place as emacs and 77 ;; the editor was started with an absolute path, i.e. it is 78 ;; meant for Windows/Msys2. 79 (and (stringp (car-safe command-line-args)) 80 (file-name-directory (car command-line-args)) 81 (try-directory 82 (file-name-directory (car command-line-args)))) 83 ;; If we are running directly from the git repo. 84 (try-directory (expand-file-name "../server")) 85 ;; Fall back to epdfinfo in the directory of this file. 86 (expand-file-name executable)))) 87 "Filename of the epdfinfo executable." 88 :type 'file) 89 90 (defcustom pdf-info-epdfinfo-error-filename nil 91 "Filename for error output of the epdfinfo executable. 92 93 If nil, discard any error messages. Useful for debugging." 94 :type `(choice (const :tag "None" nil) 95 ,@(when (file-directory-p "/tmp/") 96 '((const "/tmp/epdfinfo.log"))) 97 (file))) 98 99 (defcustom pdf-info-log nil 100 "Whether to log the communication with the server. 101 102 If this is non-nil, all communication with the epdfinfo program 103 will be logged to the buffer \"*pdf-info-log*\"." 104 :type 'boolean) 105 106 (defcustom pdf-info-log-entry-max 512 107 "Maximum number of characters in a single log entry. 108 109 This variable has no effect if `pdf-info-log' is nil." 110 :type 'integer) 111 112 (defcustom pdf-info-restart-process-p 'ask 113 "What to do when the epdfinfo server died. 114 115 This should be one of 116 nil -- do nothing, 117 t -- automatically restart it or 118 ask -- ask whether to restart or not. 119 120 If it is `ask', the server quits and you answer no, this variable 121 is set to nil." 122 :type '(choice (const :tag "Do nothing" nil) 123 (const :tag "Restart silently" t) 124 (const :tag "Always ask" ask))) 125 126 (defcustom pdf-info-close-document-hook nil 127 "A hook ran after a document was closed in the server. 128 129 The hook is run in the documents buffer, if it exists. Otherwise 130 in a `with-temp-buffer' form." 131 :type 'hook) 132 133 134 135 ;; * ================================================================== * 136 ;; * Variables 137 ;; * ================================================================== * 138 139 (defvar pdf-info-asynchronous nil 140 "If non-nil process queries asynchronously. 141 142 More specifically the value should be a function of at 2 143 arguments \(fn STATUS RESPONSE\), where STATUS is either nil, for 144 a successful query, or the symbol error. RESPONSE is either the 145 command's response or the error message. This does not work 146 recursive, i.e. if function wants to make another asynchronous 147 query it has to rebind this variable. 148 149 Alternatively it may be a list \(FN . ARGS\), in which case FN 150 will be invoked like \(apply FN STATUS RESPONSE ARGS\). 151 152 Also, all pdf-info functions normally returning a response return 153 nil. 154 155 This variable should only be let-bound.") 156 157 (defconst pdf-info-pdf-date-regexp 158 ;; Adobe PDF32000.book, 7.9.4 Dates 159 (eval-when-compile 160 (concat 161 ;; allow for preceding garbage 162 ;;"\\`" 163 "[dD]:" 164 "\\([0-9]\\{4\\}\\)" ;year 165 "\\(?:" 166 "\\([0-9]\\{2\\}\\)" ;month 167 "\\(?:" 168 "\\([0-9]\\{2\\}\\)" ;day 169 "\\(?:" 170 "\\([0-9]\\{2\\}\\)" ;hour 171 "\\(?:" 172 "\\([0-9]\\{2\\}\\)" ;minutes 173 "\\(?:" 174 "\\([0-9]\\{2\\}\\)" ;seconds 175 "\\)?\\)?\\)?\\)?\\)?" 176 "\\(?:" 177 "\\([+-Zz]\\)" ;UT delta char 178 "\\(?:" 179 "\\([0-9]\\{2\\}\\)" ;UT delta hours 180 "\\(?:" 181 "'" 182 "\\([0-9]\\{2\\}\\)" ;UT delta minutes 183 "\\)?\\)?\\)?" 184 ;; "\\'" 185 ;; allow for trailing garbage 186 ))) 187 188 (defvar pdf-info--queue t 189 "Internally used transmission-queue for the server. 190 191 This variable is initially t, telling the code starting the 192 server, that it never ran.") 193 194 195 ;; * ================================================================== * 196 ;; * Process handling 197 ;; * ================================================================== * 198 199 (defconst pdf-info-empty-page-data 200 (eval-when-compile 201 (concat 202 "%PDF-1.0\n1 0 obj<</Type/Catalog/Pages 2 0 R>>endobj 2 0" 203 " obj<</Type/Pages/Kids[3 0 R]/Count 1>>endobj 3 0 obj<</" 204 "Type/Page/MediaBox[0 0 3 3]>>endobj\nxref\n0 4\n00000000" 205 "0065535 f\n0000000010 00000 n\n0000000053 00000 n\n00000" 206 "00102 00000 n\ntrailer<</Size 4/Root 1 0 R>>\nstartxref\n149\n%EOF")) 207 "PDF data of an empty page.") 208 209 (defun pdf-info-process () 210 "Return the process object or nil." 211 (and pdf-info--queue 212 (not (eq t pdf-info--queue)) 213 (tq-process pdf-info--queue))) 214 215 (defun pdf-info-check-epdfinfo (&optional interactive-p) 216 "Check if the server should be working properly. 217 218 Signal an error if some problem was found. Message a 219 confirmation, if INTERACTIVE-P is non-nil and no problems were 220 found. 221 222 Returns nil." 223 (interactive "p") 224 (let ((executable pdf-info-epdfinfo-program)) 225 (unless (stringp executable) 226 (error "pdf-info-epdfinfo-program is unset or not a string")) 227 (unless (file-executable-p executable) 228 (error "pdf-info-epdfinfo-program is not executable")) 229 (when pdf-info-epdfinfo-error-filename 230 (unless (and (stringp pdf-info-epdfinfo-error-filename) 231 (file-writable-p pdf-info-epdfinfo-error-filename)) 232 (error "pdf-info-epdfinfo-error-filename should contain writable filename"))) 233 (let* ((default-directory (expand-file-name "~/")) 234 (cmdfile (make-temp-file "commands")) 235 (pdffile (make-temp-file "empty.pdf")) 236 (tempdir (make-temp-file "tmpdir" t)) 237 (process-environment (cons (concat "TMPDIR=" tempdir) 238 process-environment))) 239 (unwind-protect 240 (with-temp-buffer 241 (with-temp-file pdffile 242 (set-buffer-multibyte nil) 243 (insert pdf-info-empty-page-data)) 244 (with-temp-file cmdfile 245 (insert (format "renderpage:%s:1:100\nquit\n" 246 (pdf-info-query--escape pdffile)))) 247 (unless (= 0 (apply #'call-process 248 executable cmdfile (current-buffer) 249 nil (when pdf-info-epdfinfo-error-filename 250 (list pdf-info-epdfinfo-error-filename)))) 251 (error "Error running `%s': %s" 252 pdf-info-epdfinfo-program 253 (buffer-string)))) 254 (when (file-exists-p cmdfile) 255 (delete-file cmdfile)) 256 (when (file-exists-p pdffile) 257 (delete-file pdffile)) 258 (when (file-exists-p tempdir) 259 (delete-directory tempdir t))))) 260 (when interactive-p 261 (message "The epdfinfo program appears to be working.")) 262 nil) 263 264 (defun pdf-info-process-assert-running (&optional force) 265 "Assert a running process. 266 267 If it never ran, i.e. `pdf-info-process' is t, start it 268 unconditionally. 269 270 Otherwise, if FORCE is non-nil start it, if it is not running. 271 Else restart it with respect to the variable 272 `pdf-info-restart-process-p', which see. 273 274 If getting the process to run fails, this function throws an 275 error." 276 (interactive "P") 277 (unless (and (processp (pdf-info-process)) 278 (eq (process-status (pdf-info-process)) 279 'run)) 280 (when (pdf-info-process) 281 (tq-close pdf-info--queue) 282 (setq pdf-info--queue nil)) 283 (unless (or force 284 (eq pdf-info--queue t) 285 (and (eq pdf-info-restart-process-p 'ask) 286 (not noninteractive) 287 (y-or-n-p "The epdfinfo server quit, restart it ? ")) 288 (and pdf-info-restart-process-p 289 (not (eq pdf-info-restart-process-p 'ask)))) 290 291 (when (eq pdf-info-restart-process-p 'ask) 292 (setq pdf-info-restart-process-p nil)) 293 (error "The epdfinfo server quit")) 294 (pdf-info-check-epdfinfo) 295 (let* ((process-connection-type) ;Avoid 4096 Byte bug #12440. 296 (default-directory "~") 297 (proc (apply #'start-process 298 "epdfinfo" " *epdfinfo*" pdf-info-epdfinfo-program 299 (when pdf-info-epdfinfo-error-filename 300 (list pdf-info-epdfinfo-error-filename))))) 301 (with-current-buffer " *epdfinfo*" 302 (erase-buffer)) 303 (set-process-query-on-exit-flag proc nil) 304 (set-process-coding-system proc 'utf-8-unix 'utf-8-unix) 305 (setq pdf-info--queue (tq-create proc)))) 306 pdf-info--queue) 307 308 (when (< emacs-major-version 27) 309 (advice-add 'tq-process-buffer :around #'pdf-info--tq-workaround)) 310 311 (defun pdf-info--tq-workaround (orig-fun tq &rest args) 312 "Fix a bug in trunk where the wrong callback gets called. 313 314 ORIG-FUN is the callback that should be called. TQ and ARGS are 315 the transmission-queue and arguments to the callback." 316 ;; FIXME: Make me iterative. 317 (if (not (equal (car (process-command (tq-process tq))) 318 pdf-info-epdfinfo-program)) 319 (apply orig-fun tq args) 320 (let ((buffer (tq-buffer tq)) 321 done) 322 (when (buffer-live-p buffer) 323 (set-buffer buffer) 324 (while (and (not done) 325 (> (buffer-size) 0)) 326 (setq done t) 327 (if (tq-queue-empty tq) 328 (let ((buf (generate-new-buffer "*spurious*"))) 329 (copy-to-buffer buf (point-min) (point-max)) 330 (delete-region (point-min) (point)) 331 (pop-to-buffer buf nil) 332 (error "Spurious communication from process %s, see buffer %s" 333 (process-name (tq-process tq)) 334 (buffer-name buf))) 335 (goto-char (point-min)) 336 (when (re-search-forward (tq-queue-head-regexp tq) nil t) 337 (setq done nil) 338 (let ((answer (buffer-substring (point-min) (point))) 339 (fn (tq-queue-head-fn tq)) 340 (closure (tq-queue-head-closure tq))) 341 (delete-region (point-min) (point)) 342 (tq-queue-pop tq) 343 (condition-case-unless-debug err 344 (funcall fn closure answer) 345 (error 346 (message "Error while processing tq callback: %s" 347 (error-message-string err)))))))))))) 348 349 350 ;; * ================================================================== * 351 ;; * Sending and receiving 352 ;; * ================================================================== * 353 354 (defun pdf-info-query (cmd &rest args) 355 "Query the server using CMD and ARGS." 356 (pdf-info-process-assert-running) 357 (unless (symbolp cmd) 358 (setq cmd (intern cmd))) 359 (let* ((query (concat (mapconcat #'pdf-info-query--escape 360 (cons cmd args) ":") 361 "\n")) 362 (callback 363 (lambda (closure response) 364 (cl-destructuring-bind (status &rest result) 365 (pdf-info-query--parse-response cmd response) 366 (pdf-info-query--log response) 367 (let* (pdf-info-asynchronous) 368 (if (functionp closure) 369 (funcall closure status result) 370 (apply (car closure) status result (cdr closure))))))) 371 response status done 372 (closure (or pdf-info-asynchronous 373 (lambda (s r) 374 (setq status s response r done t))))) 375 (pdf-info-query--log query t) 376 (tq-enqueue 377 pdf-info--queue query "^\\.\n" closure callback) 378 (unless pdf-info-asynchronous 379 (while (and (not done) 380 (eq (process-status (pdf-info-process)) 381 'run)) 382 (accept-process-output (pdf-info-process) 0.01)) 383 (when (and (not done) 384 (not (eq (process-status (pdf-info-process)) 385 'run)) 386 (not (eq cmd 'quit))) 387 (error "The epdfinfo server quit unexpectedly")) 388 (cond 389 ((null status) response) 390 ((eq status 'error) 391 (error "epdfinfo: %s" response)) 392 ((eq status 'interrupted) 393 (error "epdfinfo: Command was interrupted")) 394 (t 395 (error "Internal error: invalid response status")))))) 396 397 (defun pdf-info-interrupt () 398 "FIXME: This command does currently nothing." 399 (when (and (processp (pdf-info-process)) 400 (eq (process-status (pdf-info-process)) 401 'run)) 402 (signal-process (pdf-info-process) 'SIGUSR1))) 403 404 (defun pdf-info-query--escape (arg) 405 "Escape ARG for transmission to the server." 406 (if (null arg) 407 (string) 408 (with-current-buffer (get-buffer-create " *pdf-info-query--escape*") 409 (erase-buffer) 410 (insert (format "%s" arg)) 411 (goto-char 1) 412 (while (not (eobp)) 413 (cond 414 ((memq (char-after) '(?\\ ?:)) 415 (insert ?\\)) 416 ((eq (char-after) ?\n) 417 (delete-char 1) 418 (insert ?\\ ?n) 419 (backward-char))) 420 (forward-char)) 421 (buffer-substring-no-properties 1 (point-max))))) 422 423 (defmacro pdf-info-query--read-record () 424 "Read a single record of the response in current buffer." 425 `(let (records done (beg (point))) 426 (while (not done) 427 (cl-case (char-after) 428 (?\\ 429 (delete-char 1) 430 (if (not (eq (char-after) ?n)) 431 (forward-char) 432 (delete-char 1) 433 (insert ?\n))) 434 ((?: ?\n) 435 (push (buffer-substring-no-properties 436 beg (point)) records) 437 (forward-char) 438 (setq beg (point) 439 done (bolp))) 440 (t (forward-char)))) 441 (nreverse records))) 442 443 (defun pdf-info-query--parse-response (cmd response) 444 "Parse one epdfinfo RESPONSE to CMD. 445 446 Returns a cons \(STATUS . RESULT\), where STATUS is one of nil 447 for a regular response, error for an error \(RESULT contains the 448 error message\) or interrupted, i.e. the command was 449 interrupted." 450 (with-current-buffer 451 (get-buffer-create " *pdf-info-query--parse-response*") 452 (erase-buffer) 453 (insert response) 454 (goto-char 1) 455 (cond 456 ((looking-at "ERR\n") 457 (forward-line) 458 (cons 'error (buffer-substring-no-properties 459 (point) 460 (progn 461 (re-search-forward "^\\.\n") 462 (1- (match-beginning 0)))))) 463 ((looking-at "OK\n") 464 (let (result) 465 (forward-line) 466 (while (not (and (= (char-after) ?.) 467 (= (char-after (1+ (point))) ?\n))) 468 (push (pdf-info-query--read-record) result)) 469 (cons nil (pdf-info-query--transform-response 470 cmd (nreverse result))))) 471 ((looking-at "INT\n") 472 (cons 'interrupted nil)) 473 (t 474 (cons 'error "Invalid server response"))))) 475 476 (defun pdf-info-query--transform-response (cmd response) 477 "Transform a RESPONSE to CMD into a Lisp form." 478 (cl-case cmd 479 (open nil) 480 (close (equal "1" (caar response))) 481 (number-of-pages (string-to-number (caar response))) 482 (charlayout 483 (mapcar (lambda (elt) 484 (cl-assert (= 1 (length (cadr elt))) t) 485 `(,(aref (cadr elt) 0) 486 ,(mapcar #'string-to-number 487 (split-string (car elt) " " t)))) 488 response)) 489 (regexp-flags 490 (mapcar (lambda (elt) 491 (cons (intern (car elt)) 492 (string-to-number (cadr elt)))) 493 response)) 494 ((search-string search-regexp) 495 (mapcar 496 (lambda (r) 497 `((page . ,(string-to-number (nth 0 r))) 498 (text . ,(let (case-fold-search) 499 (pdf-util-highlight-regexp-in-string 500 (regexp-quote (nth 1 r)) (nth 2 r)))) 501 (edges . ,(mapcar (lambda (m) 502 (mapcar #'string-to-number 503 (split-string m " " t))) 504 (cddr (cdr r)))))) 505 response)) 506 (outline 507 (mapcar (lambda (r) 508 `((depth . ,(string-to-number (pop r))) 509 ,@(pdf-info-query--transform-action r))) 510 response)) 511 (pagelinks 512 (mapcar (lambda (r) 513 `((edges . 514 ,(mapcar #'string-to-number ;area 515 (split-string (pop r) " " t))) 516 ,@(pdf-info-query--transform-action r))) 517 response)) 518 (metadata 519 (let ((md (car response))) 520 (if (= 1 (length md)) 521 (list (cons 'title (car md))) 522 (list 523 (cons 'title (pop md)) 524 (cons 'author (pop md)) 525 (cons 'subject (pop md)) 526 (cons 'keywords-raw (car md)) 527 (cons 'keywords (split-string (pop md) "[\t\n ]*,[\t\n ]*" t)) 528 (cons 'creator (pop md)) 529 (cons 'producer (pop md)) 530 (cons 'format (pop md)) 531 (cons 'created (pop md)) 532 (cons 'modified (pop md)))))) 533 (gettext 534 (or (caar response) "")) 535 (getselection 536 (mapcar (lambda (line) 537 (mapcar #'string-to-number 538 (split-string (car line) " " t))) 539 response)) 540 (features (mapcar #'intern (car response))) 541 (pagesize 542 (setq response (car response)) 543 (cons (round (string-to-number (car response))) 544 (round (string-to-number (cadr response))))) 545 ((getannot editannot addannot) 546 (pdf-info-query--transform-annotation (car response))) 547 (getannots 548 (mapcar #'pdf-info-query--transform-annotation response)) 549 (getattachments 550 (mapcar #'pdf-info-query--transform-attachment response)) 551 ((getattachment-from-annot) 552 (pdf-info-query--transform-attachment (car response))) 553 (boundingbox 554 (mapcar #'string-to-number (car response))) 555 (synctex-forward-search 556 (let ((list (mapcar #'string-to-number (car response)))) 557 `((page . ,(car list)) 558 (edges . ,(cdr list))))) 559 (synctex-backward-search 560 `((filename . ,(caar response)) 561 (line . ,(string-to-number (cadr (car response)))) 562 (column . ,(string-to-number (cadr (cdar response)))))) 563 (delannot nil) 564 ((save) (caar response)) 565 ((renderpage renderpage-text-regions renderpage-highlight) 566 (pdf-util-munch-file (caar response))) 567 ((setoptions getoptions) 568 (let (options) 569 (dolist (key-value response) 570 (let ((key (intern (car key-value))) 571 (value (cadr key-value))) 572 (cl-case key 573 ((:render/printed) 574 (setq value (equal value "1"))) 575 ((:render/usecolors) 576 (setq value (ignore-errors 577 (let ((int-val (cl-parse-integer value))) 578 (if (> 3 int-val 0) 579 int-val 580 0)))))) 581 (push value options) 582 (push key options))) 583 options)) 584 (pagelabels (mapcar #'car response)) 585 (ping (caar response)) 586 (t response))) 587 588 589 (defun pdf-info-query--transform-action (action) 590 "Transform ACTION response into a Lisp form." 591 (let ((type (intern (pop action)))) 592 `((type . ,type) 593 (title . ,(pop action)) 594 ,@(cl-case type 595 (goto-dest 596 `((page . ,(string-to-number (pop action))) 597 (top . ,(and (> (length (car action)) 0) 598 (string-to-number (pop action)))))) 599 (goto-remote 600 `((filename . ,(pop action)) 601 (page . ,(string-to-number (pop action))) 602 (top . ,(and (> (length (car action)) 0) 603 (string-to-number (pop action)))))) 604 (t `((uri . ,(pop action)))))))) 605 606 (defun pdf-info-query--transform-annotation (a) 607 (cl-labels ((not-empty (s) 608 (if (not (equal s "")) s))) 609 (let (a1 a2 a3) 610 (cl-destructuring-bind (page edges type id flags color contents modified &rest rest) 611 a 612 (setq a1 `((page . ,(string-to-number page)) 613 (edges . ,(mapcar #'string-to-number 614 (split-string edges " " t))) 615 (type . ,(intern type)) 616 (id . ,(intern id)) 617 (flags . ,(string-to-number flags)) 618 (color . ,(not-empty color)) 619 (contents . ,contents) 620 (modified . ,(pdf-info-parse-pdf-date modified)))) 621 (when rest 622 (cl-destructuring-bind (label subject opacity popup-edges popup-is-open created 623 &rest rest) 624 rest 625 (setq a2 626 `((label . ,(not-empty label)) 627 (subject . ,(not-empty subject)) 628 (opacity . ,(let ((o (not-empty opacity))) 629 (and o (string-to-number o)))) 630 (popup-edges . ,(let ((p (not-empty popup-edges))) 631 (when p 632 (mapcar #'string-to-number 633 (split-string p " " t))))) 634 (popup-is-open . ,(equal popup-is-open "1")) 635 (created . ,(pdf-info-parse-pdf-date (not-empty created))))) 636 (cond 637 ((eq (cdr (assoc 'type a1)) 'text) 638 (cl-destructuring-bind (icon state is-open) 639 rest 640 (setq a3 641 `((icon . ,(not-empty icon)) 642 (state . ,(not-empty state)) 643 (is-open . ,(equal is-open "1")))))) 644 ((memq (cdr (assoc 'type a1)) 645 '(squiggly highlight underline strike-out)) 646 (setq a3 `((markup-edges 647 . ,(mapcar (lambda (r) 648 (mapcar #'string-to-number 649 (split-string r " " t))) 650 rest))))))))) 651 (append a1 a2 a3)))) 652 653 (defun pdf-info-query--transform-attachment (a) 654 (cl-labels ((not-empty (s) 655 (if (not (equal s "")) s))) 656 (cl-destructuring-bind (id filename description size modified 657 created checksum file) 658 a 659 `((id . ,(intern id)) 660 (filename . ,(not-empty filename)) 661 (description . ,(not-empty description)) 662 (size . ,(let ((n (string-to-number size))) 663 (and (>= n 0) n))) 664 (modified . ,(not-empty modified)) 665 (created . ,(not-empty created)) 666 (checksum . ,(not-empty checksum)) 667 (file . ,(not-empty file)))))) 668 669 (defun pdf-info-query--log (string &optional query-p) 670 "Log STRING as query/response, depending on QUERY-P. 671 672 This is a no-op, if `pdf-info-log' is nil." 673 (when pdf-info-log 674 (with-current-buffer (get-buffer-create "*pdf-info-log*") 675 (buffer-disable-undo) 676 (let ((pos (point-max)) 677 (window (get-buffer-window))) 678 (save-excursion 679 (goto-char (point-max)) 680 (unless (bolp) 681 (insert ?\n)) 682 (insert 683 (propertize 684 (format-time-string "%H:%M:%S ") 685 'face 686 (if query-p 687 'font-lock-keyword-face 688 'font-lock-function-name-face)) 689 (if (and (numberp pdf-info-log-entry-max) 690 (> (length string) 691 pdf-info-log-entry-max)) 692 (concat (substring string 0 pdf-info-log-entry-max) 693 "...[truncated]\n") 694 string))) 695 (when (and (window-live-p window) 696 (= pos (window-point window))) 697 (set-window-point window (point-max))))))) 698 699 700 701 ;; * ================================================================== * 702 ;; * Utility functions 703 ;; * ================================================================== * 704 705 (defvar doc-view-buffer-file-name) 706 (defvar doc-view--buffer-file-name) 707 708 (defun pdf-info--normalize-file-or-buffer (file-or-buffer) 709 "Return the PDF file corresponding to FILE-OR-BUFFER. 710 711 FILE-OR-BUFFER may be nil, a PDF buffer, the name of a PDF buffer 712 or a PDF file." 713 (unless file-or-buffer 714 (setq file-or-buffer 715 (current-buffer))) 716 (when (bufferp file-or-buffer) 717 (unless (buffer-live-p file-or-buffer) 718 (error "Buffer is not live :%s" file-or-buffer)) 719 (with-current-buffer file-or-buffer 720 (unless (setq file-or-buffer 721 (cl-case major-mode 722 (doc-view-mode 723 (cond ((boundp 'doc-view-buffer-file-name) 724 doc-view-buffer-file-name) 725 ((boundp 'doc-view--buffer-file-name) 726 doc-view--buffer-file-name))) 727 (pdf-view-mode (pdf-view-buffer-file-name)) 728 (t (buffer-file-name)))) 729 (error "Buffer is not associated with any file :%s" (buffer-name))))) 730 (unless (stringp file-or-buffer) 731 (signal 'wrong-type-argument 732 (list 'stringp 'bufferp 'null file-or-buffer))) 733 ;; is file 734 (when (file-remote-p file-or-buffer) 735 (error "Processing remote files not supported :%s" 736 file-or-buffer)) 737 ;; (unless (file-readable-p file-or-buffer) 738 ;; (error "File not readable :%s" file-or-buffer)) 739 (expand-file-name file-or-buffer)) 740 741 (defun pdf-info-valid-page-spec-p (pages) 742 "The type predicate for a valid page-spec." 743 (not (not (ignore-errors (pdf-info-normalize-page-range pages))))) 744 745 (defun pdf-info-normalize-page-range (pages) 746 "Normalize PAGES for sending to the server. 747 748 PAGES may be a single page number, a cons \(FIRST . LAST\), or 749 nil, which stands for all pages. 750 751 The result is a cons \(FIRST . LAST\), where LAST may be 0 752 representing the final page." 753 (cond 754 ((natnump pages) 755 (cons pages pages)) 756 ((null pages) 757 (cons 1 0)) 758 ((and (natnump (car pages)) 759 (natnump (cdr pages))) 760 pages) 761 (t 762 (signal 'wrong-type-argument 763 (list 'pdf-info-valid-page-spec-p pages))))) 764 765 (defun pdf-info-parse-pdf-date (date) 766 (when (and date 767 (string-match pdf-info-pdf-date-regexp date)) 768 (let ((year (match-string 1 date)) 769 (month (match-string 2 date)) 770 (day (match-string 3 date)) 771 (hour (match-string 4 date)) 772 (min (match-string 5 date)) 773 (sec (match-string 6 date)) 774 (ut-char (match-string 7 date)) 775 (ut-hour (match-string 8 date)) 776 (ut-min (match-string 9 date)) 777 (tz 0)) 778 (when (or (equal ut-char "+") 779 (equal ut-char "-")) 780 (when ut-hour 781 (setq tz (* 3600 (string-to-number ut-hour)))) 782 (when ut-min 783 (setq tz (+ tz (* 60 (string-to-number ut-min))))) 784 (when (equal ut-char "-") 785 (setq tz (- tz)))) 786 (encode-time 787 (if sec (string-to-number sec) 0) 788 (if min (string-to-number min) 0) 789 (if hour (string-to-number hour) 0) 790 (if day (string-to-number day) 1) 791 (if month (string-to-number month) 1) 792 (string-to-number year) 793 tz)))) 794 795 (defmacro pdf-info-compose-queries (let-forms &rest body) 796 "Let-bind each VAR to QUERIES results and evaluate BODY. 797 798 All queries in each QUERIES form are run by the server in the 799 order they appear and the results collected in a list, which is 800 bound to VAR. Then BODY is evaluated and its value becomes the 801 final result of all queries, unless at least one of them provoked 802 an error. In this case BODY is ignored and the error is the 803 result. 804 805 This macro handles synchronous and asynchronous calls, 806 i.e. `pdf-info-asynchronous' is non-nil, transparently. 807 808 \(FN \(\(VAR QUERIES\)...\) BODY\)" 809 (declare (indent 1) 810 (debug ((&rest &or 811 (symbolp &optional form) 812 symbolp) 813 body))) 814 (unless (cl-every (lambda (form) 815 (when (symbolp form) 816 (setq form (list form))) 817 (and (consp form) 818 (symbolp (car form)) 819 (listp (cdr form)))) 820 let-forms) 821 (error "Invalid let-form: %s" let-forms)) 822 823 (setq let-forms (mapcar (lambda (form) 824 (if (symbolp form) 825 (list form) 826 form)) 827 let-forms)) 828 (let* ((status (make-symbol "status")) 829 (response (make-symbol "response")) 830 (first-error (make-symbol "first-error")) 831 (done (make-symbol "done")) 832 (callback (make-symbol "callback")) 833 (results (make-symbol "results")) 834 (push-fn (make-symbol "push-fn")) 835 (terminal-fn (make-symbol "terminal-fn")) 836 (buffer (make-symbol "buffer"))) 837 `(let* (,status 838 ,response ,first-error ,done 839 (,buffer (current-buffer)) 840 (,callback pdf-info-asynchronous) 841 ;; Ensure a new alist on every invocation. 842 (,results (mapcar 'copy-sequence 843 ',(cl-mapcar (lambda (form) 844 (list (car form))) 845 let-forms))) 846 (,push-fn (lambda (status result var) 847 ;; Store result in alist RESULTS under key 848 ;; VAR. 849 (if status 850 (unless ,first-error 851 (setq ,first-error result)) 852 (let ((elt (assq var ,results))) 853 (setcdr elt (append (cdr elt) 854 (list result))))))) 855 (,terminal-fn 856 (lambda (&rest _) 857 ;; Let-bind responses corresponding to their variables, 858 ;; i.e. keys in alist RESULTS. 859 (let (,@(mapcar (lambda (var) 860 `(,var (cdr (assq ',var ,results)))) 861 (mapcar #'car let-forms))) 862 (setq ,status (not (not ,first-error)) 863 ,response (or ,first-error 864 (with-current-buffer ,buffer 865 ,@body)) 866 ,done t) 867 ;; Maybe invoke the CALLBACK (which was bound to 868 ;; pdf-info-asynchronous). 869 (when ,callback 870 (if (functionp ,callback) 871 (funcall ,callback ,status ,response) 872 (apply (car ,callback) 873 ,status ,response (cdr ,callback)))))))) 874 ;; Wrap each query in an asynchronous call, with its VAR as 875 ;; callback argument, so the PUSH-FN can put it in the alist 876 ;; RESULTS. 877 ,@(mapcar (lambda (form) 878 (list 'let (list 879 (list 'pdf-info-asynchronous 880 (list 'list push-fn (list 'quote (car form))))) 881 (cadr form))) 882 let-forms) 883 ;; Request a no-op, just so we know that we are finished. 884 (let ((pdf-info-asynchronous ,terminal-fn)) 885 (pdf-info-ping)) 886 ;; CALLBACK is the original value of pdf-info-asynchronous. If 887 ;; nil, this is a synchronous query. 888 (unless ,callback 889 (while (and (not ,done) 890 (eq (process-status (pdf-info-process)) 891 'run)) 892 (accept-process-output (pdf-info-process) 0.01)) 893 (when (and (not ,done) 894 (not (eq (process-status (pdf-info-process)) 895 'run))) 896 (error "The epdfinfo server quit unexpectedly")) 897 (when ,status 898 (error "epdfinfo: %s" ,response)) 899 ,response)))) 900 901 902 ;; * ================================================================== * 903 ;; * Buffer local server instances 904 ;; * ================================================================== * 905 906 (put 'pdf-info--queue 'permanent-local t) 907 908 (defun pdf-info-make-local-server (&optional buffer force-restart-p) 909 "Create a server instance local to BUFFER. 910 911 Does nothing if BUFFER already has a local instance. Unless 912 FORCE-RESTART-P is non-nil, then quit a potential process and 913 restart it." 914 (unless buffer 915 (setq buffer (current-buffer))) 916 (with-current-buffer buffer 917 (unless (and 918 (not force-restart-p) 919 (local-variable-p 'pdf-info--queue) 920 (processp (pdf-info-process)) 921 (eq (process-status (pdf-info-process)) 922 'run)) 923 (when (and (local-variable-p 'pdf-info--queue) 924 (processp (pdf-info-process))) 925 (tq-close pdf-info--queue)) 926 (set (make-local-variable 'pdf-info--queue) nil) 927 (pdf-info-process-assert-running t) 928 (add-hook 'kill-buffer-hook #'pdf-info-kill-local-server nil t) 929 pdf-info--queue))) 930 931 (defun pdf-info-kill-local-server (&optional buffer) 932 "Kill the local server in BUFFER. 933 934 A No-op, if BUFFER has not running server instance." 935 (save-current-buffer 936 (when buffer 937 (set-buffer buffer)) 938 (when (local-variable-p 'pdf-info--queue) 939 (pdf-info-kill) 940 (kill-local-variable 'pdf-info--queue) 941 t))) 942 943 (defun pdf-info-local-server-p (&optional buffer) 944 "Return non-nil, if BUFFER has a running server instance." 945 (unless buffer 946 (setq buffer (current-buffer))) 947 (setq buffer (get-buffer buffer)) 948 (and (buffer-live-p buffer) 949 (local-variable-p 'pdf-info--queue buffer))) 950 951 (defun pdf-info-local-batch-query (producer-fn 952 consumer-fn 953 sentinel-fn 954 args) 955 "Process a set of queries asynchronously in a local instance." 956 (unless (pdf-info-local-server-p) 957 (error "Create a local server first")) 958 (let* ((buffer (current-buffer)) 959 (producer-symbol (make-symbol "producer")) 960 (consumer-symbol (make-symbol "consumer")) 961 (producer 962 (lambda (args) 963 (if (null args) 964 (funcall sentinel-fn 'finished buffer) 965 (let ((pdf-info-asynchronous 966 (apply-partially 967 (symbol-function consumer-symbol) 968 args))) 969 (cond 970 ((pdf-info-local-server-p buffer) 971 (with-current-buffer buffer 972 (apply producer-fn (car args)))) 973 (t 974 (funcall sentinel-fn 'error buffer))))))) 975 (consumer (lambda (args status result) 976 (if (not (pdf-info-local-server-p buffer)) 977 (funcall sentinel-fn 'error buffer) 978 (with-current-buffer buffer 979 (apply consumer-fn status result (car args))) 980 (funcall (symbol-function producer-symbol) 981 (cdr args)))))) 982 (fset producer-symbol producer) 983 (fset consumer-symbol consumer) 984 (funcall producer args))) 985 986 987 988 ;; * ================================================================== * 989 ;; * High level interface 990 ;; * ================================================================== * 991 992 (defvar pdf-info-features nil) 993 994 (defun pdf-info-features () 995 "Return a list of symbols describing compile-time features." 996 (or pdf-info-features 997 (setq pdf-info-features 998 (let (pdf-info-asynchronous) 999 (pdf-info-query 'features))))) 1000 1001 (defun pdf-info-writable-annotations-p () 1002 (not (null (memq 'writable-annotations (pdf-info-features))))) 1003 1004 (defun pdf-info-markup-annotations-p () 1005 (not (null (memq 'markup-annotations (pdf-info-features))))) 1006 1007 (defmacro pdf-info-assert-writable-annotations () 1008 `(unless (memq 'writable-annotations (pdf-info-features)) 1009 (error "Writing annotations is not supported by this version of epdfinfo"))) 1010 1011 (defmacro pdf-info-assert-markup-annotations () 1012 `(unless (memq 'markup-annotations (pdf-info-features)) 1013 (error "Creating markup annotations is not supported by this version of epdfinfo"))) 1014 1015 (defun pdf-info-creatable-annotation-types () 1016 (let ((features (pdf-info-features))) 1017 (cond 1018 ((not (memq 'writable-annotations features)) nil) 1019 ((memq 'markup-annotations features) 1020 (list 'text 'squiggly 'underline 'strike-out 'highlight)) 1021 (t (list 'text))))) 1022 1023 (defun pdf-info-open (&optional file-or-buffer password) 1024 "Open the document FILE-OR-BUFFER using PASSWORD. 1025 1026 Generally, documents are opened and closed automatically on 1027 demand, so this function is rarely needed, unless a PASSWORD is 1028 set on the document. 1029 1030 Manually opened documents are never closed automatically." 1031 1032 (pdf-info-query 1033 'open (pdf-info--normalize-file-or-buffer file-or-buffer) 1034 password)) 1035 1036 (defun pdf-info-close (&optional file-or-buffer) 1037 "Close the document FILE-OR-BUFFER. 1038 1039 Returns t, if the document was actually open, otherwise nil. 1040 This command is rarely needed, see also `pdf-info-open'." 1041 (let* ((pdf (pdf-info--normalize-file-or-buffer file-or-buffer)) 1042 (buffer (cond 1043 ((not file-or-buffer) (current-buffer)) 1044 ((bufferp file-or-buffer) file-or-buffer) 1045 ((stringp file-or-buffer) 1046 (find-buffer-visiting file-or-buffer))))) 1047 (prog1 1048 (pdf-info-query 'close pdf) 1049 (if (buffer-live-p buffer) 1050 (with-current-buffer buffer 1051 (run-hooks 'pdf-info-close-document-hook)) 1052 (with-temp-buffer 1053 (run-hooks 'pdf-info-close-document-hook)))))) 1054 1055 (defun pdf-info-encrypted-p (&optional file-or-buffer) 1056 "Return non-nil if FILE-OR-BUFFER requires a password. 1057 1058 Note: This function returns nil, if the document is encrypted, 1059 but was already opened (presumably using a password)." 1060 1061 (condition-case err 1062 (pdf-info-open 1063 (pdf-info--normalize-file-or-buffer file-or-buffer)) 1064 (error (or (string-match-p 1065 ":Document is encrypted\\'" (cadr err)) 1066 (signal (car err) (cdr err)))))) 1067 1068 (defun pdf-info-metadata (&optional file-or-buffer) 1069 "Extract the metadata from the document FILE-OR-BUFFER. 1070 1071 This returns an alist containing some information about the 1072 document." 1073 (pdf-info-query 1074 'metadata 1075 (pdf-info--normalize-file-or-buffer file-or-buffer))) 1076 1077 (defun pdf-info-search-string (string &optional pages file-or-buffer) 1078 "Search for STRING in PAGES of document FILE-OR-BUFFER. 1079 1080 See `pdf-info-normalize-page-range' for valid PAGES formats. 1081 1082 This function returns a list of matches. Each item is an alist 1083 containing keys PAGE, TEXT and EDGES, where PAGE and TEXT are the 1084 matched page resp. line. EDGES is a list containing a single 1085 edges element \(LEFT TOP RIGHT BOTTOM\). This is for consistency 1086 with `pdf-info-search-regexp', which may return matches with 1087 multiple edges. 1088 1089 The TEXT contains `match' face properties on the matched parts. 1090 1091 Search is case-insensitive, unless `case-fold-search' is nil and 1092 searching case-sensitive is supported by the server." 1093 1094 (let ((pages (pdf-info-normalize-page-range pages))) 1095 (pdf-info-query 1096 'search-string 1097 (pdf-info--normalize-file-or-buffer file-or-buffer) 1098 (car pages) 1099 (cdr pages) 1100 string 1101 (if case-fold-search 1 0)))) 1102 1103 (defvar pdf-info-regexp-compile-flags nil 1104 "PCRE compile flags. 1105 1106 Don't use this, but the equally named function.") 1107 1108 (defvar pdf-info-regexp-match-flags nil 1109 "PCRE match flags. 1110 1111 Don't use this, but the equally named function.") 1112 1113 (defun pdf-info-regexp-compile-flags () 1114 (or pdf-info-regexp-compile-flags 1115 (let* (pdf-info-asynchronous 1116 (flags (pdf-info-query 'regexp-flags)) 1117 (match (cl-remove-if-not 1118 (lambda (flag) 1119 (string-match-p 1120 "\\`match-" (symbol-name (car flag)))) 1121 flags)) 1122 (compile (cl-set-difference flags match))) 1123 (setq pdf-info-regexp-compile-flags compile 1124 pdf-info-regexp-match-flags match) 1125 pdf-info-regexp-compile-flags))) 1126 1127 (defun pdf-info-regexp-match-flags () 1128 (or pdf-info-regexp-match-flags 1129 (progn 1130 (pdf-info-regexp-compile-flags) 1131 pdf-info-regexp-match-flags))) 1132 1133 (defvar pdf-info-regexp-flags '(multiline) 1134 "Compile- and match-flags for the PCRE engine. 1135 1136 This is a list of symbols denoting compile- and match-flags when 1137 searching for regular expressions. 1138 1139 You should not change this directly, but rather `let'-bind it 1140 around a call to `pdf-info-search-regexp'. 1141 1142 Valid compile-flags are: 1143 1144 newline-crlf, newline-lf, newline-cr, dupnames, optimize, 1145 no-auto-capture, raw, ungreedy, dollar-endonly, anchored, 1146 extended, dotall, multiline and caseless. 1147 1148 Note that the last one, caseless, is handled special, as it is 1149 always added if `case-fold-search' is non-nil. 1150 1151 And valid match-flags: 1152 1153 match-anchored, match-notbol, match-noteol, match-notempty, 1154 match-partial, match-newline-cr, match-newline-lf, 1155 match-newline-crlf and match-newline-any. 1156 1157 See the glib documentation at url 1158 `https://developer.gnome.org/glib/stable/glib-Perl-compatible-regular-expressions.html'.") 1159 1160 (defun pdf-info-search-regexp (pcre &optional pages 1161 no-error 1162 file-or-buffer) 1163 "Search for a PCRE on PAGES of document FILE-OR-BUFFER. 1164 1165 See `pdf-info-normalize-page-range' for valid PAGES formats and 1166 `pdf-info-search-string' for its return value. 1167 1168 Uses the flags in `pdf-info-regexp-flags', which see. If 1169 `case-fold-search' is non-nil, the caseless flag is added. 1170 1171 If NO-ERROR is non-nil, catch errors due to invalid regexps and 1172 return nil. If it is the symbol `invalid-regexp', then re-signal 1173 this kind of error as a `invalid-regexp' error." 1174 1175 (cl-labels ((orflags (flags alist) 1176 (cl-reduce 1177 (lambda (v flag) 1178 (let ((n 1179 (cdr (assq flag alist)))) 1180 (if n (logior n v) v))) 1181 (cons 0 flags)))) 1182 (let ((pages (pdf-info-normalize-page-range pages))) 1183 (condition-case err 1184 (pdf-info-query 1185 'search-regexp 1186 (pdf-info--normalize-file-or-buffer file-or-buffer) 1187 (car pages) 1188 (cdr pages) 1189 pcre 1190 (orflags `(,(if case-fold-search 1191 'caseless) 1192 ,@pdf-info-regexp-flags) 1193 (pdf-info-regexp-compile-flags)) 1194 (orflags pdf-info-regexp-flags 1195 (pdf-info-regexp-match-flags))) 1196 (error 1197 (let ((re 1198 (concat "\\`epdfinfo: *Invalid *regexp: *" 1199 ;; glib error 1200 "\\(?:Error while compiling regular expression" 1201 " *%s *\\)?\\(.*\\)"))) 1202 (if (or (null no-error) 1203 (not (string-match 1204 (format re (regexp-quote pcre)) 1205 (cadr err)))) 1206 (signal (car err) (cdr err)) 1207 (if (eq no-error 'invalid-regexp) 1208 (signal 'invalid-regexp 1209 (list (match-string 1 (cadr err)))))))))))) 1210 1211 (defun pdf-info-pagelinks (page &optional file-or-buffer) 1212 "Return a list of links on PAGE in document FILE-OR-BUFFER. 1213 1214 This function returns a list of alists with the following keys. 1215 EDGES represents the relative bounding-box of the link , TYPE is 1216 the type of the action, TITLE is a, possibly empty, name for this 1217 action. 1218 1219 TYPE may be one of 1220 1221 goto-dest -- This is a internal link to some page. Each element 1222 contains additional keys PAGE and TOP, where PAGE is the page of 1223 the link and TOP its vertical position. 1224 1225 goto-remote -- This a external link to some document. Same as 1226 goto-dest, with an additional FILENAME of the external PDF. 1227 1228 uri -- A link in form of some URI. Alist contains additional key 1229 URI. 1230 1231 In the first two cases, PAGE may be 0 and TOP nil, which means 1232 these data is unspecified." 1233 (cl-check-type page natnum) 1234 (pdf-info-query 1235 'pagelinks 1236 (pdf-info--normalize-file-or-buffer file-or-buffer) 1237 page)) 1238 1239 (defun pdf-info-number-of-pages (&optional file-or-buffer) 1240 "Return the number of pages in document FILE-OR-BUFFER." 1241 (pdf-info-query 'number-of-pages 1242 (pdf-info--normalize-file-or-buffer 1243 file-or-buffer))) 1244 1245 (defun pdf-info-outline (&optional file-or-buffer) 1246 "Return the PDF outline of document FILE-OR-BUFFER. 1247 1248 This function returns a list of alists like `pdf-info-pagelinks'. 1249 Additionally every alist has a DEPTH (>= 1) entry with the depth 1250 of this element in the tree." 1251 1252 (pdf-info-query 1253 'outline 1254 (pdf-info--normalize-file-or-buffer file-or-buffer))) 1255 1256 (defun pdf-info--selection-style (selection-style) 1257 "SELECTION-STYLE is the smallest unit of the selected region. 1258 1259 It must be one of glyph, word or line. If it is none of these, we 1260 fallback to glyph." 1261 (cl-case selection-style 1262 (glyph 0) 1263 (word 1) 1264 (line 2) 1265 (t 0))) 1266 1267 (defun pdf-info-gettext (page edges &optional selection-style 1268 file-or-buffer) 1269 "Get text on PAGE according to EDGES. 1270 1271 EDGES should contain relative coordinates. The selection may 1272 extend over multiple lines, which works similar to a Emacs 1273 region. SELECTION-STYLE may be one of glyph, word or line and 1274 determines the smallest unit of the selected region. 1275 1276 Return the text contained in the selection." 1277 1278 (pdf-info-query 1279 'gettext 1280 (pdf-info--normalize-file-or-buffer file-or-buffer) 1281 page 1282 (mapconcat #'number-to-string edges " ") 1283 (pdf-info--selection-style selection-style))) 1284 1285 (defun pdf-info-getselection (page edges 1286 &optional selection-style file-or-buffer) 1287 "Return the edges of the selection EDGES on PAGE. 1288 1289 Arguments are the same as for `pdf-info-gettext'. Return a list 1290 of edges corresponding to the text that would be returned by the 1291 aforementioned function, when called with the same arguments." 1292 (pdf-info-query 1293 'getselection 1294 (pdf-info--normalize-file-or-buffer file-or-buffer) 1295 page 1296 (mapconcat #'number-to-string edges " ") 1297 (pdf-info--selection-style selection-style))) 1298 1299 (defun pdf-info-textregions (page &optional file-or-buffer) 1300 "Return a list of edges describing PAGE's text-layout." 1301 (pdf-info-getselection 1302 page '(0 0 1 1) 'glyph file-or-buffer)) 1303 1304 (defun pdf-info-charlayout (page &optional edges-or-pos file-or-buffer) 1305 "Return the layout of characters of PAGE in/at EDGES-OR-POS. 1306 1307 Returns a list of elements \(CHAR . \(LEFT TOP RIGHT BOT\)\) mapping 1308 character to their corresponding relative bounding-boxes. 1309 1310 EDGES-OR-POS may be a region \(LEFT TOP RIGHT BOT\) restricting 1311 the returned value to include only characters fully contained in 1312 it. Or a cons \(LEFT . TOP\) which means to only include the 1313 character at this position. In this case the return value 1314 contains at most one element." 1315 1316 ;; FIXME: Actually returns \(CHAR . LEFT ...\). 1317 1318 (unless edges-or-pos 1319 (setq edges-or-pos '(0 0 1 1))) 1320 (when (numberp (cdr edges-or-pos)) 1321 (setq edges-or-pos (list (car edges-or-pos) 1322 (cdr edges-or-pos) 1323 -1 -1))) 1324 (pdf-info-query 1325 'charlayout 1326 (pdf-info--normalize-file-or-buffer file-or-buffer) 1327 page 1328 (mapconcat #'number-to-string edges-or-pos " "))) 1329 1330 (defun pdf-info-pagesize (page &optional file-or-buffer) 1331 "Return the size of PAGE as a cons \(WIDTH . HEIGHT\) 1332 1333 The size is in PDF points." 1334 (pdf-info-query 1335 'pagesize 1336 (pdf-info--normalize-file-or-buffer file-or-buffer) 1337 page)) 1338 1339 (defun pdf-info-running-p () 1340 "Return non-nil, if the server is running." 1341 (and (processp (pdf-info-process)) 1342 (eq (process-status (pdf-info-process)) 1343 'run))) 1344 1345 (defun pdf-info-quit (&optional timeout) 1346 "Quit the epdfinfo server. 1347 1348 This blocks until all outstanding requests are answered. Unless 1349 TIMEOUT is non-nil, in which case we wait at most TIMEOUT seconds 1350 before killing the server." 1351 (cl-check-type timeout (or null number)) 1352 (when (pdf-info-running-p) 1353 (let ((pdf-info-asynchronous 1354 (if timeout (lambda (&rest _)) 1355 pdf-info-asynchronous))) 1356 (pdf-info-query 'quit) 1357 (when timeout 1358 (setq timeout (+ (float-time) (max 0 timeout))) 1359 (while (and (pdf-info-running-p) 1360 (> timeout (float-time))) 1361 (accept-process-output (pdf-info-process) 0.5 nil t))))) 1362 (when (processp (pdf-info-process)) 1363 (tq-close pdf-info--queue)) 1364 (setq pdf-info--queue nil)) 1365 1366 (defun pdf-info-kill () 1367 "Kill the epdfinfo server. 1368 1369 Immediately delete the server process, see also `pdf-info-quit', 1370 for a more sane way to exit the program." 1371 (when (processp (pdf-info-process)) 1372 (tq-close pdf-info--queue)) 1373 (setq pdf-info--queue nil)) 1374 1375 (defun pdf-info-getannots (&optional pages file-or-buffer) 1376 "Return the annotations on PAGE. 1377 1378 See `pdf-info-normalize-page-range' for valid PAGES formats. 1379 1380 This function returns the annotations for PAGES as a list of 1381 alists. Each element of this list describes one annotation and 1382 contains the following keys. 1383 1384 page - Its page number. 1385 edges - Its area. 1386 type - A symbol describing the annotation's type. 1387 id - A document-wide unique symbol referencing this annotation. 1388 flags - Its flags, binary encoded. 1389 color - Its color in standard Emacs notation. 1390 contents - The text of this annotation. 1391 modified - The last modification date of this annotation. 1392 1393 Additionally, if the annotation is a markup annotation, the 1394 following keys are present. 1395 1396 label - The annotation's label. 1397 subject - The subject addressed. 1398 opacity - The level of relative opacity. 1399 popup-edges - The edges of a associated popup window or nil. 1400 popup-is-open - Whether this window should be displayed open. 1401 created - The date this markup annotation was created. 1402 1403 If the annotation is also a markup text annotation, the alist 1404 contains the following keys. 1405 1406 text-icon - A string describing the purpose of this annotation. 1407 text-state - A string, e.g. accepted or rejected." ;FIXME: Use symbols ? 1408 1409 (let ((pages (pdf-info-normalize-page-range pages))) 1410 (pdf-info-query 1411 'getannots 1412 (pdf-info--normalize-file-or-buffer file-or-buffer) 1413 (car pages) 1414 (cdr pages)))) 1415 1416 (defun pdf-info-getannot (id &optional file-or-buffer) 1417 "Return the annotation for ID. 1418 1419 ID should be a symbol, which was previously returned in a 1420 `pdf-info-getannots' query. Signal an error, if an annotation 1421 with ID is not available. 1422 1423 See `pdf-info-getannots' for the kind of return value of this 1424 function." 1425 (pdf-info-query 1426 'getannot 1427 (pdf-info--normalize-file-or-buffer file-or-buffer) 1428 id)) 1429 1430 (defun pdf-info-addannot (page edges type 1431 &optional selection-style file-or-buffer 1432 &rest markup-edges) 1433 "Add a new annotation to PAGE with EDGES of TYPE. 1434 1435 FIXME: TYPE may be one of `text', `markup-highlight', ... . 1436 FIXME: -1 = 24 1437 See `pdf-info-getannots' for the kind of value of this function 1438 returns." 1439 (pdf-info-assert-writable-annotations) 1440 (when (consp file-or-buffer) 1441 (push file-or-buffer markup-edges) 1442 (setq file-or-buffer nil)) 1443 (apply 1444 #'pdf-info-query 1445 'addannot 1446 (pdf-info--normalize-file-or-buffer file-or-buffer) 1447 page 1448 type 1449 (pdf-info--selection-style selection-style) 1450 (mapconcat 'number-to-string edges " ") 1451 (mapcar (lambda (me) 1452 (mapconcat 'number-to-string me " ")) 1453 markup-edges))) 1454 1455 (defun pdf-info-delannot (id &optional file-or-buffer) 1456 "Delete the annotation with ID in FILE-OR-BUFFER. 1457 1458 ID should be a symbol, which was previously returned in a 1459 `pdf-info-getannots' query. Signal an error, if annotation ID 1460 does not exist." 1461 (pdf-info-assert-writable-annotations) 1462 (pdf-info-query 1463 'delannot 1464 (pdf-info--normalize-file-or-buffer file-or-buffer) 1465 id)) 1466 1467 (defun pdf-info-mvannot (id edges &optional file-or-buffer) 1468 "Move/Resize annotation ID to fit EDGES. 1469 1470 ID should be a symbol, which was previously returned in a 1471 `pdf-info-getannots' query. Signal an error, if annotation ID 1472 does not exist. 1473 1474 EDGES should be a list \(LEFT TOP RIGHT BOT\). RIGHT and/or BOT 1475 may also be negative, which means to keep the width 1476 resp. height." 1477 (pdf-info-editannot id `((edges . ,edges)) file-or-buffer)) 1478 1479 (defun pdf-info-editannot (id modifications &optional file-or-buffer) 1480 "Edit annotation ID, applying MODIFICATIONS. 1481 1482 ID should be a symbol, which was previously returned in a 1483 `pdf-info-getannots' query. 1484 1485 MODIFICATIONS is an alist of properties and their new values. 1486 1487 The server must support modifying annotations for this to work." 1488 1489 (pdf-info-assert-writable-annotations) 1490 (let ((edits 1491 (mapcar 1492 (lambda (elt) 1493 (cl-case (car elt) 1494 (color 1495 (list (car elt) 1496 (pdf-util-hexcolor (cdr elt)))) 1497 (edges 1498 (list (car elt) 1499 (mapconcat 'number-to-string (cdr elt) " "))) 1500 ((popup-is-open is-open) 1501 (list (car elt) (if (cdr elt) 1 0))) 1502 (t 1503 (list (car elt) (cdr elt))))) 1504 modifications))) 1505 (apply #'pdf-info-query 1506 'editannot 1507 (pdf-info--normalize-file-or-buffer file-or-buffer) 1508 id 1509 (apply #'append edits)))) 1510 1511 (defun pdf-info-save (&optional file-or-buffer) 1512 "Save FILE-OR-BUFFER. 1513 1514 This saves the document to a new temporary file, which is 1515 returned and owned by the caller." 1516 (pdf-info-assert-writable-annotations) 1517 (pdf-info-query 1518 'save 1519 (pdf-info--normalize-file-or-buffer file-or-buffer))) 1520 1521 (defun pdf-info-getattachment-from-annot (id &optional do-save file-or-buffer) 1522 "Return the attachment associated with annotation ID. 1523 1524 ID should be a symbol which was previously returned in a 1525 `pdf-info-getannots' query, and referencing an attachment of type 1526 `file', otherwise an error is signaled. 1527 1528 See `pdf-info-getattachments' for the kind of return value of this 1529 function and the meaning of DO-SAVE." 1530 1531 (pdf-info-query 1532 'getattachment-from-annot 1533 (pdf-info--normalize-file-or-buffer file-or-buffer) 1534 id 1535 (if do-save 1 0))) 1536 1537 (defun pdf-info-getattachments (&optional do-save file-or-buffer) 1538 "Return all document level attachments. 1539 1540 If DO-SAVE is non-nil, save the attachments data to a local file, 1541 which is then owned by the caller, see below. 1542 1543 This function returns a list of alists, where every element 1544 contains the following keys. All values, except for id, may be 1545 nil, i.e. not present. 1546 1547 id - A symbol uniquely identifying this attachment. 1548 filename - The filename of this attachment. 1549 description - A description of this attachment. 1550 size - The size in bytes. 1551 modified - The last modification date. 1552 created - The date of creation. 1553 checksum - A MD5 checksum of this attachment's data. 1554 file - The name of a tempfile containing the data (only present if 1555 DO-SAVE is non-nil)." 1556 1557 (pdf-info-query 1558 'getattachments 1559 (pdf-info--normalize-file-or-buffer file-or-buffer) 1560 (if do-save 1 0))) 1561 1562 (defun pdf-info-synctex-forward-search (source &optional line column file-or-buffer) 1563 "Perform a forward search with synctex. 1564 1565 SOURCE should be a LaTeX buffer or the absolute filename of a 1566 corresponding file. LINE and COLUMN represent the position in 1567 the buffer or file. Finally FILE-OR-BUFFER corresponds to the 1568 PDF document. 1569 1570 Returns an alist with entries PAGE and relative EDGES describing 1571 the position in the PDF document corresponding to the SOURCE 1572 location." 1573 1574 (let ((source (if (buffer-live-p (get-buffer source)) 1575 (buffer-file-name (get-buffer source)) 1576 source))) 1577 (pdf-info-query 1578 'synctex-forward-search 1579 (pdf-info--normalize-file-or-buffer file-or-buffer) 1580 source 1581 (or line 1) 1582 (or column 1)))) 1583 1584 (defun pdf-info-synctex-backward-search (page &optional x y file-or-buffer) 1585 "Perform a backward search with synctex. 1586 1587 Find the source location corresponding to the coordinates 1588 \(X . Y\) on PAGE in FILE-OR-BUFFER. 1589 1590 Returns an alist with entries FILENAME, LINE and COLUMN." 1591 1592 1593 (pdf-info-query 1594 'synctex-backward-search 1595 (pdf-info--normalize-file-or-buffer file-or-buffer) 1596 page 1597 (or x 0) 1598 (or y 0))) 1599 1600 (defun pdf-info-renderpage (page width &optional file-or-buffer &rest commands) 1601 "Render PAGE with width WIDTH. 1602 1603 Return the data of the corresponding PNG image." 1604 (when (keywordp file-or-buffer) 1605 (push file-or-buffer commands) 1606 (setq file-or-buffer nil)) 1607 (apply #'pdf-info-query 1608 'renderpage 1609 (pdf-info--normalize-file-or-buffer file-or-buffer) 1610 page 1611 (* width (pdf-util-frame-scale-factor)) 1612 (let (transformed) 1613 (while (cdr commands) 1614 (let ((kw (pop commands)) 1615 (value (pop commands))) 1616 (setq value 1617 (cl-case kw 1618 ((:crop-to :highlight-line :highlight-region :highlight-text) 1619 (mapconcat #'number-to-string value " ")) 1620 ((:foreground :background) 1621 (pdf-util-hexcolor value)) 1622 (:alpha 1623 (number-to-string value)) 1624 (:selection-style 1625 (number-to-string (pdf-info--selection-style value))) 1626 (otherwise value))) 1627 (push kw transformed) 1628 (push value transformed))) 1629 (when commands 1630 (error "Keyword is missing a value: %s" (car commands))) 1631 (nreverse transformed)))) 1632 1633 (defun pdf-info-renderpage-text-regions (page width single-line-p 1634 &optional selection-style file-or-buffer 1635 &rest regions) 1636 "Highlight text on PAGE with width WIDTH using REGIONS. 1637 1638 REGIONS is a list determining foreground and background color and 1639 the regions to render. So each element should look like \(FG BG 1640 \(LEFT TOP RIGHT BOT\) \(LEFT TOP RIGHT BOT\) ... \) . The 1641 rendering is text-aware and is controlled by SELECTION-STYLE. 1642 1643 If SINGLE-LINE-P is non-nil, the edges in REGIONS are each 1644 supposed to be limited to a single line in the document. Setting 1645 this, if applicable, avoids rendering problems. 1646 1647 For the other args see `pdf-info-renderpage'. 1648 1649 Return the data of the corresponding PNG image." 1650 1651 (when (consp file-or-buffer) 1652 (push file-or-buffer regions) 1653 (setq file-or-buffer nil)) 1654 1655 (apply #'pdf-info-renderpage 1656 page width file-or-buffer 1657 (apply #'append 1658 `(:selection-style ,selection-style) 1659 (mapcar (lambda (elt) 1660 `(:foreground ,(pop elt) 1661 :background ,(pop elt) 1662 ,@(cl-mapcan (lambda (edges) 1663 `(,(if single-line-p 1664 :highlight-line 1665 :highlight-text) 1666 ,edges)) 1667 elt))) 1668 regions)))) 1669 1670 (defun pdf-info-renderpage-highlight (page width 1671 &optional file-or-buffer 1672 &rest regions) 1673 "Highlight regions on PAGE with width WIDTH using REGIONS. 1674 1675 REGIONS is a list determining the background color, a alpha value 1676 and the regions to render. So each element should look like \(FILL-COLOR 1677 STROKE-COLOR ALPHA \(LEFT TOP RIGHT BOT\) \(LEFT TOP RIGHT BOT\) ... \) 1678 . 1679 1680 For the other args see `pdf-info-renderpage'. 1681 1682 Return the data of the corresponding PNG image." 1683 1684 (when (consp file-or-buffer) 1685 (push file-or-buffer regions) 1686 (setq file-or-buffer nil)) 1687 1688 (apply #'pdf-info-renderpage 1689 page width file-or-buffer 1690 (apply #'append 1691 (mapcar (lambda (elt) 1692 `(:background ,(pop elt) 1693 :foreground ,(pop elt) 1694 :alpha ,(pop elt) 1695 ,@(cl-mapcan (lambda (edges) 1696 `(:highlight-region ,edges)) 1697 elt))) 1698 regions)))) 1699 1700 (defun pdf-info-boundingbox (page &optional file-or-buffer) 1701 "Return a bounding-box for PAGE. 1702 1703 Returns a list \(LEFT TOP RIGHT BOT\)." 1704 1705 (pdf-info-query 1706 'boundingbox 1707 (pdf-info--normalize-file-or-buffer file-or-buffer) 1708 page)) 1709 1710 (defun pdf-info-getoptions (&optional file-or-buffer) 1711 (pdf-info-query 1712 'getoptions 1713 (pdf-info--normalize-file-or-buffer file-or-buffer))) 1714 1715 (defun pdf-info-setoptions (&optional file-or-buffer &rest options) 1716 (when (symbolp file-or-buffer) 1717 (push file-or-buffer options) 1718 (setq file-or-buffer nil)) 1719 (unless (= (% (length options) 2) 0) 1720 (error "Missing a option value")) 1721 (apply #'pdf-info-query 1722 'setoptions 1723 (pdf-info--normalize-file-or-buffer file-or-buffer) 1724 (let (soptions) 1725 (while options 1726 (let ((key (pop options)) 1727 (value (pop options))) 1728 (unless (and (keywordp key) 1729 (not (eq key :))) 1730 (error "Keyword expected: %s" key)) 1731 (cl-case key 1732 ((:render/foreground :render/background) 1733 (push (pdf-util-hexcolor value) 1734 soptions)) 1735 ((:render/printed) 1736 (push (if value 1 0) soptions)) 1737 ((:render/usecolors) 1738 ;; 0 -> original color 1739 ;; 1 -> recolor document to grayscale mapping black to 1740 ;; :render/foreground and white to :render/background 1741 ;; 2 -> recolor document by inverting the perceived lightness 1742 ;; preserving hue 1743 (push (if (and (integerp value) (> 3 value 0)) value 0) 1744 soptions)) 1745 (t (push value soptions))) 1746 (push key soptions))) 1747 soptions))) 1748 1749 1750 1751 (defun pdf-info-pagelabels (&optional file-or-buffer) 1752 "Return a list of pagelabels. 1753 1754 Returns a list of strings corresponding to the labels of the 1755 pages in FILE-OR-BUFFER." 1756 1757 (pdf-info-query 1758 'pagelabels 1759 (pdf-info--normalize-file-or-buffer file-or-buffer))) 1760 1761 (defun pdf-info-ping (&optional message) 1762 "Ping the server using MESSAGE. 1763 1764 Returns MESSAGE, which defaults to \"pong\"." 1765 (pdf-info-query 'ping (or message "pong"))) 1766 1767 (provide 'pdf-info) 1768 1769 ;;; pdf-info.el ends here