org-feed.el (26658B)
1 ;;; org-feed.el --- Add RSS feed items to Org files -*- lexical-binding: t; -*- 2 ;; 3 ;; Copyright (C) 2009-2024 Free Software Foundation, Inc. 4 ;; 5 ;; Author: Carsten Dominik <carsten.dominik@gmail.com> 6 ;; Keywords: outlines, hypermedia, calendar, text 7 ;; URL: https://orgmode.org 8 ;; 9 ;; This file is part of GNU Emacs. 10 ;; 11 ;; GNU Emacs is free software: you can redistribute it and/or modify 12 ;; it under the terms of the GNU General Public License as published by 13 ;; the Free Software Foundation, either version 3 of the License, or 14 ;; (at your option) any later version. 15 16 ;; GNU Emacs is distributed in the hope that it will be useful, 17 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of 18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 19 ;; GNU General Public License for more details. 20 21 ;; You should have received a copy of the GNU General Public License 22 ;; along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. 23 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 24 ;; 25 ;;; Commentary: 26 ;; 27 ;; This module allows entries to be created and changed in an Org mode 28 ;; file triggered by items in an RSS feed. The basic functionality 29 ;; is geared toward simply adding new items found in a feed as 30 ;; outline nodes to an Org file. Using hooks, arbitrary actions can 31 ;; be triggered for new or changed items. 32 ;; 33 ;; Selecting feeds and target locations 34 ;; ------------------------------------ 35 ;; 36 ;; This module is configured through a single variable, `org-feed-alist'. 37 ;; Here is an example, using a notes/tasks feed from reQall.com. 38 ;; 39 ;; (setq org-feed-alist 40 ;; '(("ReQall" 41 ;; "http://www.reqall.com/user/feeds/rss/a1b2c3....." 
42 ;; "~/org/feeds.org" "ReQall Entries"))) 43 ;; 44 ;; With this setup, the command `M-x org-feed-update-all' will 45 ;; collect new entries in the feed at the given URL and create 46 ;; entries as subheadings under the "ReQall Entries" heading in the 47 ;; file "~/org/feeds.org". Each feed should normally have its own 48 ;; heading - however see the `:drawer' parameter. 49 ;; 50 ;; Besides these standard elements that need to be specified for each 51 ;; feed, keyword-value pairs can set additional options. For example, 52 ;; to de-select transitional entries with a title containing 53 ;; 54 ;; "reQall is typing what you said", 55 ;; 56 ;; you could use the `:filter' argument: 57 ;; 58 ;; (setq org-feed-alist 59 ;; '(("ReQall" 60 ;; "http://www.reqall.com/user/feeds/rss/a1b2c3....." 61 ;; "~/org/feeds.org" "ReQall Entries" 62 ;; :filter my-reqall-filter))) 63 ;; 64 ;; (defun my-reqall-filter (e) 65 ;; (if (string-match "reQall is typing what you said" 66 ;; (plist-get e :title)) 67 ;; nil 68 ;; e)) 69 ;; 70 ;; See the docstring for `org-feed-alist' for more details. 71 ;; 72 ;; 73 ;; Keeping track of previously added entries 74 ;; ----------------------------------------- 75 ;; 76 ;; Since Org allows you to delete, archive, or move outline nodes, 77 ;; org-feed.el needs to keep track of which feed items have been handled 78 ;; before, so that they will not be handled again. For this, org-feed.el 79 ;; stores information in a special drawer, FEEDSTATUS, under the heading 80 ;; that received the input of the feed. 81 ;; 82 ;; 83 ;; Acknowledgments 84 ;; --------------- 85 ;; 86 ;; org-feed.el is based on ideas by Brad Bozarth who implemented a 87 ;; similar mechanism using shell and awk scripts. 
;;; Code:

(require 'org-macs)
(org-assert-version)

(require 'cl-lib)
(require 'org)
(require 'sha1)

(declare-function url-retrieve-synchronously "url"
                  (url &optional silent inhibit-cookies timeout))
(declare-function xml-node-children "xml" (node))
(declare-function xml-get-children "xml" (node child-name))
(declare-function xml-get-attribute "xml" (node attribute))
(declare-function xml-get-attribute-or-nil "xml" (node attribute))
(declare-function xml-substitute-special "xml" (string))

(declare-function org-capture-escaped-% "org-capture" ())
(declare-function org-capture-expand-embedded-elisp "org-capture" (&optional mark))
(declare-function org-capture-inside-embedded-elisp-p "org-capture" ())

(defgroup org-feed  nil
  "Options concerning RSS feeds as inputs for Org files."
  :tag "Org Feed"
  :group 'org)

(defcustom org-feed-alist nil
  "Alist specifying RSS feeds that should create inputs for Org.
Each entry in this list specifies an RSS feed that should be queried
to create inbox items in Org.  Each entry is a list with the following items:

name         a custom name for this feed
URL          the Feed URL
file         the target Org file where entries should be listed, when
             nil the target becomes the current buffer (may be an
             indirect buffer) each time the feed update is invoked
headline     the headline under which entries should be listed

Additional arguments can be given using keyword-value pairs.  Many of these
specify functions that receive one or a list of \"entries\" as their single
argument.  An entry is a property list that describes a feed item.  The
property list has properties for each field in the item, for example `:title'
for the `<title>' field and `:pubDate' for the publication date.  In addition,
it contains the following properties:

`:item-full-text'   the full text in the <item> tag
`:guid-permalink'   t when the guid property is a permalink

Here are the keyword-value pairs allowed in `org-feed-alist'.

:drawer drawer-name
     The name of the drawer for storing feed information.  The default is
     \"FEEDSTATUS\".  Using different drawers for different feeds allows
     several feeds to target the same inbox heading.

:filter filter-function
     A function to select interesting entries in the feed.  It gets a single
     entry as parameter.  It should return the entry if it is relevant, or
     nil if it is not.

:template template-string
     The default action on new items in the feed is to add them as children
     under the headline for the feed.  The template describes how the entry
     should be formatted.  If not given, it defaults to
     `org-feed-default-template'.

:formatter formatter-function
     Instead of relying on a template, you may specify a function to format
     the outline node to be inserted as a child.  This function gets passed
     a property list describing a single feed item, and it should return a
     string that is a properly formatted Org outline node of level 1.

:new-handler function
     If adding new items as children to the outline is not what you want
     to do with new items, define a handler function that is called with
     a list of all new items in the feed, each one represented as a property
     list.  The handler should do what needs to be done, and org-feed will
     mark all items given to this handler as \"handled\", i.e. they will not
     be passed to this handler again in future readings of the feed.
     When the handler is called, point will be at the feed headline.

:changed-handler function
     This function gets passed a list of all entries that have been
     handled before, but are now still in the feed and have *changed*
     since last handled (as evidenced by a different sha1 hash).
     When the handler is called, point will be at the feed headline.

:parse-feed function
     This function gets passed a buffer, and should return a list
     of entries, each being a property list containing the
     `:guid' and `:item-full-text' keys.  The default is
     `org-feed-parse-rss-feed'; `org-feed-parse-atom-feed' is an
     alternative.

:parse-entry function
     This function gets passed an entry as returned by the parse-feed
     function, and should return the entry with interesting properties added.
     The default is `org-feed-parse-rss-entry'; `org-feed-parse-atom-entry'
     is an alternative."
  :group 'org-feed
  :type '(repeat
          (list :value ("" "http://" "" "")
                (string :tag "Name")
                (string :tag "Feed URL")
                (file :tag "File for inbox")
                (string :tag "Headline for inbox")
                (repeat :inline t
                        (choice
                         ;; `:drawer' is documented above and read by
                         ;; `org-feed-update', so offer it in Customize too.
                         (list :inline t :tag "Drawer"
                               (const :drawer)
                               (string :tag "Drawer Name"))
                         (list :inline t :tag "Filter"
                               (const :filter)
                               (symbol :tag "Filter Function"))
                         (list :inline t :tag "Template"
                               (const :template)
                               (string :tag "Template"))
                         (list :inline t :tag "Formatter"
                               (const :formatter)
                               (symbol :tag "Formatter Function"))
                         (list :inline t :tag "New items handler"
                               (const :new-handler)
                               (symbol :tag "Handler Function"))
                         (list :inline t :tag "Changed items"
                               (const :changed-handler)
                               (symbol :tag "Handler Function"))
                         (list :inline t :tag "Parse Feed"
                               (const :parse-feed)
                               (symbol :tag "Parse Feed Function"))
                         (list :inline t :tag "Parse Entry"
                               (const :parse-entry)
                               (symbol :tag "Parse Entry Function")))))))

(defcustom org-feed-drawer "FEEDSTATUS"
  "The name of the drawer for feed status information.
Each feed may also specify its own drawer name using the `:drawer'
parameter in `org-feed-alist'."
  :group 'org-feed
  :type '(string :tag "Drawer Name"))

(defcustom org-feed-default-template "\n* %h\n %U\n %description\n %a\n"
  "Template for the Org node created from RSS feed items.
This is just the default, each feed can specify its own.
Any fields from the feed item can be interpolated into the template with
%name, for example %title, %description, %pubDate etc.  In addition, the
following special escapes are valid as well:

%h      The title, or the first line of the description
%t      The date as a stamp, either from <pubDate> (if present), or
        the current date
%T      Date and time
%u,%U   Like %t,%T, but inactive time stamps
%a      A link, from <guid> if that is a permalink, else from <link>
%(sexp) Evaluate elisp `(sexp)' and replace with the result, the simple
        %-escapes above can be used as arguments, e.g. %(capitalize \\\"%h\\\")"
  :group 'org-feed
  :type '(string :tag "Template"))

(defcustom org-feed-save-after-adding t
  "Non-nil means save buffer after adding new feed items."
  :group 'org-feed
  :type 'boolean)

(defcustom org-feed-retrieve-method 'url-retrieve-synchronously
  "The method to be used to retrieve a feed URL.
This can be `curl' or `wget' to call these external programs, or it can be
an Emacs Lisp function that will return a buffer containing the content
of the file pointed to by the URL."
  :group 'org-feed
  :type '(choice
          (const :tag "Internally with url.el" url-retrieve-synchronously)
          (const :tag "Externally with curl" curl)
          (const :tag "Externally with wget" wget)
          (function :tag "Function")))

(defcustom org-feed-before-adding-hook nil
  "Hook that is run before adding new feed items to a file.
You might want to commit the file in its current state to version control,
for example."
  :group 'org-feed
  :type 'hook)

(defcustom org-feed-after-adding-hook nil
  "Hook that is run after new items have been added to a file.
Depending on `org-feed-save-after-adding', the buffer will already
have been saved."
  :group 'org-feed
  :type 'hook)

(defvar org-feed-buffer "*Org feed*"
  "The buffer used to retrieve a feed.")

;;;###autoload
(defun org-feed-update-all ()
  "Get inbox items from all feeds in `org-feed-alist'.
Feeds whose update signals an error are counted as unavailable and
reported in the summary message; they do not abort the other feeds."
  (interactive)
  (let ((entries 0)
        (errors 0)
        (total-feeds (length org-feed-alist)))
    (dolist (feed org-feed-alist)
      ;; `org-feed-update' returns a number (possibly 0) on success, so
      ;; a nil result can only come from a swallowed error.
      (let ((items (ignore-errors (org-feed-update feed))))
        (if items (cl-incf entries items)
          (cl-incf errors))))
    (message "%s from %d %s%s"
             (pcase entries
               (0 "No new entries")
               (1 "1 new entry")
               (_ (format "%d new entries" entries)))
             total-feeds
             (if (= total-feeds 1) "feed" "feeds")
             (if (= 0 errors) "" (format " (unavailable feeds: %d)" errors)))))

;;;###autoload
(defun org-feed-update (feed &optional retrieve-only)
  "Get inbox items from FEED.
FEED can be a string with an association in `org-feed-alist', or
it can be a list structured like an entry in `org-feed-alist'.

When RETRIEVE-ONLY is non-nil, only fetch the feed and return the
buffer (or buffer name) holding the raw feed data; no Org file is
visited or modified.

Otherwise return the number of new items found.  Signal an error
when FEED is unknown or when the feed cannot be retrieved."
  (interactive (list (org-completing-read "Feed name: " org-feed-alist)))
  (if (stringp feed) (setq feed (assoc feed org-feed-alist)))
  (unless feed
    (error "No such feed in `org-feed-alist'"))
  (catch 'exit
    (let ((name (car feed))
          (url (nth 1 feed))
          (file (or (nth 2 feed) (buffer-file-name (or (buffer-base-buffer)
                                                       (current-buffer)))))
          (headline (nth 3 feed))
          (filter (nth 1 (memq :filter feed)))
          (formatter (nth 1 (memq :formatter feed)))
          (new-handler (nth 1 (memq :new-handler feed)))
          (changed-handler (nth 1 (memq :changed-handler feed)))
          (template (or (nth 1 (memq :template feed))
                        org-feed-default-template))
          (drawer (or (nth 1 (memq :drawer feed))
                      org-feed-drawer))
          (parse-feed (or (nth 1 (memq :parse-feed feed))
                          'org-feed-parse-rss-feed))
          (parse-entry (or (nth 1 (memq :parse-entry feed))
                           'org-feed-parse-rss-entry))
          feed-buffer inbox-pos
          entries old-status status new changed guid-alist olds)
      (setq feed-buffer (org-feed-get-feed url))
      (unless (and feed-buffer (bufferp (get-buffer feed-buffer)))
        (error "Cannot get feed %s" name))
      (when retrieve-only
        (throw 'exit feed-buffer))
      (setq entries (funcall parse-feed feed-buffer))
      (ignore-errors (kill-buffer feed-buffer))
      (save-excursion
        (save-window-excursion
          (setq inbox-pos (org-feed-goto-inbox-internal file headline))
          (setq old-status (org-feed-read-previous-status inbox-pos drawer))
          ;; Add the "handled" status to the appropriate entries
          (setq entries (mapcar (lambda (e)
                                  (plist-put e :handled
                                             (nth 1 (assoc
                                                     (plist-get e :guid)
                                                     old-status))))
                                entries))
          ;; Find out which entries are new and which are changed
          (dolist (e entries)
            (if (not (plist-get e :handled))
                (push e new)
              (setq olds (nth 2 (assoc (plist-get e :guid) old-status)))
              (if (and olds
                       (not (string= (sha1
                                      (plist-get e :item-full-text))
                                     olds)))
                  (push e changed))))

          ;; Parse the relevant entries fully
          (setq new     (mapcar parse-entry new)
                changed (mapcar parse-entry changed))

          ;; Run the filter.  Each list is filtered on its own; the
          ;; changed list must not be derived from the new one.
          (when filter
            (setq new     (delq nil (mapcar filter new))
                  changed (delq nil (mapcar filter changed))))

          (when (not (or new changed))
            (message "No new items in feed %s" name)
            (throw 'exit 0))

          ;; Get alist based on guid, to look up entries
          (setq guid-alist
                (append
                 (mapcar (lambda (e) (list (plist-get e :guid) e)) new)
                 (mapcar (lambda (e) (list (plist-get e :guid) e)) changed)))

          ;; Construct the new status
          (setq status
                (mapcar
                 (lambda (e)
                   (let ((guid (plist-get e :guid)))
                     (list guid
                           ;; things count as handled if we handle them now,
                           ;; or if they were handled previously
                           (if (assoc guid guid-alist) t (plist-get e :handled))
                           ;; A hash, to detect changes
                           (sha1 (plist-get e :item-full-text)))))
                 entries))

          ;; Give the user a chance to act on the file (e.g. commit it)
          ;; before anything in it is modified.
          (save-excursion
            (run-hooks 'org-feed-before-adding-hook))

          ;; Handle new items in the feed
          (when new
            (if new-handler
                (progn
                  (goto-char inbox-pos)
                  (funcall new-handler new))
              ;; No custom handler, do the default adding: format the
              ;; new entries and insert them below the inbox heading.
              (org-feed-add-items
               inbox-pos
               (mapcar
                (lambda (e) (org-feed-format-entry e template formatter))
                new))))

          ;; Handle changed items in the feed
          (when (and changed-handler changed)
            (goto-char inbox-pos)
            (funcall changed-handler changed))

          ;; Write the new status
          ;; We do this only now, in case something goes wrong above, so
          ;; that we do not end up with a status that does not reflect
          ;; which items truly have been handled
          (org-feed-write-status inbox-pos drawer status)

          ;; Normalize the visibility of the inbox tree
          (goto-char inbox-pos)
          (org-fold-subtree t)
          (org-fold-show-children)

          ;; Hooks and messages
          (when org-feed-save-after-adding (save-buffer))
          (message "Added %d new item%s from feed %s to file %s, heading %s"
                   (length new) (if (= (length new) 1) "" "s")
                   name
                   (file-name-nondirectory file) headline)
          (run-hooks 'org-feed-after-adding-hook)
          (length new))))))

;;;###autoload
(defun org-feed-goto-inbox (feed)
  "Go to the inbox that captures the feed named FEED.
FEED is a feed name or an entry structured like those in
`org-feed-alist'."
  (interactive
   (list (if (= (length org-feed-alist) 1)
             (car org-feed-alist)
           (org-completing-read "Feed name: " org-feed-alist))))
  (if (stringp feed) (setq feed (assoc feed org-feed-alist)))
  (unless feed
    (error "No such feed in `org-feed-alist'"))
  (org-feed-goto-inbox-internal (nth 2 feed) (nth 3 feed)))

;;;###autoload
(defun org-feed-show-raw-feed (feed)
  "Show the raw feed buffer of FEED.
FEED is a feed name or an entry structured like those in
`org-feed-alist'."
  (interactive
   (list (if (= (length org-feed-alist) 1)
             (car org-feed-alist)
           (org-completing-read "Feed name: " org-feed-alist))))
  (if (stringp feed) (setq feed (assoc feed org-feed-alist)))
  (unless feed
    (error "No such feed in `org-feed-alist'"))
  (pop-to-buffer-same-window
   (org-feed-update feed 'retrieve-only))
  (goto-char (point-min)))

(defun org-feed-goto-inbox-internal (file heading)
  "Find or create HEADING in FILE.
Switch to that buffer, and return the position of that headline.
HEADING is matched literally; when no such headline exists, a new
top-level one is appended to the buffer."
  (find-file file)
  (widen)
  (goto-char (point-min))
  (if (re-search-forward
       ;; HEADING is plain text, not a regexp: quote it so that
       ;; characters like "+", "[" or "." match themselves.
       (concat "^\\*+[ \t]+" (regexp-quote heading)
               "[ \t]*\\(:.*?:[ \t]*\\)?$")
       nil t)
      (goto-char (match-beginning 0))
    (goto-char (point-max))
    (insert "\n\n* " heading "\n\n")
    (org-back-to-heading t))
  (point))

(defun org-feed-read-previous-status (pos drawer)
  "Get the alist of old GUIDs from the entry at POS.
This will find DRAWER and extract the alist.  Return nil when the
subtree at POS contains no such drawer."
  (save-excursion
    (goto-char pos)
    (let ((end (save-excursion (org-end-of-subtree t t))))
      (if (re-search-forward
           (concat "^[ \t]*:" (regexp-quote drawer)
                   ":[ \t]*\n\\(\\(?:.\\|\n\\)*?\\)\n[ \t]*:END:")
           end t)
          (read (match-string 1))
        nil))))

(defun org-feed-write-status (pos drawer status)
  "Write the feed STATUS to DRAWER in entry at POS.
The content of an existing DRAWER is replaced; otherwise a new
drawer is created in front of the next heading."
  (save-excursion
    (goto-char pos)
    (let ((end (save-excursion (org-end-of-subtree t t))))
      (if (re-search-forward (concat "^[ \t]*:" (regexp-quote drawer)
                                     ":[ \t]*\n")
                             end t)
          (progn
            (goto-char (match-end 0))
            (delete-region (point)
                           (save-excursion
                             (and (re-search-forward "^[ \t]*:END:" nil t)
                                  (match-beginning 0)))))
        (outline-next-heading)
        (insert " :" drawer ":\n :END:\n")
        (forward-line -1))
      (insert (pp-to-string status)))))

(defun org-feed-add-items (pos entries)
  "Add the formatted items to the headline at POS.
ENTRIES is a list of strings, each a formatted Org subtree.  They
are pasted as children after the existing content of the subtree.
Signal an error when POS is not at a headline."
  (let (entry level)
    (save-excursion
      (goto-char pos)
      (unless (looking-at org-complex-heading-regexp)
        (error "Wrong position"))
      (setq level (org-get-valid-level (length (match-string 1)) 1))
      (org-end-of-subtree t t)
      (skip-chars-backward " \t\n")
      (forward-line 1)
      (setq pos (point))
      (while (setq entry (pop entries))
        (org-paste-subtree level entry 'yank))
      (org-mark-ring-push pos))))

(defun org-feed-format-entry (entry template formatter)
  "Format ENTRY so that it can be inserted into an Org file.
ENTRY is a property list describing a feed item.  Return a string.

When FORMATTER is non-nil, it is called with ENTRY and its value is
returned.  Otherwise TEMPLATE is expanded as described in
`org-feed-default-template'.  A %name escape standing alone on its
line is replaced by a block whose continuation lines carry the
indentation of that line."
  (require 'org-capture)
  (if formatter (funcall formatter entry)
    (let* ((dlines
            (org-split-string (or (plist-get entry :description) "???")
                              "\n"))
           (time (or (if (plist-get entry :pubDate)
                         (org-read-date t t (plist-get entry :pubDate)))
                     (current-time)))
           (v-h (or (plist-get entry :title) (car dlines) "???"))
           (v-t (format-time-string (org-time-stamp-format nil nil) time))
           (v-T (format-time-string (org-time-stamp-format t nil) time))
           (v-u (format-time-string (org-time-stamp-format nil t) time))
           (v-U (format-time-string (org-time-stamp-format t t) time))
           (v-a (let ((tmp (or (and (plist-get entry :guid-permalink)
                                    (plist-get entry :guid))
                               (plist-get entry :link))))
                  (if tmp (format "[[%s]]\n" tmp ) ""))))
      (with-temp-buffer
        (insert template)
        (goto-char (point-min))

        ;; Mark %() embedded elisp for later evaluation.
        (org-capture-expand-embedded-elisp 'mark)

        ;; Simple %-escapes.  `org-capture-escaped-%' may modify
        ;; buffer and cripple match-data.  Use markers instead.
        (while (re-search-forward "%\\([a-zA-Z]+\\)" nil t)
          (let* ((key (match-string 1))
                 (beg (copy-marker (match-beginning 0)))
                 (end (copy-marker (match-end 0)))
                 (indentation 0)
                 ;; Whether the escape is the only thing on its line.
                 ;; This must be determined while the escape is still in
                 ;; the buffer, i.e. before it is deleted below.
                 (alone-on-line
                  (save-excursion
                    (save-match-data
                      (goto-char beg)
                      (forward-line 0)
                      (setq indentation (current-indentation))
                      (looking-at
                       (concat "^\\([ \t]*\\)%" key "[ \t]*$"))))))
            (unless (org-capture-escaped-%)
              (delete-region beg end)
              (set-marker beg nil)
              (set-marker end nil)
              (let ((replacement
                     (pcase key
                       ("h" v-h)
                       ("t" v-t)
                       ("T" v-T)
                       ("u" v-u)
                       ("U" v-U)
                       ("a" v-a)
                       (name
                        (let ((v (plist-get entry (intern (concat ":" name)))))
                          (if alone-on-line
                              (org-feed-make-indented-block v indentation)
                            v))))))
                (when replacement
                  (insert
                   ;; Escape string delimiters within embedded lisp.
                   (if (org-capture-inside-embedded-elisp-p)
                       (replace-regexp-in-string "\"" "\\\\\"" replacement)
                     replacement)))))))

        ;; %() embedded elisp
        (org-capture-expand-embedded-elisp)

        (decode-coding-string
         (buffer-string) (detect-coding-region (point-min) (point-max) t))))))

(defun org-feed-make-indented-block (s n)
  "Add indentation of N spaces to a multiline string S.
The first line is left alone; every following line is prefixed with
N spaces.  S is returned unchanged when it is not a string (for
example nil for a missing field) or contains no newline."
  (if (not (and (stringp s) (string-match "\n" s)))
      s
    (mapconcat #'identity
               (org-split-string s "\n")
               (concat "\n" (make-string n ?\s)))))

(defun org-feed-skip-http-headers (buffer)
  "Remove HTTP headers from BUFFER, and return it.
Assumes headers are indeed present!"
  (with-current-buffer buffer
    (widen)
    (goto-char (point-min))
    ;; The header block ends at the first empty line.
    (search-forward "\n\n")
    (delete-region (point-min) (point))
    buffer))

(defun org-feed-get-feed (url)
  "Get the RSS feed file at URL and return the buffer.
The retrieval method is chosen by `org-feed-retrieve-method'.  For
`curl' and `wget' the return value is the name of the buffer
`org-feed-buffer'.  Return nil when nothing could be retrieved."
  (cond
   ((eq org-feed-retrieve-method 'url-retrieve-synchronously)
    ;; `url-retrieve-synchronously' returns nil when no data is
    ;; available; pass that on instead of failing on a nil buffer.
    (let ((buffer (url-retrieve-synchronously url)))
      (and buffer (org-feed-skip-http-headers buffer))))
   ((eq org-feed-retrieve-method 'curl)
    (ignore-errors (kill-buffer org-feed-buffer))
    (call-process "curl" nil org-feed-buffer nil "--silent" url)
    org-feed-buffer)
   ((eq org-feed-retrieve-method 'wget)
    (ignore-errors (kill-buffer org-feed-buffer))
    (call-process "wget" nil org-feed-buffer nil "-q" "-O" "-" url)
    org-feed-buffer)
   ((functionp org-feed-retrieve-method)
    (funcall org-feed-retrieve-method url))))

(defun org-feed-parse-rss-feed (buffer)
  "Parse BUFFER for RSS feed entries.
Returns a list of entries, with each entry a property list,
containing the properties `:guid' and `:item-full-text'.
An <item> without a closing tag (truncated feed) ends the scan and
is ignored."
  (require 'xml)
  (with-current-buffer buffer
    (let ((case-fold-search t)
          entries beg end item guid)
      (widen)
      (goto-char (point-min))
      (while (and (re-search-forward "<item\\>.*?>" nil t)
                  (setq beg (point))
                  ;; Stop at an unterminated item instead of failing.
                  (re-search-forward "</item>" nil t))
        (setq end (match-beginning 0)
              item (buffer-substring beg end)
              guid (and (string-match
                         "<guid\\>.*?>\\(\\(?:.\\|\n\\)*?\\)</guid>" item)
                        (xml-substitute-special
                         (match-string-no-properties 1 item))))
        (push (list :guid guid :item-full-text item) entries)
        (goto-char end))
      (nreverse entries))))

(defun org-feed-parse-rss-entry (entry)
  "Parse the `:item-full-text' field of ENTRY for xml tags.
Each <tag>text</tag> element found is added to ENTRY as property
`:tag'.  Property `:guid-permalink' is set to t unless the text
contains isPermaLink=\"false\".  Return the extended ENTRY."
  (require 'xml)
  (with-temp-buffer
    (insert (plist-get entry :item-full-text))
    (goto-char (point-min))
    (while (re-search-forward "<\\([a-zA-Z]+\\>\\).*?>\\(\\(?:.\\|\n\\)*?\\)</\\1>"
                              nil t)
      (setq entry (plist-put entry
                             (intern (concat ":" (match-string 1)))
                             (xml-substitute-special (match-string 2)))))
    (goto-char (point-min))
    (unless (re-search-forward "isPermaLink[ \t]*=[ \t]*\"false\"" nil t)
      (setq entry (plist-put entry :guid-permalink t))))
  entry)

(defun org-feed-parse-atom-feed (buffer)
  "Parse BUFFER for Atom feed entries.
Returns a list of entries, with each entry a property list,
containing the properties `:guid' and `:item-full-text'.

The `:item-full-text' property actually contains the sexp
formatted as a string, not the original XML data."
  (require 'xml)
  (with-current-buffer buffer
    (widen)
    (let ((feed (car (xml-parse-region (point-min) (point-max)))))
      (mapcar
       (lambda (entry)
         (list
          :guid (car (xml-node-children (car (xml-get-children entry 'id))))
          :item-full-text (prin1-to-string entry)))
       (xml-get-children feed 'entry)))))

(defun org-feed--xml-node-text (node)
  "Return the first child of XML NODE as unescaped text, or nil.
The value is nil when NODE is nil, has no children, or when its
first child is not a string."
  (let ((text (car (xml-node-children node))))
    (and (stringp text) (xml-substitute-special text))))

(defun org-feed-parse-atom-entry (entry)
  "Parse the `:item-full-text' of ENTRY as a sexp and create new properties.
Add `:link' (href of the first <link>), `:title' and, when the
entry has a <content> element, `:description'.  A <content>
element without a type attribute is treated as plain text.
Return the extended ENTRY."
  (require 'xml)
  (let ((xml (car (read-from-string (plist-get entry :item-full-text)))))
    ;; Get first <link href='foo'/>.
    (setq entry (plist-put entry :link
                           (xml-get-attribute
                            (car (xml-get-children xml 'link))
                            'href)))
    ;; Add <title/> as :title.  A missing or empty title yields nil.
    (setq entry (plist-put entry :title
                           (org-feed--xml-node-text
                            (car (xml-get-children xml 'title)))))
    (let* ((content (car (xml-get-children xml 'content)))
           ;; RFC 4287: a missing type attribute means "text".
           (type (or (xml-get-attribute-or-nil content 'type) "text")))
      (when content
        (cond
         ((string= type "text")
          ;; We like plain text.
          (setq entry (plist-put entry :description
                                 (org-feed--xml-node-text content))))
         ((string= type "html")
          ;; TODO: convert HTML to Org markup.
          (setq entry (plist-put entry :description
                                 (org-feed--xml-node-text content))))
         ((string= type "xhtml")
          ;; TODO: convert XHTML to Org markup.
          (setq entry (plist-put entry :description
                                 (prin1-to-string
                                  (xml-node-children content)))))
         (t
          (setq entry (plist-put entry :description
                                 (format-message
                                  "Unknown `%s' content." type)))))))
    entry))

(provide 'org-feed)

;; Local variables:
;; generated-autoload-file: "org-loaddefs.el"
;; End:

;;; org-feed.el ends here