ledger-regex.el (13879B)
1 ;;; ledger-regex.el --- Helper code for use with the "ledger" command-line tool -*- lexical-binding: t; -*- 2 3 ;; Copyright (C) 2003-2016 John Wiegley (johnw AT gnu DOT org) 4 5 ;; This file is not part of GNU Emacs. 6 7 ;; This is free software; you can redistribute it and/or modify it under 8 ;; the terms of the GNU General Public License as published by the Free 9 ;; Software Foundation; either version 2, or (at your option) any later 10 ;; version. 11 ;; 12 ;; This is distributed in the hope that it will be useful, but WITHOUT 13 ;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 14 ;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 15 ;; for more details. 16 ;; 17 ;; You should have received a copy of the GNU General Public License 18 ;; along with GNU Emacs; see the file COPYING. If not, write to the 19 ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, 20 ;; MA 02110-1301 USA. 21 22 23 ;;; Commentary: 24 ;; Regular expressions used by ledger-mode. 25 26 ;;; Code: 27 28 (require 'rx) 29 (require 'cl-lib) 30 31 (defvar ledger-iso-date-regexp) 32 33 (defconst ledger-amount-decimal-comma-regex 34 "-?\\(?:[1-9][0-9.]\\|0\\)*[,]?[0-9]*") 35 36 (defconst ledger-amount-decimal-period-regex 37 "-?\\(?:[1-9][0-9,]*\\|0\\)[.]?[0-9]*") 38 39 (defconst ledger-other-entries-regex 40 "\\(^[~=A-Za-z].+\\)+") 41 42 (defconst ledger-comment-regex 43 "^[;#|\\*%].*\\|[ \t]+;.*") 44 45 (defconst ledger-multiline-comment-start-regex 46 "^!comment$") 47 (defconst ledger-multiline-comment-end-regex 48 "^!end_comment$") 49 (defconst ledger-multiline-comment-regex 50 "^!comment\n\\(.*\n\\)*?!end_comment$") 51 52 (defconst ledger-payee-any-status-regex 53 "^[0-9]+[-/][-/.=0-9]+\\(?:\\s-+\\*\\)?\\(?:\\s-+(.*?)\\)?\\s-+\\(.+?\\)\\s-*\\(?:;\\|$\\)") 54 55 (defconst ledger-payee-pending-regex 56 "^[0-9]+[-/][-/.=0-9]+\\s-!\\s-+\\(([^)]+)\\s-+\\)?\\([^*].+?\\)\\s-*\\(;\\|$\\)") 57 58 (defconst ledger-payee-cleared-regex 59 "^[0-9]+[-/][-/.=0-9]+\\s-\\*\\s-+\\(([^)]+)\\s-+\\)?\\([^*].+?\\)\\s-*\\(;\\|$\\)") 60 61 (defconst ledger-payee-uncleared-regex 62 "^[0-9]+[-/][-/.=0-9]+\\s-+\\(([^)]+)\\s-+\\)?\\([^*].+?\\)\\s-*\\(;\\|$\\)") 63 64 (defconst ledger-payee-directive-regex 65 (concat "^payee[ \t]+\\(.*?\\)[ \t]*$")) 66 67 (defconst ledger-payee-name-or-directive-regex 68 (format "\\(?:%s\\|%s\\)" ledger-payee-any-status-regex ledger-payee-directive-regex)) 69 70 (defconst ledger-init-string-regex 71 "^--.+?\\($\\|[ ]\\)") 72 73 (defconst ledger-account-name-regex 74 "\\(?1:[^][(); \t\r\n]+\\(?: [^][(); \t\r\n]+\\)*\\)") 75 76 (defconst ledger-account-directive-regex 77 (concat "^account[ \t]+" ledger-account-name-regex)) 78 79 (defconst ledger-account-name-maybe-virtual-regex 80 (concat "[[(]?" ledger-account-name-regex "[])]?")) 81 82 (defconst ledger-account-any-status-regex 83 (concat "^[ \t]+\\(?:[!*][ \t]*\\)?" ledger-account-name-maybe-virtual-regex)) 84 85 ;; This would incorrectly match "account (foo)", but writing the regexp this way 86 ;; allows us to have just one match result 87 (defconst ledger-account-name-or-directive-regex 88 (format "\\(?:%s\\|%s\\)" ledger-account-any-status-regex ledger-account-directive-regex)) 89 90 (defconst ledger-account-pending-regex 91 (concat "\\(^[ \t]+\\)!" ledger-account-name-maybe-virtual-regex)) 92 93 (defconst ledger-account-cleared-regex 94 (concat "\\(^[ \t]+\\)*" ledger-account-name-maybe-virtual-regex)) 95 96 (defmacro ledger-define-regexp (name regex docs &rest args) 97 "Simplify the creation of a Ledger regex and helper functions." 98 (let* ((regex (eval regex)) 99 (group-count (regexp-opt-depth regex)) 100 (defs 101 (list 102 `(defconst 103 ,(intern (concat "ledger-" (symbol-name name) "-regexp")) 104 ,regex 105 ,docs) 106 `(defconst 107 ,(intern (concat "ledger-regex-" (symbol-name name) 108 "-group--count")) 109 ,group-count))) 110 (addend 0) last-group) 111 (if (null args) 112 (progn 113 (when (cl-plusp group-count) 114 (nconc 115 defs 116 (list 117 `(defconst 118 ,(intern 119 (concat "ledger-regex-" (symbol-name name) "-group")) 120 1))) 121 (nconc 122 defs 123 (list 124 `(defmacro 125 ,(intern (concat "ledger-regex-" (symbol-name name))) 126 (&optional string) 127 ,(format "Return the match string for the %s" name) 128 (match-string 129 ,(intern (concat "ledger-regex-" (symbol-name name) 130 "-group")) 131 string)))))) 132 133 (while args 134 (let (arg var grouping target force-increment) 135 (setq arg (pop args)) 136 137 (when (eq arg :separate) 138 (setq arg (pop args)) 139 (setq force-increment t)) 140 141 (if (symbolp arg) 142 (setq var arg target arg) 143 (cl-assert (listp arg)) 144 (if (= 2 (length arg)) 145 (setq var (car arg) 146 target (cadr arg)) 147 (setq var (car arg) 148 grouping (cadr arg) 149 target (cl-caddr arg)))) 150 151 (if (and last-group 152 (or (not (eq last-group (or grouping target))) 153 force-increment)) 154 (cl-incf addend 155 (symbol-value 156 (intern-soft (concat "ledger-regex-" 157 (symbol-name last-group) 158 "-group--count"))))) 159 (nconc 160 defs 161 (list 162 `(defconst 163 ,(intern (concat "ledger-regex-" (symbol-name name) 164 "-group-" (symbol-name var))) 165 ,(+ addend 166 (symbol-value 167 (intern-soft 168 (if grouping 169 (concat "ledger-regex-" (symbol-name grouping) 170 "-group-" (symbol-name target)) 171 (concat "ledger-regex-" (symbol-name target) 172 "-group")))))))) 173 (nconc 174 defs 175 (list 176 `(defmacro 177 ,(intern (concat "ledger-regex-" (symbol-name name) 178 "-" (symbol-name var))) 179 (&optional string) 180 ,(format "Return the sub-group match for the %s %s." 181 name var) 182 (match-string 183 ,(intern (concat "ledger-regex-" (symbol-name name) 184 "-group-" (symbol-name var))) 185 string)))) 186 187 (setq last-group (or grouping target))))) 188 189 (cons 'eval-and-compile defs))) 190 191 (put 'ledger-define-regexp 'lisp-indent-function 1) 192 193 (ledger-define-regexp iso-date 194 (let ((sep '(or ?- ?/))) 195 (rx (group 196 (and (group (= 4 num)) 197 (eval sep) 198 (group (and num (? num))) 199 (eval sep) 200 (group (and num (? num))))))) 201 "Match a single date, in its \"written\" form.") 202 203 (ledger-define-regexp full-date 204 (macroexpand 205 `(rx (and (regexp ,ledger-iso-date-regexp) 206 (? (and ?= (regexp ,ledger-iso-date-regexp)))))) 207 "Match a compound date, of the form ACTUAL=EFFECTIVE" 208 (actual iso-date) 209 :separate 210 (effective iso-date)) 211 212 (ledger-define-regexp state 213 (rx (group (any ?! ?*))) 214 "Match a transaction or posting's \"state\" character.") 215 216 (ledger-define-regexp code 217 (rx (and ?\( (group (+? (not (any ?\))))) ?\))) 218 "Match the transaction code.") 219 220 (ledger-define-regexp long-space 221 (rx (and (*? blank) 222 (or (and ? (or ? ?\t)) ?\t))) 223 "Match a \"long space\".") 224 225 (ledger-define-regexp note 226 (rx (group (+ nonl))) 227 "") 228 229 (ledger-define-regexp end-note 230 (macroexpand 231 `(rx (and (regexp ,ledger-long-space-regexp) ?\; 232 (regexp ,ledger-note-regexp)))) 233 "") 234 235 (ledger-define-regexp full-note 236 (macroexpand 237 `(rx (and line-start (+ blank) 238 ?\; (regexp ,ledger-note-regexp)))) 239 "") 240 241 (ledger-define-regexp xact-line 242 (macroexpand 243 `(rx (and line-start 244 (regexp ,ledger-full-date-regexp) 245 (? (and (+ blank) (regexp ,ledger-state-regexp))) 246 (? (and (+ blank) (regexp ,ledger-code-regexp))) 247 (+ blank) (+? nonl) 248 (? (regexp ,ledger-end-note-regexp)) 249 line-end))) 250 "Match a transaction's first line (and optional notes)." 251 (actual-date full-date actual) 252 (effective-date full-date effective) 253 state 254 code 255 (note end-note)) 256 257 (ledger-define-regexp account 258 (rx (group (and (not (any blank ?\[ ?\( ?: ?\;)) (*? nonl)))) 259 "") 260 261 (ledger-define-regexp account-kind 262 (rx (group (? (any ?\[ ?\()))) 263 "") 264 265 (ledger-define-regexp full-account 266 (macroexpand 267 `(rx (and (regexp ,ledger-account-kind-regexp) 268 (regexp ,ledger-account-regexp) 269 (? (any ?\] ?\)))))) 270 "" 271 (kind account-kind) 272 (name account)) 273 274 (ledger-define-regexp commodity-no-group 275 (rx (or (and ?\" (+ (not (any ?\"))) ?\") 276 (+ (not (any blank ?\n 277 digit 278 ?- ?\[ ?\] 279 ?. ?, ?\; ?+ ?* ?/ ?^ ?? ?: ?& ?| ?! ?= 280 ?\< ?\> ?\{ ?\} ?\( ?\) ?@))))) 281 "") 282 283 (ledger-define-regexp commodity 284 (macroexpand 285 `(rx (group (regexp ,ledger-commodity-no-group-regexp)))) 286 "") 287 288 (ledger-define-regexp amount-no-group 289 (rx (and (? ?-) 290 (+ digit) 291 (* (and (any ?. ?,) (+ digit))))) 292 "") 293 294 (ledger-define-regexp amount 295 (macroexpand 296 `(rx (group (regexp ,ledger-amount-no-group-regexp)))) 297 "") 298 299 (ledger-define-regexp commoditized-amount 300 (macroexpand 301 `(rx (group 302 (or (and (regexp ,ledger-commodity-no-group-regexp) 303 (*? blank) 304 (regexp ,ledger-amount-no-group-regexp)) 305 (and (regexp ,ledger-amount-no-group-regexp) 306 (*? blank) 307 (regexp ,ledger-commodity-no-group-regexp)))))) 308 "") 309 310 (ledger-define-regexp commodity-annotations 311 (macroexpand 312 `(rx (* (+ blank) 313 (or (and ?\{ (regexp ,ledger-commoditized-amount-regexp) ?\}) 314 (and ?\[ (regexp ,ledger-iso-date-regexp) ?\]) 315 (and ?\( (not (any ?\))) ?\)))))) 316 "" 317 commoditized-amount 318 iso-date) 319 320 (ledger-define-regexp cost 321 (macroexpand 322 `(rx (and (or "@" "@@") (+ blank) 323 (regexp ,ledger-commoditized-amount-regexp)))) 324 "") 325 326 (ledger-define-regexp balance-assertion 327 (macroexpand 328 `(rx (and ?= (+ blank) 329 (regexp ,ledger-commoditized-amount-regexp)))) 330 "") 331 332 (ledger-define-regexp full-amount 333 (rx (group (+? (not (any ?\;))))) 334 "") 335 336 (ledger-define-regexp post-line 337 (macroexpand 338 `(rx (and line-start (+ blank) 339 (? (and (regexp ,ledger-state-regexp) (* blank))) 340 (regexp ,ledger-full-account-regexp) 341 (? (and (regexp ,ledger-long-space-regexp) 342 (regexp ,ledger-full-amount-regexp))) 343 (? (regexp ,ledger-end-note-regexp)) 344 line-end))) 345 "" 346 state 347 (account-kind full-account kind) 348 (account full-account name) 349 (amount full-amount) 350 (note end-note)) 351 352 (defconst ledger-amount-regex 353 (concat "\\( \\|\t\\| \t\\)[ \t]*[-+=]? *" 354 "\\(?:" ledger-commodity-regexp " *\\)?" 355 ;; We either match just a number after the commodity with no 356 ;; decimal or thousand separators or a number with thousand 357 ;; separators. If we have a decimal part starting with `,' 358 ;; or `.', because the match is non-greedy, it must leave at 359 ;; least one of those symbols for the following capture 360 ;; group, which then finishes the decimal part. 361 "\\([-+=]? *\\(?:[0-9]+\\|[0-9,.]+?\\)\\)" 362 "\\([,.][0-9)]+\\)?" 363 "\\(?: *" ledger-commodity-regexp "\\)?" 364 "\\([ \t]*[@={]@?[^\n;]+?\\)?" 365 "\\([ \t]+;.+?\\|[ \t]*\\)?$")) 366 367 (ledger-define-regexp year 368 (rx (group (+ (any "0-9")))) 369 "") 370 371 (ledger-define-regexp payee 372 (rx (group (+? nonl))) 373 "") 374 375 (ledger-define-regexp iterate 376 (macroexpand `(rx (or (and (or "Y" "year") 377 (+ (syntax ?-)) 378 (regexp ,ledger-year-regexp)) 379 (and (regexp ,ledger-full-date-regexp) 380 (? (and (+ blank) (regexp ,ledger-state-regexp))) 381 (? (and (+ blank) (regexp ,ledger-code-regexp))) 382 (+ blank) 383 (regexp ,ledger-payee-regexp) 384 (? (regexp ,ledger-end-note-regexp)))))) 385 "" 386 year 387 (actual-date full-date actual) 388 (effective-date full-date effective) 389 state 390 code 391 payee 392 (note end-note)) 393 394 (defconst ledger-incomplete-date-regexp 395 "\\(?:\\([0-9]\\{1,2\\}\\)[-/]\\)?\\([0-9]\\{1,2\\}\\)") 396 397 (defconst ledger-xact-start-regex 398 (concat "^" ledger-iso-date-regexp ;; subexp 1 399 "\\(=" ledger-iso-date-regexp "\\)?" 400 )) 401 402 (defconst ledger-xact-after-date-regex 403 (concat "\\(?:[ \t]+\\([*!]\\)\\)?" ;; mark, subexp 1 404 "\\(?:[ \t]+\\((.*?)\\)\\)?" ;; code, subexp 2 405 "\\(?:[ \t]+\\(.+?\\)\\)?" ;; desc, subexp 3 406 "\\(?:\\(?:\t\\|[ \t]\\{2,\\}\\)\\(;[^\n]*\\)\\)?$" ;; comment, subexp 4 407 )) 408 409 (defconst ledger-posting-regex 410 (concat "^[[:blank:]]+" ; initial white space 411 ;; state and account, subexp 1 412 "\\(\\([*!]\\)?" ; state, subexp 2 413 "[[:blank:]]*\\(.*?\\)\\)?" ; account, subexp 3 414 "\\(?:\\(?:\t\\|[[:blank:]]\\{2,\\}\\)" 415 "\\([^;\n]*?\\)\\)?" ; amount, subexp 4 416 "\\(?:\\(?:\t\\|[[:blank:]]\\{2,\\}\\)" 417 "\\(;.*\\)\\)?$" ; comment, subexp 5 418 )) 419 420 421 422 (defconst ledger-directive-start-regex 423 "[=~;#%|\\*[A-Za-z]") 424 425 426 (provide 'ledger-regex) 427 428 ;;; ledger-regex.el ends here