config

Personal configuration.
git clone git://code.dwrz.net/config
Log | Files | Refs

ledger-regex.el (13879B)


      1 ;;; ledger-regex.el --- Helper code for use with the "ledger" command-line tool  -*- lexical-binding: t; -*-
      2 
      3 ;; Copyright (C) 2003-2016 John Wiegley (johnw AT gnu DOT org)
      4 
      5 ;; This file is not part of GNU Emacs.
      6 
      7 ;; This is free software; you can redistribute it and/or modify it under
      8 ;; the terms of the GNU General Public License as published by the Free
      9 ;; Software Foundation; either version 2, or (at your option) any later
     10 ;; version.
     11 ;;
     12 ;; This is distributed in the hope that it will be useful, but WITHOUT
     13 ;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
     14 ;; FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     15 ;; for more details.
     16 ;;
     17 ;; You should have received a copy of the GNU General Public License
     18 ;; along with GNU Emacs; see the file COPYING.  If not, write to the
     19 ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
     20 ;; MA 02110-1301 USA.
     21 
     22 
     23 ;;; Commentary:
     24 ;; Regular expressions used by ledger-mode.
     25 
     26 ;;; Code:
     27 
     28 (require 'rx)
     29 (require 'cl-lib)
     30 
     31 (defvar ledger-iso-date-regexp)
     32 
     33 (defconst ledger-amount-decimal-comma-regex
     34   "-?\\(?:[1-9][0-9.]\\|0\\)*[,]?[0-9]*")
     35 
     36 (defconst ledger-amount-decimal-period-regex
     37   "-?\\(?:[1-9][0-9,]*\\|0\\)[.]?[0-9]*")
     38 
     39 (defconst ledger-other-entries-regex
     40   "\\(^[~=A-Za-z].+\\)+")
     41 
     42 (defconst ledger-comment-regex
     43   "^[;#|\\*%].*\\|[ \t]+;.*")
     44 
     45 (defconst ledger-multiline-comment-start-regex
     46   "^!comment$")
     47 (defconst ledger-multiline-comment-end-regex
     48   "^!end_comment$")
     49 (defconst ledger-multiline-comment-regex
     50   "^!comment\n\\(.*\n\\)*?!end_comment$")
     51 
     52 (defconst ledger-payee-any-status-regex
     53   "^[0-9]+[-/][-/.=0-9]+\\(?:\\s-+\\*\\)?\\(?:\\s-+(.*?)\\)?\\s-+\\(.+?\\)\\s-*\\(?:;\\|$\\)")
     54 
     55 (defconst ledger-payee-pending-regex
     56   "^[0-9]+[-/][-/.=0-9]+\\s-!\\s-+\\(([^)]+)\\s-+\\)?\\([^*].+?\\)\\s-*\\(;\\|$\\)")
     57 
     58 (defconst ledger-payee-cleared-regex
     59   "^[0-9]+[-/][-/.=0-9]+\\s-\\*\\s-+\\(([^)]+)\\s-+\\)?\\([^*].+?\\)\\s-*\\(;\\|$\\)")
     60 
     61 (defconst ledger-payee-uncleared-regex
     62   "^[0-9]+[-/][-/.=0-9]+\\s-+\\(([^)]+)\\s-+\\)?\\([^*].+?\\)\\s-*\\(;\\|$\\)")
     63 
     64 (defconst ledger-payee-directive-regex
     65   (concat "^payee[ \t]+\\(.*?\\)[ \t]*$"))
     66 
     67 (defconst ledger-payee-name-or-directive-regex
     68   (format "\\(?:%s\\|%s\\)" ledger-payee-any-status-regex ledger-payee-directive-regex))
     69 
     70 (defconst ledger-init-string-regex
     71   "^--.+?\\($\\|[ ]\\)")
     72 
     73 (defconst ledger-account-name-regex
     74   "\\(?1:[^][(); \t\r\n]+\\(?: [^][(); \t\r\n]+\\)*\\)")
     75 
     76 (defconst ledger-account-directive-regex
     77   (concat "^account[ \t]+" ledger-account-name-regex))
     78 
     79 (defconst ledger-account-name-maybe-virtual-regex
     80   (concat "[[(]?" ledger-account-name-regex "[])]?"))
     81 
     82 (defconst ledger-account-any-status-regex
     83   (concat "^[ \t]+\\(?:[!*][ \t]*\\)?" ledger-account-name-maybe-virtual-regex))
     84 
     85 ;; This would incorrectly match "account (foo)", but writing the regexp this way
     86 ;; allows us to have just one match result
     87 (defconst ledger-account-name-or-directive-regex
     88   (format "\\(?:%s\\|%s\\)" ledger-account-any-status-regex ledger-account-directive-regex))
     89 
     90 (defconst ledger-account-pending-regex
     91   (concat "\\(^[ \t]+\\)!" ledger-account-name-maybe-virtual-regex))
     92 
     93 (defconst ledger-account-cleared-regex
     94   (concat "\\(^[ \t]+\\)*" ledger-account-name-maybe-virtual-regex))
     95 
     96 (defmacro ledger-define-regexp (name regex docs &rest args)
     97   "Simplify the creation of a Ledger regex and helper functions."
     98   (let* ((regex (eval regex))
     99          (group-count (regexp-opt-depth regex))
    100          (defs
    101            (list
    102             `(defconst
    103                ,(intern (concat "ledger-" (symbol-name name) "-regexp"))
    104                ,regex
    105                ,docs)
    106             `(defconst
    107                ,(intern (concat "ledger-regex-" (symbol-name name)
    108                                 "-group--count"))
    109                ,group-count)))
    110          (addend 0) last-group)
    111     (if (null args)
    112         (progn
    113           (when (cl-plusp group-count)
    114             (nconc
    115              defs
    116              (list
    117               `(defconst
    118                  ,(intern
    119                    (concat "ledger-regex-" (symbol-name name) "-group"))
    120                  1)))
    121             (nconc
    122              defs
    123              (list
    124               `(defmacro
    125                    ,(intern (concat "ledger-regex-" (symbol-name name)))
    126                    (&optional string)
    127                  ,(format "Return the match string for the %s" name)
    128                  (match-string
    129                   ,(intern (concat "ledger-regex-" (symbol-name name)
    130                                    "-group"))
    131                   string))))))
    132 
    133       (while args
    134         (let (arg var grouping target force-increment)
    135           (setq arg (pop args))
    136 
    137           (when (eq arg :separate)
    138             (setq arg (pop args))
    139             (setq force-increment t))
    140 
    141           (if (symbolp arg)
    142               (setq var arg target arg)
    143             (cl-assert (listp arg))
    144             (if (= 2 (length arg))
    145                 (setq var (car arg)
    146                       target (cadr arg))
    147               (setq var (car arg)
    148                     grouping (cadr arg)
    149                     target (cl-caddr arg))))
    150 
    151           (if (and last-group
    152                    (or (not (eq last-group (or grouping target)))
    153                        force-increment))
    154               (cl-incf addend
    155                        (symbol-value
    156                         (intern-soft (concat "ledger-regex-"
    157                                              (symbol-name last-group)
    158                                              "-group--count")))))
    159           (nconc
    160            defs
    161            (list
    162             `(defconst
    163                ,(intern (concat "ledger-regex-" (symbol-name name)
    164                                 "-group-" (symbol-name var)))
    165                ,(+ addend
    166                    (symbol-value
    167                     (intern-soft
    168                      (if grouping
    169                          (concat "ledger-regex-" (symbol-name grouping)
    170                                  "-group-" (symbol-name target))
    171                        (concat "ledger-regex-" (symbol-name target)
    172                                "-group"))))))))
    173           (nconc
    174            defs
    175            (list
    176             `(defmacro
    177                  ,(intern (concat "ledger-regex-" (symbol-name name)
    178                                   "-" (symbol-name var)))
    179                  (&optional string)
    180                ,(format "Return the sub-group match for the %s %s."
    181                         name var)
    182                (match-string
    183                 ,(intern (concat "ledger-regex-" (symbol-name name)
    184                                  "-group-" (symbol-name var)))
    185                 string))))
    186 
    187           (setq last-group (or grouping target)))))
    188 
    189     (cons 'eval-and-compile defs)))
    190 
    191 (put 'ledger-define-regexp 'lisp-indent-function 1)
    192 
    193 (ledger-define-regexp iso-date
    194   (let ((sep '(or ?- ?/)))
    195     (rx (group
    196          (and (group (= 4 num))
    197               (eval sep)
    198               (group (and num (? num)))
    199               (eval sep)
    200               (group (and num (? num)))))))
    201   "Match a single date, in its \"written\" form.")
    202 
    203 (ledger-define-regexp full-date
    204   (macroexpand
    205    `(rx (and (regexp ,ledger-iso-date-regexp)
    206              (? (and ?= (regexp ,ledger-iso-date-regexp))))))
    207   "Match a compound date, of the form ACTUAL=EFFECTIVE"
    208   (actual iso-date)
    209   :separate
    210   (effective iso-date))
    211 
    212 (ledger-define-regexp state
    213   (rx (group (any ?! ?*)))
    214   "Match a transaction or posting's \"state\" character.")
    215 
    216 (ledger-define-regexp code
    217   (rx (and ?\( (group (+? (not (any ?\))))) ?\)))
    218   "Match the transaction code.")
    219 
    220 (ledger-define-regexp long-space
    221   (rx (and (*? blank)
    222            (or (and ?  (or ?  ?\t)) ?\t)))
    223   "Match a \"long space\".")
    224 
    225 (ledger-define-regexp note
    226   (rx (group (+ nonl)))
    227   "")
    228 
    229 (ledger-define-regexp end-note
    230   (macroexpand
    231    `(rx (and (regexp ,ledger-long-space-regexp) ?\;
    232              (regexp ,ledger-note-regexp))))
    233   "")
    234 
    235 (ledger-define-regexp full-note
    236   (macroexpand
    237    `(rx (and line-start (+ blank)
    238              ?\; (regexp ,ledger-note-regexp))))
    239   "")
    240 
    241 (ledger-define-regexp xact-line
    242   (macroexpand
    243    `(rx (and line-start
    244              (regexp ,ledger-full-date-regexp)
    245              (? (and (+ blank) (regexp ,ledger-state-regexp)))
    246              (? (and (+ blank) (regexp ,ledger-code-regexp)))
    247              (+ blank) (+? nonl)
    248              (? (regexp ,ledger-end-note-regexp))
    249              line-end)))
    250   "Match a transaction's first line (and optional notes)."
    251   (actual-date full-date actual)
    252   (effective-date full-date effective)
    253   state
    254   code
    255   (note end-note))
    256 
    257 (ledger-define-regexp account
    258   (rx (group (and (not (any blank ?\[ ?\( ?: ?\;)) (*? nonl))))
    259   "")
    260 
    261 (ledger-define-regexp account-kind
    262   (rx (group (? (any ?\[ ?\())))
    263   "")
    264 
    265 (ledger-define-regexp full-account
    266   (macroexpand
    267    `(rx (and (regexp ,ledger-account-kind-regexp)
    268              (regexp ,ledger-account-regexp)
    269              (? (any ?\] ?\))))))
    270   ""
    271   (kind account-kind)
    272   (name account))
    273 
    274 (ledger-define-regexp commodity-no-group
    275   (rx (or (and ?\" (+ (not (any ?\"))) ?\")
    276           (+ (not (any blank ?\n
    277                        digit
    278                        ?- ?\[ ?\]
    279                        ?. ?, ?\; ?+ ?* ?/ ?^ ?? ?: ?& ?| ?! ?=
    280                        ?\< ?\> ?\{ ?\} ?\( ?\) ?@)))))
    281   "")
    282 
    283 (ledger-define-regexp commodity
    284   (macroexpand
    285    `(rx (group (regexp ,ledger-commodity-no-group-regexp))))
    286   "")
    287 
    288 (ledger-define-regexp amount-no-group
    289   (rx (and (? ?-)
    290            (+ digit)
    291            (* (and (any ?. ?,) (+ digit)))))
    292   "")
    293 
    294 (ledger-define-regexp amount
    295   (macroexpand
    296    `(rx (group (regexp ,ledger-amount-no-group-regexp))))
    297   "")
    298 
    299 (ledger-define-regexp commoditized-amount
    300   (macroexpand
    301    `(rx (group
    302          (or (and (regexp ,ledger-commodity-no-group-regexp)
    303                   (*? blank)
    304                   (regexp ,ledger-amount-no-group-regexp))
    305              (and (regexp ,ledger-amount-no-group-regexp)
    306                   (*? blank)
    307                   (regexp ,ledger-commodity-no-group-regexp))))))
    308   "")
    309 
    310 (ledger-define-regexp commodity-annotations
    311   (macroexpand
    312    `(rx (* (+ blank)
    313            (or (and ?\{ (regexp ,ledger-commoditized-amount-regexp) ?\})
    314                (and ?\[ (regexp ,ledger-iso-date-regexp) ?\])
    315                (and ?\( (not (any ?\))) ?\))))))
    316   ""
    317   commoditized-amount
    318   iso-date)
    319 
    320 (ledger-define-regexp cost
    321   (macroexpand
    322    `(rx (and (or "@" "@@") (+ blank)
    323              (regexp ,ledger-commoditized-amount-regexp))))
    324   "")
    325 
    326 (ledger-define-regexp balance-assertion
    327   (macroexpand
    328    `(rx (and ?= (+ blank)
    329              (regexp ,ledger-commoditized-amount-regexp))))
    330   "")
    331 
    332 (ledger-define-regexp full-amount
    333   (rx (group (+? (not (any ?\;)))))
    334   "")
    335 
    336 (ledger-define-regexp post-line
    337   (macroexpand
    338    `(rx (and line-start (+ blank)
    339              (? (and (regexp ,ledger-state-regexp) (* blank)))
    340              (regexp ,ledger-full-account-regexp)
    341              (? (and (regexp ,ledger-long-space-regexp)
    342                      (regexp ,ledger-full-amount-regexp)))
    343              (? (regexp ,ledger-end-note-regexp))
    344              line-end)))
    345   ""
    346   state
    347   (account-kind full-account kind)
    348   (account full-account name)
    349   (amount full-amount)
    350   (note end-note))
    351 
    352 (defconst ledger-amount-regex
    353   (concat "\\(  \\|\t\\| \t\\)[ \t]*[-+=]? *"
    354           "\\(?:" ledger-commodity-regexp " *\\)?"
    355           ;; We either match just a number after the commodity with no
    356           ;; decimal or thousand separators or a number with thousand
    357           ;; separators.  If we have a decimal part starting with `,'
    358           ;; or `.', because the match is non-greedy, it must leave at
    359           ;; least one of those symbols for the following capture
    360           ;; group, which then finishes the decimal part.
    361           "\\([-+=]? *\\(?:[0-9]+\\|[0-9,.]+?\\)\\)"
    362           "\\([,.][0-9)]+\\)?"
    363           "\\(?: *" ledger-commodity-regexp "\\)?"
    364           "\\([ \t]*[@={]@?[^\n;]+?\\)?"
    365           "\\([ \t]+;.+?\\|[ \t]*\\)?$"))
    366 
    367 (ledger-define-regexp year
    368   (rx (group (+ (any "0-9"))))
    369   "")
    370 
    371 (ledger-define-regexp payee
    372   (rx (group (+? nonl)))
    373   "")
    374 
    375 (ledger-define-regexp iterate
    376   (macroexpand `(rx  (or (and (or "Y" "year")
    377                               (+ (syntax ?-))
    378                               (regexp ,ledger-year-regexp))
    379                          (and (regexp ,ledger-full-date-regexp)
    380                               (? (and (+ blank) (regexp ,ledger-state-regexp)))
    381                               (? (and (+ blank) (regexp ,ledger-code-regexp)))
    382                               (+ blank)
    383                               (regexp ,ledger-payee-regexp)
    384                               (? (regexp ,ledger-end-note-regexp))))))
    385   ""
    386   year
    387   (actual-date full-date actual)
    388   (effective-date full-date effective)
    389   state
    390   code
    391   payee
    392   (note end-note))
    393 
    394 (defconst ledger-incomplete-date-regexp
    395   "\\(?:\\([0-9]\\{1,2\\}\\)[-/]\\)?\\([0-9]\\{1,2\\}\\)")
    396 
    397 (defconst ledger-xact-start-regex
    398   (concat "^" ledger-iso-date-regexp  ;; subexp 1
    399           "\\(=" ledger-iso-date-regexp "\\)?"
    400           ))
    401 
    402 (defconst ledger-xact-after-date-regex
    403   (concat "\\(?:[ \t]+\\([*!]\\)\\)?"  ;; mark, subexp 1
    404           "\\(?:[ \t]+\\((.*?)\\)\\)?"  ;; code, subexp 2
    405           "\\(?:[ \t]+\\(.+?\\)\\)?"   ;; desc, subexp 3
    406           "\\(?:\\(?:\t\\|[ \t]\\{2,\\}\\)\\(;[^\n]*\\)\\)?$" ;; comment, subexp 4
    407           ))
    408 
    409 (defconst ledger-posting-regex
    410   (concat "^[[:blank:]]+" ; initial white space
    411           ;; state and account, subexp 1
    412           "\\(\\([*!]\\)?"            ; state,   subexp 2
    413           "[[:blank:]]*\\(.*?\\)\\)?" ; account, subexp 3
    414           "\\(?:\\(?:\t\\|[[:blank:]]\\{2,\\}\\)"
    415           "\\([^;\n]*?\\)\\)?"        ; amount,  subexp 4
    416           "\\(?:\\(?:\t\\|[[:blank:]]\\{2,\\}\\)"
    417           "\\(;.*\\)\\)?$"            ; comment, subexp 5
    418           ))
    419 
    420 
    421 
    422 (defconst ledger-directive-start-regex
    423   "[=~;#%|\\*[A-Za-z]")
    424 
    425 
    426 (provide 'ledger-regex)
    427 
    428 ;;; ledger-regex.el ends here