csv-mode.el (84013B)
1 ;;; csv-mode.el --- Major mode for editing comma/char separated values -*- lexical-binding: t -*- 2 3 ;; Copyright (C) 2003-2024 Free Software Foundation, Inc 4 5 ;; Author: "Francis J. Wright" <F.J.Wright@qmul.ac.uk> 6 ;; Maintainer: emacs-devel@gnu.org 7 ;; Version: 1.25 8 ;; Package-Requires: ((emacs "27.1") (cl-lib "0.5")) 9 ;; Keywords: convenience 10 11 ;; This package is free software; you can redistribute it and/or modify 12 ;; it under the terms of the GNU General Public License as published by 13 ;; the Free Software Foundation; either version 3, or (at your option) 14 ;; any later version. 15 16 ;; This package is distributed in the hope that it will be useful, 17 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of 18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 19 ;; GNU General Public License for more details. 20 21 ;; You should have received a copy of the GNU General Public License 22 ;; along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. 23 24 ;;; Commentary: 25 26 ;; This package implements CSV mode, a major mode for editing records 27 ;; in a generalized CSV (character-separated values) format. It binds 28 ;; files with suffix ".csv" to `csv-mode' (and ".tsv" to `tsv-mode') in 29 ;; `auto-mode-alist'. 30 31 ;; In CSV mode, the following commands are available: 32 33 ;; - C-c C-s (`csv-sort-fields') and C-c C-n (`csv-sort-numeric-fields') 34 ;; respectively sort lexicographically and numerically on a 35 ;; specified field or column. 36 37 ;; - C-c C-r (`csv-reverse-region') reverses the order. (These 38 ;; commands are based closely on, and use, code in `sort.el'.) 39 40 ;; - C-c C-k (`csv-kill-fields') and C-c C-y (`csv-yank-fields') kill 41 ;; and yank fields or columns, although they do not use the normal 42 ;; kill ring. C-c C-k can kill more than one field at once, but 43 ;; multiple killed fields can be yanked only as a fixed group 44 ;; equivalent to a single field. 
45 46 ;; - `csv-align-mode' keeps fields visually aligned, on-the-fly. 47 ;; It truncates fields to a maximum width that can be changed per-column 48 ;; with `csv-align-set-column-width'. 49 ;; Alternatively, C-c C-a (`csv-align-fields') aligns fields into columns 50 ;; and C-c C-u (`csv-unalign-fields') undoes such alignment; 51 ;; separators can be hidden within aligned records (controlled by 52 ;; `csv-invisibility-default' and `csv-toggle-invisibility'). 53 54 ;; - C-c C-t (`csv-transpose') interchanges rows and columns. For 55 ;; details, see the documentation for the individual commands. 56 57 ;; - `csv-set-separator' sets the CSV separator of the current buffer, 58 ;; while `csv-guess-set-separator' guesses and sets the separator 59 ;; based on the current buffer's contents. 60 ;; `csv-guess-set-separator' can be useful to add to the mode hook 61 ;; to have CSV mode guess and set the separator automatically when 62 ;; visiting a buffer: 63 ;; 64 ;; (add-hook 'csv-mode-hook 'csv-guess-set-separator) 65 66 ;; CSV mode can recognize fields separated by any of several single 67 ;; characters, specified by the value of the customizable user option 68 ;; `csv-separators'. CSV data fields can be delimited by quote 69 ;; characters (and must if they contain separator characters). This 70 ;; implementation supports quoted fields, where the quote characters 71 ;; allowed are specified by the value of the customizable user option 72 ;; `csv-field-quotes'. By default, both commas and tabs are considered 73 ;; as separators and the only field quote is a double quote. 74 ;; These user options can be changed ONLY by customizing them, e.g. via M-x 75 ;; customize-variable. 76 77 ;; CSV mode commands ignore blank lines and comment lines beginning 78 ;; with the value of the buffer local variable `csv-comment-start', 79 ;; which by default is #. 
The user interface is similar to that of 80 ;; the standard commands `sort-fields' and `sort-numeric-fields', but 81 ;; see the major mode documentation below. 82 83 ;; The global minor mode `csv-field-index-mode' provides display of 84 ;; the current field index in the mode line, cf. `line-number-mode' 85 ;; and `column-number-mode'. It is on by default. 86 87 ;;;; See also: 88 89 ;; the standard GNU Emacs 21 packages align.el, which will align 90 ;; columns within a region, and delim-col.el, which helps to prettify 91 ;; columns in a text region or rectangle; 92 93 ;; csv.el by Ulf Jasper <ulf.jasper at web.de>, which provides 94 ;; functions for reading/parsing comma-separated value files and is 95 ;; available at http://de.geocities.com/ulf_jasper/emacs.html (and in 96 ;; the gnu.emacs.sources archives). 97 98 ;;; Installation: 99 100 ;; Put this file somewhere that Emacs can find it (i.e. in one of the 101 ;; directories in your `load-path' such as `site-lisp'), optionally 102 ;; byte-compile it (recommended), and put this in your .emacs file: 103 ;; 104 ;; (add-to-list 'auto-mode-alist '("\\.[Cc][Ss][Vv]\\'" . csv-mode)) 105 ;; (autoload 'csv-mode "csv-mode" 106 ;; "Major mode for editing comma-separated value files." t) 107 108 ;;; News: 109 110 ;; Since 1.25: 111 ;; - The ASCII control character 31 Unit Separator can now be 112 ;; recognized as a CSV separator by `csv-guess-separator'. 113 114 ;; Since 1.24: 115 ;; - New function `csv--unquote-value'. 116 ;; - New function `csv-parse-current-row'. 117 118 ;; Since 1.21: 119 ;; - New command `csv-insert-column'. 120 ;; - New config var `csv-align-min-width' for `csv-align-mode'. 121 ;; - New option `csv-confirm-region'. 122 123 ;; Since 1.20: 124 ;; - New command `csv-guess-set-separator' that automatically guesses 125 ;; and sets the CSV separator of the current buffer. 126 ;; - New command `csv-set-separator' for setting the CSV separator 127 ;; manually. 
;; Since 1.9:
;; - `csv-align-mode' auto-aligns columns dynamically (on screen).

;; Before that:
;; Begun on 15 November 2003 to provide lexicographic sorting of
;; simple CSV data by field and released as csv.el. Facilities to
;; kill multiple fields and customize separator added on 9 April 2004.
;; Converted to a major mode and renamed csv-mode.el on 10 April 2004,
;; partly at the suggestion of Stefan Monnier <monnier at
;; IRO.UMontreal.CA> to avoid conflict with csv.el by Ulf Jasper.
;; Field alignment, comment support and CSV mode customization group
;; added on 1 May 2004. Support for index ranges added on 6 June
;; 2004. Multiple field separators added on 12 June 2004.
;; Transposition added on 22 June 2004. Separator invisibility added
;; on 23 June 2004.

;;; To do (maybe):

;; Make separators and quotes buffer-local and locally settable.
;; Support (La)TeX tables: set separator and comment; support record
;; end string.
;; Convert comma-separated to space- or tab-separated.

;;; Code:

(eval-when-compile
  (require 'cl-lib)
  (require 'subr-x))

(defgroup CSV nil
  "Major mode for editing files of comma-separated value type."
  :group 'convenience)

(defvar csv-separator-chars nil
  "Field separators as a list of characters.
Set by customizing `csv-separators' -- do not set directly!")

(defvar csv-separator-regexp nil
  "Regexp to match a field separator.
Set by customizing `csv-separators' -- do not set directly!")

(defvar csv--skip-chars nil
  "Char set used by `skip-chars-forward' etc. to skip fields.
Set by customizing `csv-separators' -- do not set directly!")

(defvar csv-font-lock-keywords nil
  "Font lock keywords to highlight the field separators in CSV mode.
Set by customizing `csv-separators' -- do not set directly!")

(defcustom csv-separators '("," "\t")
  "Field separators: a list of *single-character* strings.
For example: (\",\" \"\\t\"), the default, or (\",\" \";\" \":\").
Neighbouring fields may be separated by any one of these characters.
The first is used when inserting a field separator into the buffer.
All must be different from the field quote characters, `csv-field-quotes'.

Changing this variable with `setq' won't affect the current Emacs
session. Use `customize-set-variable' instead if that is required."
  ;; Suggested by Eckhard Neber <neber@mwt.e-technik.uni-ulm.de>
  :type '(repeat string)
  ;; FIXME: Character would be better, but in Emacs 21.3 does not display
  ;; correctly in a customization buffer.
  :set (lambda (variable value)
         ;; Validate every entry before touching any derived state.
         (mapc (lambda (x)
                 (if (/= (length x) 1)
                     (error "Non-single-char string %S" x))
                 (if (and (boundp 'csv-field-quotes)
                          (member x csv-field-quotes))
                     (error "%S is already a quote" x)))
               value)
         (custom-set-default variable value)
         ;; Recompute the variables derived from the separator list.
         (setq csv-separator-chars (mapcar #'string-to-char value))
         (setq csv--skip-chars
               ;; Each separator is backslash-escaped so that characters
               ;; special to `skip-chars-forward' are taken literally.
               (apply #'concat "^\n"
                      (mapcar (lambda (s) (concat "\\" s)) value)))
         (setq csv-separator-regexp (regexp-opt value))
         (setq csv-font-lock-keywords
               ;; NB: csv-separator-face variable evaluates to itself.
               `((,csv-separator-regexp (0 'csv-separator-face))))))

(defcustom csv-field-quotes '("\"")
  "Field quotes: a list of *single-character* strings.
For example: (\"\\\"\"), the default, or (\"\\\"\" \"\\='\" \"\\=`\").
A field can be delimited by a pair of any of these characters.
All must be different from the field separators, `csv-separators'."
  :type '(repeat string)
  ;; Character would be better, but in Emacs 21 does not display
  ;; correctly in a customization buffer.
  :set (lambda (variable value)
         (mapc (lambda (x)
                 (if (/= (length x) 1)
                     (error "Non-single-char string %S" x))
                 (if (member x csv-separators)
                     (error "%S is already a separator" x)))
               value)
         (when (boundp 'csv-mode-syntax-table)
           ;; FIRST remove old quote syntax:
           (with-syntax-table text-mode-syntax-table
             (mapc (lambda (x)
                     (modify-syntax-entry
                      (string-to-char x)
                      (string (char-syntax (string-to-char x)))
                      ;; symbol-value to avoid compiler warning:
                      (symbol-value 'csv-mode-syntax-table)))
                   csv-field-quotes))
           ;; THEN set new quote syntax:
           (csv-set-quote-syntax value))
         ;; BEFORE setting new value of `csv-field-quotes':
         (custom-set-default variable value)))

(defun csv-set-quote-syntax (field-quotes)
  "Set syntax for field quote characters FIELD-QUOTES to be \"string\".
FIELD-QUOTES should be a list of single-character strings."
  (mapc (lambda (x)
          (modify-syntax-entry
           (string-to-char x) "\""
           ;; symbol-value to avoid compiler warning:
           (symbol-value 'csv-mode-syntax-table)))
        field-quotes))

(defvar csv-comment-start nil
  "String that starts a comment line, or nil if no comment syntax.
Such comment lines are ignored by CSV mode commands.
This variable is buffer local; its default value is that of
`csv-comment-start-default'. It is set by the function
`csv-set-comment-start' -- do not set it directly!")

(make-variable-buffer-local 'csv-comment-start)

(defcustom csv-comment-start-default "#"
  "String that starts a comment line, or nil if no comment syntax.
Such comment lines are ignored by CSV mode commands.
Default value of buffer-local variable `csv-comment-start'.
Changing this variable does not affect any existing CSV mode buffer."
  :type '(choice (const :tag "None" nil) string)
  :set (lambda (variable value)
         (custom-set-default variable value)
         (setq-default csv-comment-start value)))

(defcustom csv-align-style 'left
  "Aligned field style: one of `left', `centre', `right' or `auto'.
Alignment style used by `csv-align-mode' and `csv-align-fields'.
Auto-alignment means left align text and right align numbers."
  :type '(choice (const left) (const centre)
                 (const right) (const auto)))

(defcustom csv-align-padding 1
  "Aligned field spacing: must be a positive integer.
Number of spaces used by `csv-align-mode' and `csv-align-fields'
after separators."
  :type 'integer)

(defcustom csv-header-lines 0
  "Header lines to skip when setting region automatically."
  :type 'integer)

(defcustom csv-invisibility-default t
  "If non-nil, make separators in aligned records invisible."
  :type 'boolean)

(defcustom csv-confirm-region t
  "If non-nil, confirm that region is OK in interactive commands."
  :type 'boolean)

(defface csv-separator-face
  '((t :inherit escape-glyph))
  "CSV mode face used to highlight separators.")

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Mode definition, key bindings and menu
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;


(defconst csv-mode-line-format
  '(csv-field-index-string ("" csv-field-index-string))
  "Mode line format string for CSV mode.")

(defvar csv-mode-map
  (let ((map (make-sparse-keymap)))
    (define-key map [(control ?c) (control ?v)] #'csv-toggle-invisibility)
    (define-key map [(control ?c) (control ?t)] #'csv-transpose)
    (define-key map [(control ?c) (control ?c)] #'csv-set-comment-start)
    (define-key map [(control ?c) (control ?u)] #'csv-unalign-fields)
    (define-key map [(control ?c) (control ?a)] #'csv-align-fields)
    (define-key map [(control ?c) (control ?z)] #'csv-yank-as-new-table)
    (define-key map [(control ?c) (control ?y)] #'csv-yank-fields)
    (define-key map [(control ?c) (control ?k)] #'csv-kill-fields)
    (define-key map [(control ?c) (control ?d)] #'csv-toggle-descending)
    (define-key map [(control ?c) (control ?r)] #'csv-reverse-region)
    (define-key map [(control ?c) (control ?n)] #'csv-sort-numeric-fields)
    (define-key map [(control ?c) (control ?s)] #'csv-sort-fields)
    (define-key map "\t" #'csv-tab-command)
    (define-key map [backtab] #'csv-backtab-command)
    map)
  "Keymap for `csv-mode'.")

;;;###autoload
(define-derived-mode csv-mode text-mode "CSV"
  "Major mode for editing files of comma-separated value type.

CSV mode is derived from `text-mode', and runs `text-mode-hook' before
running `csv-mode-hook'. It turns `auto-fill-mode' off by default.
CSV mode can be customized by user options in the CSV customization
group. The separators are specified by the value of `csv-separators'.

CSV mode commands ignore blank lines and comment lines beginning with
the value of `csv-comment-start', which delimit \"paragraphs\".
\"Sexp\" is re-interpreted to mean \"field\", so that `forward-sexp'
\(\\[forward-sexp]), `kill-sexp' (\\[kill-sexp]), etc. all apply to fields.
Standard comment commands apply, such as `comment-dwim' (\\[comment-dwim]).

If `font-lock-mode' is enabled then separators, quoted values and
comment lines are highlighted using respectively `csv-separator-face',
`font-lock-string-face' and `font-lock-comment-face'.

The user interface (UI) for CSV mode commands is similar to that of
the standard commands `sort-fields' and `sort-numeric-fields', except
that if there is no prefix argument then the UI prompts for the field
index or indices. In `transient-mark-mode' only: if the region is not
set then the UI attempts to set it to include all consecutive CSV
records around point, and prompts for confirmation; if there is no
prefix argument then the UI prompts for it, offering as a default the
index of the field containing point if the region was not set
explicitly. The region set automatically is delimited by blank lines
and comment lines, and the number of header lines at the beginning of
the region given by the value of `csv-header-lines' are skipped.

Sort order is controlled by `csv-descending'.

CSV mode provides the following specific keyboard key bindings:

\\{csv-mode-map}"
  :group 'CSV
  ;; We used to `turn-off-auto-fill' here instead, but that's not very
  ;; effective since text-mode-hook is run afterwards anyway!
  (setq-local normal-auto-fill-function nil)
  ;; Set syntax for field quotes:
  (csv-set-quote-syntax csv-field-quotes)
  ;; Make sexp functions apply to fields:
  (set (make-local-variable 'forward-sexp-function) #'csv-forward-field)
  (csv-set-comment-start csv-comment-start)
  ;; Font locking -- separator plus syntactic:
  (setq font-lock-defaults '(csv-font-lock-keywords))
  (setq-local jit-lock-contextually nil) ;Each line should be independent.
  (if csv-invisibility-default (add-to-invisibility-spec 'csv))
  ;; Mode line to support `csv-field-index-mode':
  (set (make-local-variable 'mode-line-position)
       (pcase mode-line-position
         (`(,(or (pred consp) (pred stringp)) . ,_)
          `(,@mode-line-position ,csv-mode-line-format))
         (_ `("" ,mode-line-position ,csv-mode-line-format))))
  (set (make-local-variable 'truncate-lines) t)
  ;; Enable or disable `csv-field-index-mode' (could probably do this
  ;; a bit more efficiently).  Pass an explicit 1 or -1: a minor mode
  ;; command called from Lisp with a nil argument *enables* the mode,
  ;; so passing the raw variable value would switch the mode back on
  ;; for users who had turned it off.
  ;; NOTE(review): assumes `csv-field-index-mode' (defined later in the
  ;; file) follows the standard minor-mode argument convention -- confirm.
  (csv-field-index-mode (if (symbol-value 'csv-field-index-mode) 1 -1)))

(defun csv-set-comment-start (string)
  "Set comment start for this CSV mode buffer to STRING.
It must be either a string or nil.
STRING is taken literally: it is quoted before being added to
`paragraph-separate' and `paragraph-start', and its first character
is given comment-start syntax in `csv-mode-syntax-table'."
  (interactive
   (list (edit-and-eval-command
          "Comment start (string or nil): " csv-comment-start)))
  ;; Paragraph means a group of contiguous records:
  (set (make-local-variable 'paragraph-separate) "[[:space:]]*$") ; White space.
  (set (make-local-variable 'paragraph-start) "\n");Must include \n explicitly!
  ;; Remove old comment-start/end if available
  (with-syntax-table text-mode-syntax-table
    (when comment-start
      (modify-syntax-entry (string-to-char comment-start)
                           (string (char-syntax (string-to-char comment-start)))
                           csv-mode-syntax-table))
    (modify-syntax-entry ?\n
                         (string (char-syntax ?\n))
                         csv-mode-syntax-table))
  (when string
    ;; Quote STRING so that a comment start such as "*" or "." matches
    ;; itself rather than acting as a regexp operator.
    (let ((comment-re (regexp-quote string)))
      (setq paragraph-separate (concat paragraph-separate "\\|" comment-re)
            paragraph-start (concat paragraph-start "\\|" comment-re)))
    (set (make-local-variable 'comment-start) string)
    (modify-syntax-entry
     (string-to-char string) "<" csv-mode-syntax-table)
    (modify-syntax-entry ?\n ">" csv-mode-syntax-table))
  (setq csv-comment-start string))

(defvar csv--set-separator-history nil
  "Minibuffer history used by `csv-set-separator'.")

(defun csv-set-separator (sep)
  "Set the CSV separator in the current buffer to SEP.
SEP is a character.  Signal an error if it is one of the field quote
characters in `csv-field-quotes'.  The separator variables are made
buffer-local, so other CSV buffers are unaffected."
  (interactive (list (read-char-from-minibuffer
                      "Separator: " nil 'csv--set-separator-history)))
  (when (and (boundp 'csv-field-quotes)
             (member (string sep) csv-field-quotes))
    (error "%c is already a quote" sep))
  (setq-local csv-separators (list (string sep)))
  (setq-local csv-separator-chars (list sep))
  (setq-local csv--skip-chars (format "^\n\\%c" sep))
  (setq-local csv-separator-regexp (regexp-quote (string sep)))
  (setq-local csv-font-lock-keywords
              `((,csv-separator-regexp (0 'csv-separator-face))))
  (font-lock-refresh-defaults))

;;;###autoload
(add-to-list 'auto-mode-alist '("\\.[Cc][Ss][Vv]\\'" . csv-mode))

(defvar csv-descending nil
  "If non-nil, CSV mode sort functions sort in order of descending sort key.
Usually they sort in order of ascending sort key.")

(defun csv-toggle-descending ()
  "Toggle `csv-descending'."
  (interactive)
  (setq csv-descending (not csv-descending))
  (message "Sort order is %sscending" (if csv-descending "de" "a")))

(defun csv--separators-invisible-p ()
  "Return non-nil if `csv' is in the current `buffer-invisibility-spec'.
The spec may be the atom t rather than a list, in which case `csv'
is not an element of it."
  (and (consp buffer-invisibility-spec)
       (memq 'csv buffer-invisibility-spec)))

(defun csv-toggle-invisibility ()
  ;; FIXME: Make it into a proper minor mode?
  "Toggle `buffer-invisibility-spec'."
  (interactive)
  ;; `buffer-invisibility-spec' may be t (not a list), on which a bare
  ;; `memq' would signal an error; go through the predicate instead.
  (if (csv--separators-invisible-p)
      (remove-from-invisibility-spec 'csv)
    (add-to-invisibility-spec 'csv))
  (message "Separators in aligned records will be %svisible \
\(after re-aligning if soft)"
           (if (csv--separators-invisible-p) "in" ""))
  (redraw-frame (selected-frame)))

(easy-menu-define
  csv-menu
  csv-mode-map
  "CSV major mode menu keymap"
  '("CSV"
    ["Sort By Field Lexicographically" csv-sort-fields :active t
     :help "Sort lines in region lexicographically by the specified field"]
    ["Sort By Field Numerically" csv-sort-numeric-fields :active t
     :help "Sort lines in region numerically by the specified field"]
    ["Reverse Order of Lines" csv-reverse-region :active t
     :help "Reverse the order of the lines in the region"]
    ["Use Descending Sort Order" csv-toggle-descending :active t
     :style toggle :selected csv-descending
     :help "If selected, use descending order when sorting"]
    "--"
    ["Kill Fields (Columns)" csv-kill-fields :active t
     :help "Kill specified fields of each line in the region"]
    ["Yank Fields (Columns)" csv-yank-fields :active t
     :help "Yank killed fields as specified field of each line in region"]
    ["Yank As New Table" csv-yank-as-new-table :active t
     :help "Yank killed fields as a new table at point"]
    ["Align Fields into Columns" csv-align-fields :active t
     :help "Align the start of every field of each line in the region"]
    ["Unalign Columns into Fields" csv-unalign-fields :active t
     :help "Undo soft alignment and optionally remove redundant white space"]
    ["Transpose Rows and Columns" csv-transpose :active t
     :help "Rewrite rows (which may have different lengths) as columns"]
    "--"
    ["Forward Field" forward-sexp :active t
     :help "Move forward across one field; with ARG, do it that many times"]
    ["Backward Field" backward-sexp :active t
     :help "Move backward across one field; with ARG, do it that many times"]
    ["Kill Field Forward" kill-sexp :active t
     :help "Kill field following cursor; with ARG, do it that many times"]
    ["Kill Field Backward" backward-kill-sexp :active t
     :help "Kill field preceding cursor; with ARG, do it that many times"]
    "--"
    ("Alignment Style"
     ["Left" (setq csv-align-style 'left) :active t
      :style radio :selected (eq csv-align-style 'left)
      :help "If selected, `csv-align' left aligns fields"]
     ["Centre" (setq csv-align-style 'centre) :active t
      :style radio :selected (eq csv-align-style 'centre)
      :help "If selected, `csv-align' centres fields"]
     ["Right" (setq csv-align-style 'right) :active t
      :style radio :selected (eq csv-align-style 'right)
      :help "If selected, `csv-align' right aligns fields"]
     ["Auto" (setq csv-align-style 'auto) :active t
      :style radio :selected (eq csv-align-style 'auto)
      :help "\
If selected, `csv-align' left aligns text and right aligns numbers"]
     )
    ["Set header line" csv-header-line :active t]
    ["Auto-(re)align fields" csv-align-mode
     :style toggle :selected csv-align-mode]
    ["Show Current Field Index" csv-field-index-mode :active t
     :style toggle :selected csv-field-index-mode
     :help "If selected, display current field index in mode line"]
    ["Make Separators Invisible" csv-toggle-invisibility :active t
     ;; Guard against `buffer-invisibility-spec' being the atom t.
     :style toggle :selected (and (consp buffer-invisibility-spec)
                                  (memq 'csv buffer-invisibility-spec))
     :visible (not (tsv--mode-p))
     :help "If selected, separators in aligned records are invisible"]
    ["Set Buffer's Comment Start" csv-set-comment-start :active t
     :help "Set comment start string for this buffer"]
    ["Customize CSV Mode" (customize-group 'CSV) :active t
     :help "Open a customization buffer to change CSV mode options"]
    ))

(require 'sort)

(defsubst csv-not-looking-at-record ()
  "Return t if looking at blank or comment line, nil otherwise.
Assumes point is at beginning of line."
  (looking-at paragraph-separate))

(defun csv-interactive-args (&optional type)
  "Get arg or field(s) and region interactively, offering sensible defaults.
This function does not check whether the buffer is read-only; the
commands that use it do that themselves.
If TYPE is noarg then return a list (beg end).
Otherwise, return a list (arg beg end), where arg is:
  the raw prefix argument by default;
  a single field index if TYPE is single;
  a list of field indices or index ranges if TYPE is multiple.
Field defaults to the current prefix arg; if not set, prompt user.

A field index list consists of positive or negative integers or ranges,
separated by any non-integer characters. A range has the form m-n,
where m and n are positive or negative integers, m < n, and n defaults
to the last field index if omitted.  Signal an error if the text
entered contains no field index at all.

In transient mark mode, if the mark is not active then automatically
select and highlight CSV records around point, and query user.
The default field when read interactively is the current field."
  ;; Must be run interactively to activate mark!
  (let* ((arg current-prefix-arg) (default-field 1)
         (region
          (if (not (use-region-p))
              ;; Set region automatically:
              (save-excursion
                (if arg
                    (beginning-of-line)
                  (let ((lbp (line-beginning-position)))
                    (while (re-search-backward csv-separator-regexp lbp 1)
                      ;; Move as far as possible, i.e. to beginning of line.
                      (setq default-field (1+ default-field)))))
                (if (csv-not-looking-at-record)
                    (error "Point must be within CSV records"))
                (let ((startline (point)))
                  ;; Set mark at beginning of region:
                  (while (not (or (bobp) (csv-not-looking-at-record)))
                    (forward-line -1))
                  (if (csv-not-looking-at-record) (forward-line 1))
                  ;; Skip header lines:
                  (forward-line csv-header-lines)
                  (set-mark (point))    ; OK since in save-excursion
                  ;; Move point to end of region:
                  (goto-char startline)
                  (beginning-of-line)
                  (while (not (or (eobp) (csv-not-looking-at-record)))
                    (forward-line 1))
                  ;; Show mark briefly if necessary:
                  (unless (and (pos-visible-in-window-p)
                               (pos-visible-in-window-p (mark)))
                    (exchange-point-and-mark)
                    (sit-for 1)
                    (exchange-point-and-mark))
                  (when csv-confirm-region
                    (or (y-or-n-p "Region OK? ")
                        (error "Action aborted by user"))
                    (message nil))      ; clear y-or-n-p message
                  (list (region-beginning) (region-end))))
            ;; Use region set by user:
            (list (region-beginning) (region-end)))))
    (setq default-field (number-to-string default-field))
    (cond
     ((eq type 'multiple)
      (if arg
          ;; Ensure that field is a list:
          (or (consp arg)
              (setq arg (list (prefix-numeric-value arg))))
        ;; Read field interactively, ignoring non-integers:
        (setq arg
              (mapcar
               (lambda (x)
                 (if (string-match "-" x 1) ; not first character
                     ;; Return a range as a pair - the cdr may be nil:
                     (let ((m (substring x 0 (match-beginning 0)))
                           (n (substring x (match-end 0))))
                       (cons (car (read-from-string m))
                             (and (not (string= n ""))
                                  (car (read-from-string n)))))
                   ;; Return a number as a number:
                   (car (read-from-string x))))
               ;; Omit empty tokens: leading or trailing separators
               ;; (e.g. a trailing space) would otherwise yield "",
               ;; on which the `string-match' above signals an error.
               (split-string
                (read-string
                 "Fields (sequence of integers or ranges): " default-field)
                "[^-+0-9]+" t)))
        (unless arg
          (error "No field index specified"))))
     ((eq type 'single)
      (if arg
          (setq arg (prefix-numeric-value arg))
        (while (not (integerp arg))
          (setq arg (eval-minibuffer "Field (integer): " default-field))))))
    (if (eq type 'noarg) region (cons arg region))))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Sorting by field
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(defun csv-nextrecfun ()
  "Called by `csv-sort-fields-1' with point at end of previous record.
It moves point to the start of the next record.
It should move point to the end of the buffer if there are no more records."
  (forward-line)
  (while (and (not (eobp)) (csv-not-looking-at-record))
    (forward-line)))

(defun csv-sort-fields-1 (field beg end startkeyfun endkeyfun)
  "Modified version of `sort-fields-1' that skips blank or comment lines.

FIELD is a single field index, and BEG and END specify the region to
sort.

STARTKEYFUN moves from the start of the record to the start of the key.
It may return either a non-nil value to be used as the key, or
else the key is the substring between the values of point after
STARTKEYFUN and ENDKEYFUN are called. If STARTKEYFUN is nil, the key
starts at the beginning of the record.

ENDKEYFUN moves from the start of the sort key to the end of the sort key.
ENDKEYFUN may be nil if STARTKEYFUN returns a value or if the key
extends to the end of the record (here, the end of the line).

Note that STARTKEYFUN and ENDKEYFUN are closures created by the caller,
so any normalization of FIELD must be done by the caller before it
creates them."
  (let ((tbl (syntax-table)))
    ;; Kept from the original; it cannot influence the key functions,
    ;; which captured the caller's own binding of the field index.
    (if (zerop field) (setq field 1))
    (unwind-protect
        (save-excursion
          (save-restriction
            (narrow-to-region beg end)
            (goto-char (point-min))
            (set-syntax-table sort-fields-syntax-table)
            (sort-subr csv-descending
                       'csv-nextrecfun 'end-of-line
                       startkeyfun endkeyfun)))
      ;; Always restore the buffer's syntax table, even on error.
      (set-syntax-table tbl))))

(defun csv-sort-fields (field beg end)
  "Sort lines in region lexicographically by the ARGth field of each line.
If not set, the region defaults to the CSV records around point.
Fields are separated by `csv-separators' and null fields are allowed anywhere.
Field indices increase from 1 on the left or decrease from -1 on the right.
A field index of 0 is treated as 1.
A prefix argument specifies a single field, otherwise prompt for field index.
Ignore blank and comment lines. The variable `sort-fold-case'
determines whether alphabetic case affects the sort order.
When called non-interactively, FIELD is a single field index;
BEG and END specify the region to sort."
  ;; (interactive "*P\nr")
  (interactive (csv-interactive-args 'single))
  (barf-if-buffer-read-only)
  ;; Normalize here, before the key closures capture FIELD: under
  ;; lexical binding the same check in `csv-sort-fields-1' has no
  ;; effect on them.
  (if (zerop field) (setq field 1))
  (csv-sort-fields-1 field beg end
                     (lambda () (csv-sort-skip-fields field) nil)
                     (lambda () (skip-chars-forward csv--skip-chars))))

(defun csv-sort-numeric-fields (field beg end)
  "Sort lines in region numerically by the ARGth field of each line.
If not set, the region defaults to the CSV records around point.
Fields are separated by `csv-separators'.
Null fields are allowed anywhere and sort as zeros.
Field indices increase from 1 on the left or decrease from -1 on the right.
A field index of 0 is treated as 1.
A prefix argument specifies a single field, otherwise prompt for field index.
Specified non-null field must contain a number in each line of the region,
which may begin with \"0x\" or \"0\" for hexadecimal and octal values.
Otherwise, the number is interpreted according to `sort-numeric-base'.
Ignore blank and comment lines.
When called non-interactively, FIELD is a single field index;
BEG and END specify the region to sort."
  ;; (interactive "*P\nr")
  (interactive (csv-interactive-args 'single))
  (barf-if-buffer-read-only)
  ;; See `csv-sort-fields' for why this is done here.
  (if (zerop field) (setq field 1))
  (csv-sort-fields-1 field beg end
                     (lambda ()
                       (csv-sort-skip-fields field)
                       (let* ((case-fold-search t)
                              ;; Detect a "0x" or "0" prefix and step
                              ;; over it; nil means no explicit base.
                              (base
                               (if (looking-at "\\(0x\\)[0-9a-f]\\|\\(0\\)[0-7]")
                                   (cond ((match-beginning 1)
                                          (goto-char (match-end 1))
                                          16)
                                         ((match-beginning 2)
                                          (goto-char (match-end 2))
                                          8)
                                         (t nil)))))
                         (string-to-number (buffer-substring (point)
                                                             (save-excursion
                                                               (forward-sexp 1)
                                                               (point)))
                                           (or base sort-numeric-base))))
                     nil))

(defun csv-reverse-region (beg end)
  "Reverse the order of the lines in the region.
This is just a CSV-mode style interface to `reverse-region', which is
the function that should be used non-interactively. It takes two
point or marker arguments, BEG and END, delimiting the region."
  ;; (interactive "*P\nr")
  (interactive (csv-interactive-args 'noarg))
  (barf-if-buffer-read-only)
  (reverse-region beg end))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Moving by field
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(defun csv-end-of-field ()
  "Skip forward over one field.
If the field starts with a quote character, skip to its closing
quote first, treating a doubled quote as an escaped quote.  A quoted
field with no closing quote extends to the end of the buffer."
  (skip-chars-forward " ")
  ;; If the first character is a double quote, then we have a quoted
  ;; value.
  (when (eq (char-syntax (following-char)) ?\")
    (forward-char)
    (let ((ended nil))
      (while (not ended)
        (cond ((eobp)
               ;; Unterminated quoted field: stop here instead of
               ;; letting `forward-char' signal `end-of-buffer'.
               (setq ended t))
              ((not (eq (char-syntax (following-char)) ?\"))
               (forward-char 1))
              ;; According to RFC-4180 (sec 2.7), quotes inside quoted strings
              ;; are quoted by doubling the quote char: a,"b""c,",d
              ;; FIXME: Maybe we should handle this via syntax-propertize?
              ((let ((c (char-after (1+ (point)))))
                 (and c (eq (char-syntax c) ?\")))
               (forward-char 2))
              (t
               (setq ended t))))))
  (skip-chars-forward csv--skip-chars))

(defun csv--bof-p ()
  "Return non-nil if point is at the beginning of a field.
That is, at the beginning of a line or just after a separator."
  (or (bolp)
      (memq (preceding-char) csv-separator-chars)))

(defun csv--eof-p ()
  "Return non-nil if point is at the end of a field.
That is, at the end of a line or just before a separator."
  (or (eolp)
      (memq (following-char) csv-separator-chars)))

(defun csv-beginning-of-field ()
  "Skip backward over one field."
  (skip-syntax-backward " ")
  (if (eq (char-syntax (preceding-char)) ?\")
      (goto-char (scan-sexps (point) -1)))
  (skip-chars-backward csv--skip-chars))

(defun csv-forward-field (arg)
  "Move forward across one field, cf. `forward-sexp'.
With ARG, do it that many times. Negative arg -N means
move backward across N fields."
  (interactive "p")
  (if (< arg 0)
      (csv-backward-field (- arg))
    (while (>= (setq arg (1- arg)) 0)
      ;; At a line boundary, first skip any blank or comment lines.
      (if (or (bolp)
              (when (and (not (eobp)) (eolp)) (forward-char) t))
          (while (and (not (eobp)) (csv-not-looking-at-record))
            (forward-line 1)))
      (if (memq (following-char) csv-separator-chars) (forward-char))
      (csv-end-of-field))))

(defun csv-backward-field (arg)
  "Move backward across one field, cf. `backward-sexp'.
With ARG, do it that many times. Negative arg -N means
move forward across N fields."
  (interactive "p")
  (if (< arg 0)
      (csv-forward-field (- arg))
    (while (>= (setq arg (1- arg)) 0)
      ;; At a line boundary, first skip back over blank or comment lines.
      (when (or (eolp)
                (when (and (not (bobp)) (bolp)) (backward-char) t))
        (while (progn
                 (beginning-of-line)
                 (csv-not-looking-at-record))
          (backward-char))
        (end-of-line))
      (if (memq (preceding-char) csv-separator-chars) (backward-char))
      (csv-beginning-of-field))))

(defun csv-tab-command ()
  "Skip to the next field on the same line.
Create a new field at end of line, if needed."
  (interactive)
  (skip-chars-forward csv--skip-chars)
  (if (eolp)
      (insert (car csv-separators))
    (forward-char 1)))

(defun csv-backtab-command ()
  "Skip to the beginning of the previous field."
  (interactive)
  (skip-chars-backward csv--skip-chars)
  (forward-char -1)
  (skip-chars-backward csv--skip-chars))

(defun csv-sort-skip-fields (n &optional yank)
  "Position point at the beginning of field N on the current line.
Fields are separated by `csv-separators'; null terminal field allowed.
Assumes point is initially at the beginning of the line.
If N is not positive, count fields backward from the end of the line.
YANK non-nil allows N to be greater than the number of fields, in
which case extend the record as necessary.
Signal an error if the line has too few fields."
  (if (> n 0)
      ;; Skip across N - 1 fields.
      (let ((i (1- n)))
        (while (> i 0)
          (csv-end-of-field)
          (if (eolp)
              (if yank
                  (if (> i 1) (insert (car csv-separators)))
                (error "Line has too few fields: %s"
                       (buffer-substring
                        (save-excursion (beginning-of-line) (point))
                        (save-excursion (end-of-line) (point)))))
            (forward-char))             ; skip separator
          (setq i (1- i))))
    (end-of-line)
    ;; Skip back across -N - 1 fields.
    (let ((i (1- (- n))))
      (while (> i 0)
        (csv-beginning-of-field)
        (if (bolp)
            (error "Line has too few fields: %s"
                   (buffer-substring
                    (save-excursion (beginning-of-line) (point))
                    (save-excursion (end-of-line) (point)))))
        (backward-char)                 ; skip separator
        (setq i (1- i)))
      ;; Position at the front of the field
      ;; even if moving backwards.
      (csv-beginning-of-field))))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Field index mode
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Based partly on paren.el

(defcustom csv-field-index-delay 0.125
  "Time in seconds to delay before updating field index display."
853 :type '(number :tag "seconds")) 854 855 (defvar csv-field-index-idle-timer nil) 856 857 (defvar csv-field-index-string nil) 858 (make-variable-buffer-local 'csv-field-index-string) 859 860 (defvar csv-field-index-old nil) 861 (make-variable-buffer-local 'csv-field-index-old) 862 863 (define-minor-mode csv-field-index-mode 864 "Toggle CSV-Field-Index mode. 865 With prefix ARG, turn CSV-Field-Index mode on if and only if ARG is positive. 866 Returns the new status of CSV-Field-Index mode (non-nil means on). 867 When CSV-Field-Index mode is enabled, the current field index appears in 868 the mode line after `csv-field-index-delay' seconds of Emacs idle time." 869 :global t 870 :init-value t ; for documentation, since default is t 871 ;; This macro generates a function that first sets the mode 872 ;; variable, then runs the following code, runs the mode hooks, 873 ;; displays a message if interactive, updates the mode line and 874 ;; finally returns the variable value. 875 876 ;; First, always disable the mechanism (to avoid having two timers): 877 (when csv-field-index-idle-timer 878 (cancel-timer csv-field-index-idle-timer) 879 (setq csv-field-index-idle-timer nil)) 880 ;; Now, if the mode is on and any buffer is in CSV mode then 881 ;; re-initialize and enable the mechanism by setting up a new timer: 882 (if csv-field-index-mode 883 (if (memq t (mapcar (lambda (buffer) 884 (with-current-buffer buffer 885 (when (derived-mode-p 'csv-mode) 886 (setq csv-field-index-string nil 887 csv-field-index-old nil) 888 t))) 889 (buffer-list))) 890 (setq csv-field-index-idle-timer 891 (run-with-idle-timer csv-field-index-delay t 892 #'csv-field-index))) 893 ;; but if the mode is off then remove the display from the mode 894 ;; lines of all CSV buffers: 895 (mapc (lambda (buffer) 896 (with-current-buffer buffer 897 (when (derived-mode-p 'csv-mode) 898 (setq csv-field-index-string nil 899 csv-field-index-old nil) 900 (force-mode-line-update)))) 901 (buffer-list)))) 902 903 
(defun csv--field-index () 904 (save-excursion 905 (let ((start (point)) 906 (field 0)) 907 (beginning-of-line) 908 (while (and (<= (point) start) 909 (not (eolp))) 910 (csv-end-of-field) 911 (unless (eolp) 912 (forward-char 1)) 913 (setq field (1+ field))) 914 field))) 915 916 (defun csv-field-index () 917 "Construct `csv-field-index-string' to display in mode line. 918 Called by `csv-field-index-idle-timer'." 919 (if (derived-mode-p 'csv-mode) 920 (let ((field (csv--field-index))) 921 (when (not (eq field csv-field-index-old)) 922 (setq csv-field-index-old field 923 csv-field-index-string 924 (and field (format "F%d" field))) 925 (force-mode-line-update))))) 926 927 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 928 ;;; Killing and yanking fields 929 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 930 931 (defvar csv-killed-fields nil 932 "A list of the fields or sub-records last killed by `csv-kill-fields'.") 933 934 (defun csv-kill-fields (fields beg end) 935 "Kill specified fields of each line in the region. 936 If not set, the region defaults to the CSV records around point. 937 Fields are separated by `csv-separators' and null fields are allowed anywhere. 938 Field indices increase from 1 on the left or decrease from -1 on the right. 939 The fields are stored for use by `csv-yank-fields'. Fields can be 940 specified in any order but are saved in increasing index order. 941 Ignore blank and comment lines. 942 943 When called interactively, a prefix argument specifies a single field, 944 otherwise prompt for a field list, which may include ranges in the form 945 m-n, where m < n and n defaults to the last field index if omitted. 946 947 When called non-interactively, FIELDS is a single field index or a 948 list of field indices, with ranges specified as (m.n) or (m), and BEG 949 and END specify the region to process." 
;; (interactive "*P\nr")
  (interactive (csv-interactive-args 'multiple))
  (barf-if-buffer-read-only)
  ;; The docstring allows FIELDS to be a bare index when called from Lisp;
  ;; normalize it to a list so the list operations below are valid.
  (when (integerp fields) (setq fields (list fields)))
  ;; Kill the field(s):
  (setq csv-killed-fields nil)
  (save-excursion
    (save-restriction
      (narrow-to-region beg end)
      (goto-char (point-min))
      (if (or (cdr fields) (consp (car fields)))
          (csv-kill-many-columns fields)
        (csv-kill-one-column (car fields)))))
  (setq csv-killed-fields (nreverse csv-killed-fields)))

(defun csv-kill-one-field (field)
  "Kill field with index FIELD in current line.
Return killed text. Assumes point is at beginning of line."
  ;; Move to start of field to kill:
  (csv-sort-skip-fields field)
  ;; Kill to end of field (cf. `kill-region'):
  (prog1 (delete-and-extract-region
          (point)
          (progn (csv-end-of-field) (point)))
    (if (eolp)
        (unless (bolp) (delete-char -1)) ; Delete trailing separator at eol
      (delete-char 1))))                 ; or following separator otherwise.

(defun csv-kill-one-column (field)
  "Kill field with index FIELD in all lines in (narrowed) buffer.
Save killed fields in `csv-killed-fields'.
Assumes point is at `point-min'. Called by `csv-kill-fields'.
Ignore blank and comment lines."
  (while (not (eobp))
    (or (csv-not-looking-at-record)
        (push (csv-kill-one-field field) csv-killed-fields))
    (forward-line)))

(defun csv-insert-column (field)
  "Insert an empty column at point.
FIELD is the index at which the new empty column is created in
every record of the buffer; records that are too short are
extended. Interactively, FIELD is derived from the field at point."
  (interactive
   (let ((cur (csv--field-index)))
     (list (if (and (csv--eof-p) (not (csv--bof-p))) (1+ cur) cur))))
  (save-excursion
    (goto-char (point-min))
    (while (not (eobp))
      (or (csv-not-looking-at-record)
          (progn
            (csv-sort-skip-fields field t)
            (insert (car csv-separators))))
      (forward-line 1))
    (csv--jit-flush-columns)))

(defun csv-kill-many-columns (fields)
  "Kill several fields in all lines in (narrowed) buffer.
FIELDS is an unordered list of field indices. An element may also
be a range (M . N), where a nil N means the last field; zero and
negative indices are resolved against the number of fields in the
first record. FIELDS itself is not modified.
Save killed fields in increasing index order in `csv-killed-fields'.
Assumes point is at `point-min'. Called by `csv-kill-fields'.
Ignore blank and comment lines."
  (if (eolp) (error "First record is empty"))
  ;; The normalization below edits the list in place and `sort' is
  ;; destructive, so work on a copy rather than on the caller's list.
  (setq fields (copy-sequence fields))
  ;; Convert non-positive to positive field numbers:
  (let ((last 1) (f fields))
    (csv-end-of-field)
    (while (not (eolp))
      (forward-char)                    ; skip separator
      (csv-end-of-field)
      (setq last (1+ last)))            ; last = # fields in first record
    (while f
      (cond ((consp (car f))
             ;; Expand a field range: (m.n) -> m m+1 ... n-1 n.
             ;; If n is nil then it defaults to the number of fields.
             (let* ((range (car f)) (cdrf (cdr f))
                    (m (car range)) (n (cdr range)))
               (if (< m 0) (setq m (+ m last 1)))
               (if n
                   (if (< n 0) (setq n (+ n last 1)))
                 (setq n last))
               (setq range (list n))
               (while (> n m) (push (setq n (1- n)) range))
               (setcar f (car range))
               (setcdr f (cdr range))
               ;; Re-attach the remaining fields to the last cell of the
               ;; spliced-in expansion.  Use (last f), not (last range):
               ;; for a one-element expansion the last cell is F itself,
               ;; whereas RANGE is a fresh cons that is not part of FIELDS.
               (setcdr (setq f (last f)) cdrf)))
            ((zerop (car f)) (setcar f 1))
            ;; Count a negative index from the right; the index is
            ;; (car f), F itself being the list cell.
            ((< (car f) 0) (setcar f (+ (car f) last 1))))
      (setq f (cdr f))))
  (goto-char (point-min))
  ;; Kill from right to avoid miscounting:
  (setq fields (sort fields #'>))
  (while (not (eobp))
    (or (csv-not-looking-at-record)
        (let ((fields fields) killed-fields field)
          (while fields
            (setq field (car fields)
                  fields (cdr fields))
            (beginning-of-line)
            (push (csv-kill-one-field field) killed-fields))
          (push (mapconcat #'identity killed-fields (car csv-separators))
                csv-killed-fields)))
    (forward-line)))

(defun csv-yank-fields (field beg end)
  "Yank fields as the ARGth field of each line in the region.
ARG may be arbitrarily large and records are extended as necessary.
If not set, the region defaults to the CSV records around point;
if point is not in a CSV record then offer to yank as a new table.
The fields yanked are those last killed by `csv-kill-fields'.
Fields are separated by `csv-separators' and null fields are allowed anywhere.
Field indices increase from 1 on the left or decrease from -1 on the right.
A prefix argument specifies a single field, otherwise prompt for field index.
Ignore blank and comment lines. When called non-interactively, FIELD
is a single field index; BEG and END specify the region to process."
  ;; (interactive "*P\nr")
  ;; If reading the arguments signals an error, pass (nil nil ERR) instead,
  ;; so that a nil BEG means "END holds the error object".
  (interactive (condition-case err
                   (csv-interactive-args 'single)
                 (error (list nil nil err))))
  (barf-if-buffer-read-only)
  (if (null beg)
      (if (y-or-n-p (concat (error-message-string end)
                            ". Yank as a new table? "))
          (csv-yank-as-new-table)
        ;; Pass the message as data, not as a format string, so that a
        ;; `%' in it cannot be misread as a format directive.
        (error "%s" (error-message-string end)))
    (if (<= field 0) (setq field (1+ field)))
    (save-excursion
      (save-restriction
        (narrow-to-region beg end)
        (goto-char (point-min))
        (let ((fields csv-killed-fields))
          (while (not (eobp))
            (unless (csv-not-looking-at-record)
              ;; Yank at start of specified field if possible,
              ;; otherwise yank at end of record:
              (if (zerop field)
                  (end-of-line)
                (csv-sort-skip-fields field 'yank))
              (and (eolp) (insert (car csv-separators)))
              (when fields
                (insert (car fields))
                (setq fields (cdr fields)))
              (or (eolp) (insert (car csv-separators))))
            (forward-line)))))))

(defun csv-yank-as-new-table ()
  "Yank fields as a new table starting at point.
The fields yanked are those last killed by `csv-kill-fields'."
1093 (interactive "*") 1094 (let ((fields csv-killed-fields)) 1095 (while fields 1096 (insert (car fields) ?\n) 1097 (setq fields (cdr fields))))) 1098 1099 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 1100 ;;; Aligning fields 1101 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 1102 1103 (defun csv--make-overlay (beg end &optional buffer front-advance rear-advance props) 1104 (let ((o (make-overlay beg end buffer front-advance rear-advance))) 1105 (overlay-put o 'csv t) 1106 (while props 1107 (overlay-put o (pop props) (pop props))) 1108 o)) 1109 1110 (defun csv--delete-overlay (o) 1111 (and (overlay-get o 'csv) (delete-overlay o))) 1112 1113 (defun csv--column-widths (beg end) 1114 "Return a list of two lists (COLUMN-WIDTHS FIELD-WIDTHS). 1115 COLUMN-WIDTHS is a list of elements (WIDTH START END) 1116 indicating the widths of the columns after point (and the position of the 1117 widest field that determined the overall width). 1118 FIELD-WIDTHS contains the widths of each individual field after 1119 point." 1120 (let ((column-widths '()) 1121 (field-widths '())) 1122 (goto-char beg) 1123 ;; Construct list of column widths: 1124 (while (< (point) end) ; for each record... 1125 (or (csv-not-looking-at-record) 1126 (let ((w column-widths) 1127 (col (current-column)) 1128 (beg (point)) 1129 field-width) 1130 (while (not (eolp)) 1131 (csv-end-of-field) 1132 (setq field-width (- (current-column) col)) 1133 (push field-width field-widths) 1134 (if w 1135 (if (> field-width (caar w)) 1136 (setcar w (list field-width beg (point)))) 1137 (setq w (list (list field-width beg (point))) 1138 column-widths (nconc column-widths w))) 1139 (or (eolp) (forward-char)) ; Skip separator. 1140 (setq w (cdr w) col (current-column) beg (point))))) 1141 (forward-line)) 1142 (list column-widths (nreverse field-widths)))) 1143 1144 (defun csv-align-fields (hard beg end) 1145 "Align all the fields in the region to form columns. 
1146 The alignment style is specified by `csv-align-style'. The number of 1147 spaces specified by `csv-align-padding' appears after each separator. 1148 Use soft alignment done by displaying virtual white space after the 1149 separators unless invoked with an argument, in which case insert real 1150 space characters into the buffer after the separators. 1151 Unalign first (see `csv-unalign-fields'). Ignore blank and comment lines. 1152 1153 In hard-aligned records, separators become invisible whenever 1154 `buffer-invisibility-spec' is non-nil. In soft-aligned records, make 1155 separators invisible if and only if `buffer-invisibility-spec' is 1156 non-nil when the records are aligned; this can be changed only by 1157 re-aligning. \(Unaligning always makes separators visible.) 1158 1159 When called non-interactively, use hard alignment if HARD is non-nil; 1160 BEG and END specify the region to align. 1161 If there is no selected region, default to the whole buffer." 1162 (interactive (cons current-prefix-arg 1163 (if (use-region-p) 1164 (list (region-beginning) (region-end)) 1165 (list (point-min) (point-max))))) 1166 ;; FIXME: Use csv--jit-align when applicable! 1167 (setq end (copy-marker end)) 1168 (csv-unalign-fields hard beg end) ; If hard then barfs if buffer read only. 1169 (save-excursion 1170 (pcase-let ((`(,column-widths ,field-widths) (csv--column-widths beg end))) 1171 (save-restriction 1172 (narrow-to-region beg end) 1173 (set-marker end nil) 1174 1175 ;; Align fields: 1176 (goto-char (point-min)) 1177 (while (not (eobp)) ; for each record... 1178 (unless (csv-not-looking-at-record) 1179 (let ((w column-widths) 1180 (column 0)) ;Desired position of left-side of this column. 1181 (while (and w (not (eolp))) 1182 (let* ((beg (point)) 1183 (align-padding (if (bolp) 0 csv-align-padding)) 1184 (left-padding 0) (right-padding 0) 1185 (field-width (pop field-widths)) 1186 (column-width (car (pop w))) 1187 (x (- column-width field-width))) ; Required padding. 
1188 (csv-end-of-field) 1189 (set-marker end (point)) ; End of current field. 1190 ;; beg = beginning of current field 1191 ;; end = (point) = end of current field 1192 1193 ;; Compute required padding: 1194 (cond 1195 ((eq csv-align-style 'left) 1196 ;; Left align -- pad on the right: 1197 (setq left-padding align-padding 1198 right-padding x)) 1199 ((eq csv-align-style 'right) 1200 ;; Right align -- pad on the left: 1201 (setq left-padding (+ align-padding x))) 1202 ((eq csv-align-style 'auto) 1203 ;; Auto align -- left align text, right align numbers: 1204 (if (string-match "\\`[-+.[:digit:]]+\\'" 1205 (buffer-substring beg (point))) 1206 ;; Right align -- pad on the left: 1207 (setq left-padding (+ align-padding x)) 1208 ;; Left align -- pad on the right: 1209 (setq left-padding align-padding 1210 right-padding x))) 1211 ((eq csv-align-style 'centre) 1212 ;; Centre -- pad on both left and right: 1213 (let ((y (/ x 2))) ; truncated integer quotient 1214 (setq left-padding (+ align-padding y) 1215 right-padding (- x y))))) 1216 1217 (cond 1218 (hard ;; Hard alignment... 1219 (when (> left-padding 0) ; Pad on the left. 1220 ;; Insert spaces before field: 1221 (if (= beg end) ; null field 1222 (insert (make-string left-padding ?\ )) 1223 (goto-char beg) ; beginning of current field 1224 (insert (make-string left-padding ?\ )) 1225 (goto-char end))) ; end of current field 1226 (unless (eolp) 1227 (if (> right-padding 0) ; pad on the right 1228 ;; Insert spaces after field: 1229 (insert (make-string right-padding ?\ ))) 1230 ;; Make separator (potentially) invisible; 1231 ;; in Emacs 21.3, neighbouring overlays 1232 ;; conflict, so use the following only 1233 ;; with hard alignment: 1234 (csv--make-overlay (point) (1+ (point)) nil t nil 1235 '(invisible csv evaporate t)) 1236 (forward-char))) ; skip separator 1237 1238 ;; Soft alignment... 
1239 ((or (memq 'csv buffer-invisibility-spec) 1240 ;; For TSV, hidden or not doesn't make much difference, 1241 ;; but the behavior is slightly better when we "hide" 1242 ;; the TABs with a `display' property than if we add 1243 ;; before/after-strings. 1244 (tsv--mode-p)) 1245 1246 ;; Hide separators... 1247 ;; Merge right-padding from previous field 1248 ;; with left-padding from this field: 1249 (if (zerop column) 1250 (when (> left-padding 0) 1251 ;; Display spaces before first field 1252 ;; by overlaying first character: 1253 (csv--make-overlay 1254 beg (1+ beg) nil nil nil 1255 `(before-string ,(make-string left-padding ?\ )))) 1256 ;; Display separator as spaces: 1257 (with-silent-modifications 1258 (put-text-property 1259 (1- beg) beg 1260 'display `(space :align-to 1261 ,(+ left-padding column))))) 1262 (unless (eolp) (forward-char)) ; Skip separator. 1263 (setq column (+ column column-width align-padding))) 1264 1265 (t ;; Do not hide separators... 1266 (let ((overlay (csv--make-overlay beg (point) nil nil t))) 1267 (when (> left-padding 0) ; Pad on the left. 1268 ;; Display spaces before field: 1269 (overlay-put overlay 'before-string 1270 (make-string left-padding ?\ ))) 1271 (unless (eolp) 1272 (if (> right-padding 0) ; Pad on the right. 1273 ;; Display spaces after field: 1274 (overlay-put 1275 overlay 1276 'after-string (make-string right-padding ?\ ))) 1277 (forward-char)))) ; Skip separator. 1278 1279 ))))) 1280 (forward-line))))) 1281 (set-marker end nil)) 1282 1283 (defun csv-unalign-fields (hard beg end) 1284 "Undo soft alignment and optionally remove redundant white space. 1285 Undo soft alignment introduced by `csv-align-fields'. If invoked with 1286 an argument then also remove all spaces and tabs around separators. 1287 Also make all invisible separators visible again. 1288 Ignore blank and comment lines. When called non-interactively, remove 1289 spaces and tabs if HARD non-nil; BEG and END specify region to unalign. 
1290 If there is no selected region, default to the whole buffer." 1291 (interactive (cons current-prefix-arg 1292 (if (use-region-p) 1293 (list (region-beginning) (region-end)) 1294 (list (point-min) (point-max))))) 1295 ;; Remove any soft alignment: 1296 (mapc #'csv--delete-overlay (overlays-in beg end)) 1297 (with-silent-modifications 1298 (remove-list-of-text-properties beg end '(display invisible))) 1299 (when hard 1300 (barf-if-buffer-read-only) 1301 ;; Remove any white-space padding around separators: 1302 (save-excursion 1303 (save-restriction 1304 (narrow-to-region beg end) 1305 (goto-char (point-min)) 1306 (while (not (eobp)) 1307 (or (csv-not-looking-at-record) 1308 (while (not (eolp)) 1309 ;; Delete horizontal white space forward: 1310 ;; (delete-horizontal-space) 1311 ;; This relies on left-to-right argument evaluation; 1312 ;; see info node (elisp) Function Forms. 1313 (delete-region (point) 1314 (+ (point) (skip-chars-forward " \t"))) 1315 (csv-end-of-field) 1316 ;; Delete horizontal white space backward: 1317 ;; (delete-horizontal-space t) 1318 (delete-region (point) 1319 (+ (point) (skip-chars-backward " \t"))) 1320 (or (eolp) (forward-char)))) 1321 (forward-line)))))) 1322 1323 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 1324 ;;; Transposing rows and columns 1325 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 1326 1327 (defun csv-transpose (beg end) 1328 "Rewrite rows (which may have different lengths) as columns. 1329 Null fields are introduced as necessary within records but are 1330 stripped from the ends of records. Preserve soft alignment. 1331 This function is its own inverse. Ignore blank and comment lines. 1332 When called non-interactively, BEG and END specify region to process." 
1333 ;; (interactive "*P\nr") 1334 (interactive (csv-interactive-args 'noarg)) 1335 (barf-if-buffer-read-only) 1336 (save-excursion 1337 (save-restriction 1338 (narrow-to-region beg end) 1339 (goto-char (point-min)) 1340 ;; Delete rows and collect them as a reversed list of lists of 1341 ;; fields, skipping comment and blank lines: 1342 (let ((sep (car csv-separators)) 1343 (align (overlays-in beg end)) 1344 rows columns) 1345 ;; Remove soft alignment if necessary: 1346 (when align 1347 (mapc #'csv--delete-overlay align) 1348 (setq align t)) 1349 (while (not (eobp)) 1350 (if (csv-not-looking-at-record) 1351 ;; Skip blank and comment lines: 1352 (forward-line) 1353 (let ((lep (line-end-position))) 1354 (push 1355 (csv--collect-fields lep) 1356 rows) 1357 (delete-region (point) lep) 1358 (or (eobp) (delete-char 1))))) 1359 ;; Rows must have monotonic decreasing lengths to be 1360 ;; transposable, so ensure this by padding with null fields. 1361 ;; rows is currently a reversed list of field lists, which 1362 ;; must therefore have monotonic increasing lengths. 
1363 (let ((oldlen (length (car rows))) newlen 1364 (r (cdr rows))) 1365 (while r 1366 (setq newlen (length (car r))) 1367 (if (< newlen oldlen) 1368 (nconc (car r) (make-list (- oldlen newlen) nil)) 1369 (setq oldlen newlen)) 1370 (setq r (cdr r)))) 1371 ;; Collect columns as a reversed list of lists of fields: 1372 (while rows 1373 (let (column (r rows) row) 1374 (while r 1375 (setq row (car r)) 1376 ;; Provided it would not be a trailing null field, push 1377 ;; field onto column: 1378 (if (or column (string< "" (car row))) 1379 (push (car row) column)) 1380 ;; Pop field off row: 1381 (setcar r (cdr row)) 1382 ;; If row is now empty then remove it: 1383 (or (car r) (setq rows (cdr rows))) 1384 (setq r (cdr r))) 1385 (push column columns))) 1386 ;; Insert columns into buffer as rows: 1387 (setq columns (nreverse columns)) 1388 (while columns 1389 (insert (mapconcat #'identity (car columns) sep) ?\n) 1390 (setq columns (cdr columns))) 1391 ;; Re-do soft alignment if necessary: 1392 (if align (csv-align-fields nil (point-min) (point-max))))))) 1393 1394 (defun csv--collect-fields (row-end-position) 1395 "Collect the fields of a row. 1396 Splits a row into fields, honoring quoted fields, and returns 1397 the list of fields. ROW-END-POSITION is the end-of-line position. 1398 point is assumed to be at the beginning of the line." 
;; Fast path: a row without any quote character is split directly on the
;; separator regexp; otherwise walk it field by field so that quoted
;; fields are honored.
  (let ((csv-field-quotes-regexp (apply #'concat `("[" ,@csv-field-quotes "]")))
        (row-text (buffer-substring-no-properties (point) row-end-position))
        fields field-start)
    (if (not (string-match csv-field-quotes-regexp row-text))
        (split-string row-text csv-separator-regexp)
      (save-excursion
        (while (< (setq field-start (point)) row-end-position)
          ;; csv-forward-field will skip a separator if point is on
          ;; it, and we'll miss an empty field
          (unless (memq (following-char) csv-separator-chars)
            (csv-forward-field 1))
          (push
           (buffer-substring-no-properties field-start (point))
           fields)
          (if (memq (following-char) csv-separator-chars)
              (forward-char)))
        (nreverse fields)))))

(defun csv--unquote-value (value)
  "Remove quotes around VALUE.
If VALUE contains escaped quote characters, un-escape them. If
VALUE is not quoted, return it unchanged.
VALUE counts as quoted only when the whole string starts and ends
with the same character from `csv-field-quotes'. The match data
is preserved."
  (save-match-data
    (let ((quote-regexp (apply #'concat `("[" ,@csv-field-quotes "]"))))
      ;; Anchor on the whole string (\\` ... \\') rather than on line
      ;; boundaries, and let the body span newlines ([^z-a] matches any
      ;; character), so a value with an embedded newline is either
      ;; unquoted as a whole or left alone, never truncated to one line.
      (if-let (((string-match (concat "\\`\\(" quote-regexp "\\)"
                                      "\\([^z-a]*\\)"
                                      "\\(" quote-regexp "\\)\\'")
                              value))
               (quote-char (match-string 1 value))
               ((equal quote-char (match-string 3 value)))
               (unquoted (match-string 2 value)))
          ;; The doubled quote is plain text, not a regexp, and the
          ;; replacement must be inserted literally (LITERAL = t).
          (replace-regexp-in-string (regexp-quote (concat quote-char quote-char))
                                    quote-char unquoted t t)
        value))))

(defun csv-parse-current-row ()
  "Parse the current CSV line.
Return the field values as a list."
  (save-mark-and-excursion
    (goto-char (line-beginning-position))
    (mapcar #'csv--unquote-value (csv--collect-fields (line-end-position)))))

;; State of the header-line display, all buffer-local.
(defvar-local csv--header-line nil)
(defvar-local csv--header-hscroll nil)
(defvar-local csv--header-string nil)

(defun csv-header-line (&optional use-current-line)
  "Set/unset the header line.
If the optional prefix arg USE-CURRENT-LINE is nil, use the first line
as the header line.
If there is already a header line, then unset the header line."
  (interactive "P")
  (if csv--header-line
      (progn
        (delete-overlay csv--header-line)
        (setq csv--header-line nil)
        (kill-local-variable 'header-line-format))
    (save-excursion
      (unless use-current-line (goto-char (point-min)))
      (setq csv--header-line (make-overlay (line-beginning-position)
                                           (line-end-position)
                                           nil nil t))
      (overlay-put csv--header-line 'modification-hooks
                   '(csv--header-flush)))
    (csv--header-flush)
    ;; These are introduced in Emacs 29.
    (unless (boundp 'header-line-indent)
      (setq-local header-line-indent ""
                  header-line-indent-width 0))
    (setq header-line-format
          '("" header-line-indent (:eval (csv--header-string))))))

(defun csv--header-flush (&rest _)
  ;; Force re-computation of the header-line.
  (setq csv--header-hscroll nil))

(defun csv--header-string ()
  ;; Return the header string, recomputing it only when the selected
  ;; window's hscroll differs from the one the cached string was built for.
  ;; FIXME: Won't work with multiple windows showing that same buffer.
  (if (eql (window-hscroll) csv--header-hscroll)
      csv--header-string
    (setq csv--header-hscroll (window-hscroll))
    (setq csv--header-string
          (csv--compute-header-string))))

(defun csv--compute-header-string ()
  ;; Build the header string from the text under the `csv--header-line'
  ;; overlay, starting at column `csv--header-hscroll'.  Every
  ;; `(space :align-to X)' display property in that text is rewritten so
  ;; that X is shifted by the hscroll and by `header-line-indent-width'.
  ;; Errors are demoted to messages, in which case the result is nil.
  (with-demoted-errors "csv--compute-header-string %S"
    (save-excursion
      (goto-char (overlay-start csv--header-line))
      ;; Re-set the line-end-position, just in case.
      (move-overlay csv--header-line (point) (line-end-position))
      (jit-lock-fontify-now (point) (line-end-position))
      ;; Not sure why it is sometimes nil!
      (move-to-column (or csv--header-hscroll 0))
      (let ((str (replace-regexp-in-string
                  "%" "%%" (buffer-substring (point) (line-end-position))))
            (i 0))
        ;; Visit every run of constant `display' property exactly once.
        (while (and i (< i (length str)))
          (let ((prop (get-text-property i 'display str))
                ;; End of the current run, computed before the run is
                ;; rewritten; nil means the run extends to the end of STR.
                (nexti (next-single-property-change i 'display str)))
            (when (and (eq (car-safe prop) 'space)
                       (eq (car-safe (cdr prop)) :align-to))
              (let* ((x (nth 2 prop))
                     (newprop
                      `(space :align-to
                              (+ ,(if (numberp x)
                                      (- x (or csv--header-hscroll 0))
                                    `(- ,x csv--header-hscroll))
                                 header-line-indent-width))))
                (put-text-property i (or nexti (length str))
                                   'display newprop str)))
            ;; Advance by exactly one run.  Stepping a second time after a
            ;; rewrite would skip a directly adjacent run, and would call
            ;; `next-single-property-change' with nil when the rewritten
            ;; run was the last one.
            (setq i nexti)))
        (concat (propertize " " 'display '((space :align-to 0))) str)))))

;;; Auto-alignment

(defcustom csv-align-max-width 40
  "Maximum width of a column in `csv-align-mode'.
This does not apply to the last column (for which the usual `truncate-lines'
setting works better)."
  :type 'integer)

(defcustom csv-align-min-width 1
  "Minimum width of a column in `csv-align-mode'."
  :type 'integer)

(defvar-local csv--config-column-widths nil
  "Settings per column, stored as a list indexed by the column.")

(defun csv-align--set-column (column value)
  ;; Store VALUE as the setting for the 1-based COLUMN, first padding
  ;; `csv--config-column-widths' with nils if it is too short.
  (let ((len (length csv--config-column-widths)))
    (if (< len column)
        (setq csv--config-column-widths
              (nconc csv--config-column-widths (make-list (- column len) nil))))
    (setf (nth (1- column) csv--config-column-widths) value)))

(defun csv-align-set-column-width (column width)
  "Set the max WIDTH to use for COLUMN."
1533 (interactive 1534 (let* ((field (or (csv--field-index) 1)) 1535 (curwidth (nth (1- field) csv--config-column-widths))) 1536 (list field 1537 (cond 1538 ((numberp current-prefix-arg) 1539 current-prefix-arg) 1540 (current-prefix-arg 1541 (read-number (format "Column width (for field %d): " field) 1542 curwidth)) 1543 (t (if curwidth nil (csv--ellipsis-width))))))) 1544 (when (eql width csv-align-max-width) 1545 (setq width nil)) 1546 (csv-align--set-column column width) 1547 (jit-lock-refontify)) 1548 1549 (defvar-local csv--jit-columns nil) 1550 1551 (defun csv--jit-flush-columns () 1552 "Throw away all cached info about column widths." 1553 ;; FIXME: Maybe we should kill its overlays as well. 1554 (setq csv--jit-columns nil)) 1555 1556 (defun csv--jit-merge-columns (column-widths) 1557 ;; FIXME: The incremental update (delayed by jit-lock-context-time) of column 1558 ;; width is a bit jarring at times. It's OK while scrolling or when 1559 ;; extending a column, but not right when enabling the csv-align-mode or 1560 ;; when shortening the longest field (or deleting the line containing it), 1561 ;; because in that case we have *several* cascaded updates, e.g.: 1562 ;; - Remove the line with the longest field of column N. 1563 ;; - Edit some line: this line is updated as if its field was the widest, 1564 ;; hence its subsequent fields are too much to the left. 1565 ;; - The rest is updated starting from the first few lines (according 1566 ;; to jit-lock-chunk-size). 1567 ;; - After the first few lines, come the next set of few lines, 1568 ;; which may cause the previous few lines to need refresh again. 1569 ;; - etc.. until arriving again at the edited line which is re-aligned 1570 ;; again. 1571 ;; - etc.. until the end of the windows, potentially causing yet more 1572 ;; refreshes as we discover yet-wider fields for this column. 
1573 (let ((old-columns csv--jit-columns) 1574 (changed nil)) 1575 (while (and old-columns column-widths) 1576 (when (or (> (caar column-widths) (caar old-columns)) 1577 ;; Apparently modification-hooks aren't run when the 1578 ;; whole text containing the overlay is deleted (e.g. 1579 ;; the whole line), so detect this case here. 1580 ;; It's a bit too late, but better than never. 1581 (null (overlay-buffer (cdar old-columns)))) 1582 (setq changed t) ;; Return non-nil if some existing column changed. 1583 (pcase-let ((`(,width ,beg ,end) (car column-widths))) 1584 (setf (caar old-columns) width) 1585 (move-overlay (cdar old-columns) beg end))) 1586 (setq old-columns (cdr old-columns)) 1587 (setq column-widths (cdr column-widths))) 1588 (when column-widths 1589 ;; New columns appeared. 1590 (setq csv--jit-columns 1591 (nconc csv--jit-columns 1592 (mapcar (lambda (x) 1593 (pcase-let* 1594 ((`(,width ,beg ,end) x) 1595 (ol (make-overlay beg end))) 1596 (overlay-put ol 'csv-width t) 1597 (overlay-put ol 'evaporate t) 1598 (overlay-put ol 'modification-hooks 1599 (list #'csv--jit-width-change)) 1600 (cons width ol))) 1601 column-widths)))) 1602 changed)) 1603 1604 (defun csv--jit-width-change (ol after _beg _end &optional len) 1605 (when (and after (> len 0)) 1606 ;; (let ((x (rassq ol csv--jit-columns))) 1607 ;; (when x (setf (car x) -1))) 1608 (delete-overlay ol))) 1609 1610 (defun csv--jit-unalign (beg end) 1611 (with-silent-modifications 1612 (remove-text-properties beg end 1613 '( display nil csv--jit nil invisible nil 1614 cursor-sensor-functions nil csv--revealed nil)) 1615 (remove-overlays beg end 'csv--jit t))) 1616 1617 (defun csv--jit-flush (beg end) 1618 "Cause all the buffer (except for the BEG...END region) to be re-aligned." 1619 (cl-assert (>= end beg)) 1620 ;; The buffer shouldn't have changed since beg/end were computed, 1621 ;; but just in case, let's make sure they're still sane. 
1622 (when (< beg (point-min)) 1623 (setq beg (point-min) end (max end beg))) 1624 (when (< (point-max) end) 1625 (setq end (point-max) beg (min end beg))) 1626 (let ((pos (point-min))) 1627 (while (and (< pos beg) 1628 (setq pos (text-property-any pos beg 'csv--jit t))) 1629 (jit-lock-refontify 1630 pos (setq pos (or (text-property-any pos beg 'csv--jit nil) beg)))) 1631 (setq pos end) 1632 (while (and (< pos (point-max)) 1633 (setq pos (text-property-any pos (point-max) 'csv--jit t))) 1634 (jit-lock-refontify 1635 pos (setq pos (or (text-property-any pos (point-max) 'csv--jit nil) 1636 (point-max)))))) 1637 (csv--header-flush)) 1638 1639 (defun csv--ellipsis-width () 1640 (let ((ellipsis 1641 (when standard-display-table 1642 (display-table-slot standard-display-table 1643 'selective-display)))) 1644 (if ellipsis (length ellipsis) 3))) 1645 1646 (defun csv-align--cursor-truncated (window oldpos dir) 1647 ;; FIXME: Neither the `entered' nor the `left' event are guaranteed 1648 ;; to be sent, and for the `left' case, even when we do get called, 1649 ;; it may be unclear where the revealed text was (it's somewhere around 1650 ;; `oldpos', but that position can be stale). 1651 ;; Worse, if we have several windows displaying the buffer, when one 1652 ;; cursor leaves we may need to keep the text revealed because of 1653 ;; another window's cursor. 
1654 (let* ((prop (if (eq dir 'entered) 'invisible 'csv--revealed)) 1655 (pos (cond 1656 ((eq dir 'entered) (window-point window)) 1657 (t (max (point-min) 1658 (min (point-max) 1659 (or oldpos (window-point window))))))) 1660 (start (cond 1661 ((and (> pos (point-min)) 1662 (eq (get-text-property (1- pos) prop) 'csv-truncate)) 1663 (or (previous-single-property-change pos prop) (point-min))) 1664 (t pos))) 1665 (end (if (eq (get-text-property pos prop) 'csv-truncate) 1666 (or (next-single-property-change pos prop) (point-max)) 1667 pos))) 1668 (unless (eql start end) 1669 (with-silent-modifications 1670 (put-text-property start end 1671 (if (eq dir 'entered) 'csv--revealed 'invisible) 1672 'csv-truncate) 1673 (remove-text-properties start end (list prop)))))) 1674 1675 (defun csv--jit-align (beg end) 1676 (save-excursion 1677 ;; This is run with inhibit-modification-hooks set, so the overlays' 1678 ;; modification-hook doesn't work :-( 1679 (and csv--header-line 1680 (<= beg (overlay-end csv--header-line)) 1681 (>= end (overlay-start csv--header-line)) 1682 (csv--header-flush)) 1683 ;; First, round up to a whole number of lines. 1684 (goto-char end) 1685 (unless (bolp) (forward-line 1) (setq end (point))) 1686 (goto-char beg) 1687 (unless (bolp) (forward-line 1) (setq beg (point))) 1688 (csv--jit-unalign beg end) 1689 (put-text-property beg end 'csv--jit t) 1690 1691 (pcase-let* ((`(,column-widths ,field-widths) (csv--column-widths beg end)) 1692 (changed (csv--jit-merge-columns column-widths)) 1693 (ellipsis-width (csv--ellipsis-width))) 1694 (when changed 1695 ;; Do it after the current redisplay is over. 1696 (run-with-timer jit-lock-context-time nil #'csv--jit-flush beg end)) 1697 1698 ;; Align fields: 1699 (goto-char beg) 1700 (while (< (point) end) 1701 (unless (csv-not-looking-at-record) 1702 (let ((w csv--jit-columns) 1703 (widths-config csv--config-column-widths) 1704 (column 0)) ;Desired position of left-side of this column. 
1705 (while (and w (not (eolp))) 1706 (let* ((field-beg (point)) 1707 (width-config (pop widths-config)) 1708 (align-padding (if (bolp) 0 csv-align-padding)) 1709 (left-padding 0) (right-padding 0) 1710 (field-width (pop field-widths)) 1711 (column-width 1712 (min (max csv-align-min-width 1713 (car (pop w))) 1714 (or width-config 1715 ;; Don't apply csv-align-max-width 1716 ;; to the last field! 1717 (if w csv-align-max-width 1718 most-positive-fixnum)))) 1719 (x (- column-width field-width)) ; Required padding. 1720 (truncate nil)) 1721 (csv-end-of-field) 1722 ;; beg = beginning of current field 1723 ;; end = (point) = end of current field 1724 (when (< x 0) 1725 (setq truncate (max column 1726 (+ column column-width 1727 align-padding (- ellipsis-width)))) 1728 (setq x 0)) 1729 ;; Compute required padding: 1730 (pcase csv-align-style 1731 ('left 1732 ;; Left align -- pad on the right: 1733 (setq left-padding align-padding 1734 right-padding x)) 1735 ('right 1736 ;; Right align -- pad on the left: 1737 (setq left-padding (+ align-padding x))) 1738 ('auto 1739 ;; Auto align -- left align text, right align numbers: 1740 (if (string-match "\\`[-+.[:digit:]]+\\'" 1741 (buffer-substring field-beg (point))) 1742 ;; Right align -- pad on the left: 1743 (setq left-padding (+ align-padding x)) 1744 ;; Left align -- pad on the right: 1745 (setq left-padding align-padding 1746 right-padding x))) 1747 ('centre 1748 ;; Centre -- pad on both left and right: 1749 (let ((y (/ x 2))) ; truncated integer quotient 1750 (setq left-padding (+ align-padding y) 1751 right-padding (- x y))))) 1752 1753 (cond 1754 1755 ((or (memq 'csv buffer-invisibility-spec) 1756 ;; For TSV, hidden or not doesn't make much difference, 1757 ;; but the behavior is slightly better when we "hide" 1758 ;; the TABs with a `display' property than if we add 1759 ;; before/after-strings. 1760 (tsv--mode-p)) 1761 1762 ;; Hide separators... 
1763 ;; Merge right-padding from previous field 1764 ;; with left-padding from this field: 1765 (if (zerop column) 1766 (when (> left-padding 0) 1767 ;; Display spaces before first field 1768 ;; by overlaying first character: 1769 (csv--make-overlay 1770 field-beg (1+ field-beg) nil nil nil 1771 `(before-string ,(make-string left-padding ?\ ) 1772 csv--jit t))) 1773 ;; Display separator as spaces: 1774 (with-silent-modifications 1775 (put-text-property 1776 (1- field-beg) field-beg 1777 'display `(space :align-to 1778 ,(+ left-padding column)))))) 1779 1780 (t ;; Do not hide separators... 1781 (let ((overlay (csv--make-overlay field-beg (point) 1782 nil nil t 1783 '(csv--jit t)))) 1784 (when (> left-padding 0) ; Pad on the left. 1785 ;; Display spaces before field: 1786 (overlay-put overlay 'before-string 1787 (make-string left-padding ?\ ))) 1788 (unless (eolp) 1789 (if (> right-padding 0) ; Pad on the right. 1790 ;; Display spaces after field: 1791 (overlay-put 1792 overlay 1793 'after-string (make-string right-padding ?\ ))))))) 1794 (setq column (+ column column-width align-padding)) 1795 ;; Do it after applying the property, so `move-to-column' can 1796 ;; take it into account. 1797 (when truncate 1798 (let ((trunc-pos 1799 (save-excursion 1800 ;; ¡¡ BIG UGLY HACK !! 1801 ;; `current-column' and `move-to-column' count 1802 ;; text hidden with an ellipsis "as if" it were 1803 ;; fully visible, which is completely wrong here, 1804 ;; so circumvent this by temporarily pretending 1805 ;; that `csv-truncate' is fully invisible (which 1806 ;; isn't quite right either, but should work 1807 ;; just well enough for us here). 
1808 (let ((buffer-invisibility-spec 1809 buffer-invisibility-spec)) 1810 (add-to-invisibility-spec 'csv-truncate) 1811 (move-to-column truncate)) 1812 (point)))) 1813 (put-text-property trunc-pos (point) 1814 'invisible 'csv-truncate) 1815 (when (> (- (point) trunc-pos) 1) 1816 ;; Arrange to temporarily untruncate the string when 1817 ;; cursor moves into it. 1818 ;; FIXME: This only works if 1819 ;; `global-disable-point-adjustment' is non-nil! 1820 ;; Arguably this should be fixed by making 1821 ;; point-adjustment code pay attention to 1822 ;; cursor-sensor-functions! 1823 (put-text-property 1824 (1+ trunc-pos) (point) 1825 'cursor-sensor-functions 1826 (list #'csv-align--cursor-truncated))))) 1827 (unless (eolp) (forward-char)) ; Skip separator. 1828 )))) 1829 (forward-line))) 1830 `(jit-lock-bounds ,beg . ,end))) 1831 1832 (define-minor-mode csv-align-mode 1833 "Align columns on the fly." 1834 :global nil 1835 (csv-unalign-fields nil (point-min) (point-max)) ;Just in case. 1836 (cond 1837 (csv-align-mode 1838 (add-to-invisibility-spec '(csv-truncate . t)) 1839 (kill-local-variable 'csv--jit-columns) 1840 (cursor-sensor-mode 1) 1841 (when (fboundp 'header-line-indent-mode) 1842 (header-line-indent-mode)) 1843 (jit-lock-register #'csv--jit-align) 1844 (jit-lock-refontify)) 1845 (t 1846 (remove-from-invisibility-spec '(csv-truncate . t)) 1847 (jit-lock-unregister #'csv--jit-align) 1848 (csv--jit-unalign (point-min) (point-max)))) 1849 (csv--header-flush)) 1850 1851 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 1852 ;;; Separator guessing 1853 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 1854 1855 (defvar csv--preferred-separators 1856 '(?, ?\; ?\t) 1857 "Preferred separator characters in case of a tied score.") 1858 1859 (defun csv-guess-set-separator () 1860 "Guess and set the CSV separator of the current buffer. 
1861 1862 Add it to the mode hook to have CSV mode guess and set the 1863 separator automatically when visiting a buffer: 1864 1865 (add-hook \\='csv-mode-hook \\='csv-guess-set-separator)" 1866 (interactive) 1867 (let ((sep (csv-guess-separator 1868 (buffer-substring-no-properties 1869 (point-min) 1870 ;; We're probably only going to look at the first 2048 1871 ;; or so chars, but take more than we probably need to 1872 ;; minimize the chance of breaking the input in the 1873 ;; middle of a (long) row. 1874 (min 8192 (point-max))) 1875 2048))) 1876 (when sep 1877 (csv-set-separator sep)))) 1878 1879 (defun csv-guess-separator (text &optional cutoff) 1880 "Return a guess of which character is the CSV separator in TEXT." 1881 (let ((best-separator nil) 1882 (best-score 0)) 1883 (dolist (candidate (csv--separator-candidates text cutoff)) 1884 (let ((candidate-score 1885 (csv--separator-score candidate text cutoff))) 1886 (when (or (> candidate-score best-score) 1887 (and (= candidate-score best-score) 1888 (member candidate csv--preferred-separators))) 1889 (setq best-separator candidate) 1890 (setq best-score candidate-score)))) 1891 best-separator)) 1892 1893 (defun csv--separator-candidates (text &optional cutoff) 1894 "Return a list of candidate CSV separators in TEXT. 1895 When CUTOFF is passed, look only at the first CUTOFF number of characters." 1896 (let ((chars (make-hash-table))) 1897 (dolist (c (string-to-list 1898 (if cutoff 1899 (substring text 0 (min cutoff (length text))) 1900 text))) 1901 (when (and (not (gethash c chars)) 1902 (or (= c ?\t) 1903 (= c ?\C-_) 1904 (and (not (member c '(?. ?/ ?\" ?'))) 1905 (not (member (get-char-code-property c 'general-category) 1906 '(Lu Ll Lt Lm Lo Nd Nl No Ps Pe Cc Co)))))) 1907 (puthash c t chars))) 1908 (hash-table-keys chars))) 1909 1910 (defun csv--separator-score (separator text &optional cutoff) 1911 "Return a score on how likely SEPARATOR is a separator in TEXT. 
1912 1913 When CUTOFF is passed, stop the calculation at the next whole 1914 line after having read CUTOFF number of characters. 1915 1916 The scoring is based on the idea that most CSV data is tabular, 1917 i.e. separators should appear equally often on each line. 1918 Furthermore, more commonly appearing characters are scored higher 1919 than those who appear less often. 1920 1921 Adapted from the paper \"Wrangling Messy CSV Files by Detecting 1922 Row and Type Patterns\" by Gerrit J.J. van den Burg , Alfredo 1923 Nazábal, and Charles Sutton: https://arxiv.org/abs/1811.11242." 1924 (let ((groups 1925 (with-temp-buffer 1926 (csv-set-separator separator) 1927 (save-excursion 1928 (insert text)) 1929 (let ((groups (make-hash-table)) 1930 (chars-read 0)) 1931 (while (and (/= (point) (point-max)) 1932 (or (not cutoff) 1933 (< chars-read cutoff))) 1934 (let* ((lep (line-end-position)) 1935 (nfields (length (csv--collect-fields lep)))) 1936 (cl-incf (gethash nfields groups 0)) 1937 (cl-incf chars-read (- lep (point))) 1938 (goto-char (+ lep 1)))) 1939 groups))) 1940 (sum 0)) 1941 (maphash 1942 (lambda (length num) 1943 (cl-incf sum (* num (/ (- length 1) (float length))))) 1944 groups) 1945 (let ((unique-groups (hash-table-count groups))) 1946 (if (= 0 unique-groups) 1947 0 1948 (/ sum unique-groups))))) 1949 1950 ;;; TSV support 1951 1952 ;; Since "the" CSV format is really a bunch of different formats, it includes 1953 ;; TSV as a subcase, but this subcase is sufficiently interesting that it has 1954 ;; its own mime-type and mostly standard file extension, also it suffers 1955 ;; less from the usual quoting problems of CSV (because the only problematic 1956 ;; chars are LF and TAB, really, which are much less common inside fields than 1957 ;; commas, space, and semi-colons) so it's "better behaved". 
(defvar tsv-mode-syntax-table
  ;; Derive from `text-mode-syntax-table' and not from
  ;; `csv-mode-syntax-table': the `csv-field-quotes' settings
  ;; must not be inherited.
  (make-syntax-table text-mode-syntax-table)
  "Syntax table used in `tsv-mode'.")

(defvar tsv-mode-map
  (let ((keymap (make-sparse-keymap)))
    ;; The `csv-invisibility-default/csv-toggle-invisibility' business
    ;; doesn't make much sense in `tsv-mode', so disable the command.
    (define-key keymap [remap csv-toggle-invisibility] #'undefined)
    keymap)
  "Keymap used in `tsv-mode'.")

;;;###autoload
(add-to-list 'auto-mode-alist '("\\.tsv\\'" . tsv-mode))

(defun tsv--mode-p ()
  "Return non-nil if TAB is the only separator character in use."
  (equal '(?\t) csv-separator-chars))

;;;###autoload
(define-derived-mode tsv-mode csv-mode "TSV"
  "Major mode for editing files of tab-separated value type."
  :group 'CSV
  ;; TAB is the one and only possible separator in TSV.
  ;; FIXME: Copy&pasted from the `:set'ter of csv-separators!
  (setq-local csv-separators '("\t")
              csv-separator-chars '(?\t)
              csv--skip-chars "^\n\t"
              csv-separator-regexp "\t")
  ;; NB: csv-separator-face variable evaluates to itself.
  (setq-local csv-font-lock-keywords
              (list (list csv-separator-regexp '(0 'csv-separator-face))))

  ;; According to wikipedia, TSV doesn't use quotes but uses backslash escapes
  ;; of the form \n, \t, \r, and \\ instead.
  (setq-local csv-field-quotes nil))


(provide 'csv-mode)

;;; csv-mode.el ends here