;;; GNU Guix --- Functional package management for GNU ;;; Copyright © 2021-2023 Ludovic Courtès ;;; Copyright © 2023 Kierin Bell ;;; ;;; This file is part of GNU Guix. ;;; ;;; GNU Guix is free software; you can redistribute it and/or modify it ;;; under the terms of the GNU General Public License as published by ;;; the Free Software Foundation; either version 3 of the License, or (at ;;; your option) any later version. ;;; ;;; GNU Guix is distributed in the hope that it will be useful, but ;;; WITHOUT ANY WARRANTY; without even the implied warranty of ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ;;; GNU General Public License for more details. ;;; ;;; You should have received a copy of the GNU General Public License ;;; along with GNU Guix. If not, see . (define-module (guix read-print) #:use-module (ice-9 control) #:use-module (ice-9 match) #:use-module (ice-9 rdelim) #:use-module (ice-9 vlist) #:use-module (ice-9 format) #:use-module (srfi srfi-1) #:use-module (srfi srfi-26) #:use-module (srfi srfi-34) #:use-module (srfi srfi-35) #:use-module (guix i18n) #:use-module ((guix diagnostics) #:select (formatted-message &fix-hint &error-location location)) #:use-module (language elisp parser) #:export (pretty-print-with-comments pretty-print-with-comments/splice read-with-comments read-with-comments/sequence object->string* blank? vertical-space vertical-space? vertical-space-height canonicalize-vertical-space page-break page-break? comment comment? comment->string comment-margin? canonicalize-comment)) ;;; Commentary: ;;; ;;; This module provides a comment-preserving reader and a comment-preserving ;;; pretty-printer smarter than (ice-9 pretty-print). ;;; ;;; Code: ;;; ;;; Comment-preserving reader. ;;; (define ;; The parent class for "blanks". (make-record-type ' '() (lambda (obj port) (format port "#" (number->string (object-address obj) 16))) #:extensible? #t)) (define blank? (record-predicate )) (define (make-record-type ' '(height) #:parent #:extensible? #f)) (define vertical-space? (record-predicate )) (define vertical-space (record-type-constructor )) (define vertical-space-height (record-accessor 'height)) (define canonicalize-vertical-space (let ((unit (vertical-space 1))) (lambda (space) "Return a vertical space corresponding to a single blank line." unit))) (define (make-record-type ' '() #:parent #:extensible? #f)) (define page-break? (record-predicate )) (define page-break (let ((break ((record-type-constructor )))) (lambda () break))) (define ;; Comments. (make-record-type ' '(str margin?) #:parent #:extensible? #f)) (define comment? (record-predicate )) (define string->comment (record-type-constructor )) (define comment->string (record-accessor 'str)) (define comment-margin? (record-accessor 'margin?)) (define* (comment str #:optional margin?) "Return a new comment made from STR. When MARGIN? is true, return a margin comment; otherwise return a line comment. STR must start with a semicolon and end with newline, otherwise an error is raised." (when (or (string-null? str) (not (eqv? #\; (string-ref str 0))) (not (string-suffix? "\n" str))) (raise (condition (&message (message "invalid comment string"))))) (string->comment str margin?)) (define char-set:whitespace-sans-page-break ;; White space, excluding #\page. (char-set-difference char-set:whitespace (char-set #\page))) (define (space? chr) "Return true if CHR is white space, except for page breaks." (char-set-contains? char-set:whitespace-sans-page-break chr)) (define (read-vertical-space port) "Read from PORT until a non-vertical-space character is met, and return a single record." (let loop ((height 1)) (match (read-char port) (#\newline (loop (+ 1 height))) ((? eof-object?) (vertical-space height)) ((? space?) (loop height)) (chr (unread-char chr port) (vertical-space height))))) (define (read-until-end-of-line port) "Read white space from PORT until the end of line, included." (let loop () (match (read-char port) (#\newline #t) ((? eof-object?) #t) ((? space?) (loop)) (chr (unread-char chr port))))) (define* (read-with-comments port #:key (blank-line? #t) (elisp? #f) (unelisp-extensions? #f)) "Like 'read', but include objects when they're encountered. When BLANK-LINE? is true, assume PORT is at the beginning of a new line. When ELISP? is true, read Elisp, and when UNELISP-EXTENSIONS? is true, convert objects into lists suitable for use with the `elisp' macro in the `(gnu home services emacs)' module." ;; Note: Instead of implementing this functionality in 'read' proper, which ;; is the best approach long-term, this code is a layer on top of 'read', ;; such that we don't have to rely on a specific Guile version. (define dot (list 'dot)) (define (dot? x) (eq? x dot)) (define (missing-closing-paren-error) (raise (make-compound-condition (formatted-message (G_ "unexpected end of file")) (condition (&error-location (location (match (port-filename port) (#f #f) (file (location file (port-line port) (port-column port)))))) (&fix-hint (hint (G_ "Did you forget a closing parenthesis?"))))))) (define (invalid-array-error) (raise (make-compound-condition (formatted-message (G_ "invalid array syntax")) (condition (&error-location (location (match (port-filename port) (#f #f) (file (location file (port-line port) (port-column port)))))) (&fix-hint (hint (G_ "Did you mean to write a dotted list?"))))))) (define (reverse/dot lst array?) ;; Reverse LST and make it an improper list if it contains DOT. (let loop ((result '()) (lst lst)) (match lst (() (if array? (list->array 1 result) result)) (((? dot?) . rest) (if array? (invalid-array-error) (if (pair? rest) (let ((dotted (reverse rest))) (set-cdr! (last-pair dotted) (car result)) dotted) (car result)))) ((('%set-lexical-binding-mode . _) . rest) (loop result rest)) ((x . rest) (loop (cons x result) rest))))) (let loop ((blank-line? blank-line?) (return (const 'unbalanced))) (match (read-char port) ((? eof-object? eof) eof) ;oops! (chr (cond ((eqv? chr #\newline) (if blank-line? (unless (and elisp? unelisp-extensions?) (read-vertical-space port)) (loop #t return))) ((eqv? chr #\page) ;; Assume that a page break is on a line of its own and read ;; subsequent white space and newline. (read-until-end-of-line port) (unless (and elisp? unelisp-extensions?) (page-break))) ((char-set-contains? char-set:whitespace chr) (loop blank-line? return)) ((memv chr '(#\( #\[)) (let/ec return (let liip ((lst '()) (arr? (and elisp? (eqv? chr #\[)))) (define item (loop (match lst (((? blank?) . _) #t) (_ #f)) (lambda () (return (reverse/dot lst arr?))))) (if (eof-object? item) (missing-closing-paren-error) (liip (cons item lst) arr?))))) ((memv chr '(#\) #\])) (return)) ((eq? chr #\') (list 'quote (loop #f return))) ((eq? chr #\`) (list 'quasiquote (loop #f return))) ((eq? chr #\#) (cond ((and elisp? unelisp-extensions?) ;; Return list for `elisp' macro in `(gnu home services emacs)' (match (read-char port) (#\$ (match (read-char port) (#\@ (list 'unelisp-splicing (read port))) (chr (unread-char chr port) (list 'unelisp (read port))))) (#\; (unread-char #\; port) (list 'unelisp-comment (read-line port 'concat))) (#\> (list 'unelisp-newline)) (#\^ (match (read-char port) (#\L (list 'unelisp-page-break)) (chr (unread-char chr port) (unread-char #\^ port) (unread-char #\# port) (read-elisp port)))) (chr (unread-char chr port) (unread-char #\# port) (read-elisp port)))) (elisp? ;; Read normal Elisp (unread-char #\# port) (read-elisp port)) (else ;; Read Scheme (match (read-char port) (#\~ (list 'gexp (loop #f return))) (#\$ (list (match (peek-char port) (#\@ (read-char port) ;consume 'ungexp-splicing) (_ 'ungexp)) (loop #f return))) (#\+ (list (match (peek-char port) (#\@ (read-char port) ;consume 'ungexp-native-splicing) (_ 'ungexp-native)) (loop #f return))) (chr (unread-char chr port) (unread-char #\# port) (read port)))))) ((eq? chr #\,) (list (match (peek-char port) (#\@ (read-char port) 'unquote-splicing) (_ 'unquote)) (loop #f return))) ((eqv? chr #\;) (if (and elisp? unelisp-extensions?) (begin (read-line port 'concat) ;consume (loop blank-line? return)) (begin (unread-char chr port) (string->comment (read-line port 'concat) (not blank-line?))))) ((eqv? chr #\?) (if elisp? (begin ;; Elisp character; improve upon `read-elisp' by returning ;; Scheme characters instead of integers. ;; Character read syntax support by `read-elisp': ;; ?X (supported), ?\uXXXX (supported), ?\uXXXXXXXX ;; (supported), ?\X (supported), ?\XXX (octal, supported), ;; ?\N{NAME} (returns same as ?\N), ?\N{U+X} (returns same ;; as ?\N), or \xXX (unsupported, signals error) ;; `integer->char' will signal error if integer is not in ;; range 0-#xD7FF or #xE000-#x10FFFF. (unread-char #\? port) (integer->char (read-elisp port))) (begin ;; Scheme symbol (unread-char #\? port) (read port)))) (else (cond ;; Unlike for Scheme `read', `.' is an invalid read syntax for ;; `read-elisp'. ((and elisp? (eqv? chr #\.) (char-set-contains? char-set:whitespace ;redundant (peek-char port))) dot) (elisp? (unread-char chr port) (read-elisp port)) (else (unread-char chr port) (match (read port) ((and token '#{.}#) (if (eq? chr #\.) dot token)) (token token)))))))))) (define* (read-with-comments/sequence port #:key elisp?) "Read from PORT until the end-of-file is reached and return the list of expressions and blanks that were read." (let loop ((lst '()) (blank-line? #t)) (match (read-with-comments port #:blank-line? blank-line? #:elisp? elisp?) ((? eof-object?) (reverse! lst)) ((? blank? blank) (loop (cons blank lst) #t)) (exp (loop (cons exp lst) #f))))) ;;; ;;; Comment-preserving pretty-printer. ;;; (define-syntax vhashq (syntax-rules (quote) ((_) vlist-null) ((_ (key (quote (lst ...))) rest ...) (vhash-consq key '(lst ...) (vhashq rest ...))) ((_ (key value) rest ...) (vhash-consq key '((() . value)) (vhashq rest ...))))) (define %special-forms ;; Forms that are indented specially. The number is meant to be understood ;; like Emacs' 'scheme-indent-function' symbol property. When given an ;; alist instead of a number, the alist gives "context" in which the symbol ;; is a special form; for instance, context (modify-phases) means that the ;; symbol must appear within a (modify-phases ...) expression. (vhashq ('begin 1) ('case 2) ('cond 1) ('lambda 2) ('lambda* 2) ('match-lambda 1) ('match-lambda* 1) ('define 2) ('define* 2) ('define-public 2) ('define*-public 2) ('define-syntax 2) ('define-syntax-rule 2) ('define-module 2) ('define-gexp-compiler 2) ('define-record-type 2) ('define-record-type* 4) ('define-configuration 2) ('package/inherit 2) ('let 2) ('let* 2) ('letrec 2) ('letrec* 2) ('match 2) ('match-record 3) ('match-record-lambda 2) ('when 2) ('unless 2) ('package 1) ('origin 1) ('channel 1) ('modify-inputs 2) ('modify-phases 2) ('add-after '(((modify-phases) . 3))) ('add-before '(((modify-phases) . 3))) ('replace '(((modify-phases) . 2))) ;different from 'modify-inputs' ('substitute* 2) ('substitute-keyword-arguments 2) ('call-with-input-file 2) ('call-with-output-file 2) ('with-output-to-file 2) ('with-input-from-file 2) ('with-directory-excursion 2) ('wrap-program 2) ('wrap-script 2) ;; (gnu system) and (gnu services). ('operating-system 1) ('bootloader-configuration 1) ('mapped-device 1) ('file-system 1) ('swap-space 1) ('user-account 1) ('user-group 1) ('setuid-program 1) ('modify-services 2) ;; (gnu home). ('home-environment 1))) (define %newline-forms ;; List heads that must be followed by a newline. The second argument is ;; the context in which they must appear. This is similar to a special form ;; of 1, except that indent is 1 instead of 2 columns. (vhashq ('arguments '(package)) ('sha256 '(origin source package)) ('base32 '(sha256 origin)) ('git-reference '(uri origin source)) ('search-paths '(package)) ('native-search-paths '(package)) ('search-path-specification '()) ('services '(operating-system)) ('set-xorg-configuration '()) ('services '(home-environment)) ('home-bash-configuration '(service)) ('home-emacs-configuration '()) ('introduction '(channel)))) (define %elisp-special-forms ;; Forms that should be indented specially in Elisp, adapted from the ;; `lisp-indent-function' property for each symbol by adding 1 to each ;; integer value and substituting 3 for `defun', for compatibility with ;; `%special-forms'. This is a non-exhaustive list, generated by mapping ;; over the obarray of a minimal Emacs environment, and then removing ;; symbols that are obsolete or unlikely to ever appear in an Emacs package ;; or configuration file. (vhashq ('and-let* 2) ('atomic-change-group 1) ('autoload 3) ('benchmark-progn 1) ('benchmark-run 2) ('benchmark-run-compiled 2) ('byte-compile-maybe-guarded 2) ('catch 2) ('cl-block 2) ('cl-callf 3) ('cl-callf2 4) ('cl-case 2) ('cl-defgeneric 3) ('cl-define-compiler-macro 3) ('cl-defmacro 3) ('cl-defmethod 4) ('cl-defstruct 2) ('cl-defsubst 3) ('cl-deftype 3) ('cl-defun 3) ('cl-destructuring-bind 3) ('cl-do 3) ('cl-do* 3) ('cl-do-all-symbols 2) ('cl-do-symbols 2) ('cl-dolist 2) ('cl-dotimes 2) ('cl-ecase 2) ('cl-etypecase 2) ('cl-eval-when 2) ('cl-flet 2) ('cl-flet* 2) ('cl-generic-define-context-rewriter 4) ('cl-generic-define-generalizer 2) ('cl-iter-defun 3) ('cl-labels 2) ('cl-letf 2) ('cl-letf* 2) ('cl-macrolet 2) ('cl-multiple-value-bind 3) ('cl-multiple-value-setq 2) ('cl-once-only 2) ('cl-progv 3) ('cl-return-from 2) ('cl-symbol-macrolet 2) ('cl-the 2) ('cl-typecase 2) ('cl-with-gensyms 2) ('combine-after-change-calls 1) ('combine-change-calls 3) ('condition-case 3) ('condition-case-unless-debug 3) ('def-edebug-elem-spec 2) ('def-edebug-spec 2) ('defadvice 3) ('defalias 3) ('defclass 3) ('defconst 3) ('defcustom 3) ('defface 3) ('defgroup 3) ('defimage 3) ('define-abbrev 3) ('define-abbrev-table 3) ('define-advice 3) ('define-alternatives 3) ('define-auto-insert 3) ('define-button-type 3) ('define-category 3) ('define-char-code-property 3) ('define-derived-mode 3) ('define-fringe-bitmap 3) ('define-generic-mode 2) ('define-globalized-minor-mode 3) ('define-inline 3) ('define-keymap 3) ('define-mail-user-agent 3) ('define-minor-mode 3) ('define-multisession-variable 3) ('define-obsolete-function-alias 3) ('define-obsolete-variable-alias 3) ('define-short-documentation-group 3) ('define-skeleton 3) ('define-widget 3) ('define-widget-keywords 3) ('defmacro 3) ('defmath 3) ('defsubst 3) ('deftheme 2) ('defun 3) ('defvar 3) ('defvar-keymap 2) ('defvar-local 3) ('defvaralias 3) ('delay-mode-hooks 1) ('dlet 2) ('dolist 2) ('dolist-with-progress-reporter 3) ('dotimes 2) ('dotimes-with-progress-reporter 3) ('easy-menu-define 3) ('easy-mmode-defmap 2) ('easy-mmode-defsyntax 2) ('ert-deftest 3) ('eval-after-load 2) ('eval-and-compile 1) ('eval-when-compile 1) ('gv-define-expander 2) ('gv-define-setter 3) ('gv-letplace 3) ('if 2) ; Changed from 3 ('if-let 2) ; Changed from 3 ('if-let* 2) ; Changed from 3 ('ignore-error 2) ('ignore-errors 1) ('isearch-define-mode-toggle 4) ('keymap-set-after 4) ('lambda 2) ; Changed from 3 ('let 2) ('let* 2) ('let-alist 2) ('let-when-compile 2) ('letrec 2) ('macroexp-let2 4) ('macroexp-let2* 3) ('minibuffer-with-setup-hook 2) ('named-let 3) ('oclosure-define 2) ('oclosure-lambda 3) ('pcase 2) ('pcase-defmacro 3) ('pcase-dolist 2) ('pcase-exhaustive 2) ('pcase-lambda 4) ('pcase-let 2) ('pcase-let* 2) ('prog1 2) ('prog2 3) ('progn 1) ('rx-define 3) ('rx-let 2) ('rx-let-eval 2) ('save-current-buffer 1) ('save-excursion 1) ('save-mark-and-excursion 1) ('save-match-data 1) ('save-restriction 1) ('save-selected-window 1) ('save-window-excursion 1) ('seq-doseq 2) ('seq-let 3) ('thread-first 1) ('thread-last 1) ('track-mouse 1) ('unless 2) ('unwind-protect 2) ('use-package 2) ; Changed from 3 ('when 2) ('when-let 2) ('when-let* 2) ('while 2) ('while-let 2) ('while-no-input 1) ('with-auto-compression-mode 1) ('with-buffer-unmodified-if-unchanged 1) ('with-case-table 2) ('with-category-table 2) ('with-coding-priority 2) ('with-current-buffer 2) ('with-current-buffer-window 4) ('with-decoded-time-value 2) ('with-delayed-message 2) ('with-demoted-errors 2) ('with-displayed-buffer-window 4) ('with-environment-variables 2) ('with-eval-after-load 2) ('with-existing-directory 1) ('with-file-modes 2) ('with-help-window 2) ('with-local-quit 1) ('with-locale-environment 2) ('with-memoization 2) ('with-minibuffer-completions-window 1) ('with-minibuffer-selected-window 1) ('with-mutex 2) ('with-no-warnings 1) ('with-output-to-string 1) ('with-output-to-temp-buffer 2) ('with-selected-frame 2) ('with-selected-window 2) ('with-silent-modifications 1) ('with-slots 3) ('with-suppressed-warnings 2) ('with-syntax-table 2) ('with-temp-buffer 1) ('with-temp-buffer-window 4) ('with-temp-file 2) ('with-temp-message 2) ('with-timeout 2) ('with-undo-amalgamate 1) ('with-window-non-dedicated 2))) (define (prefix? candidate lst) "Return true if CANDIDATE is a prefix of LST." (let loop ((candidate candidate) (lst lst)) (match candidate (() #t) ((head1 . rest1) (match lst (() #f) ((head2 . rest2) (and (equal? head1 head2) (loop rest1 rest2)))))))) (define* (special-form-lead symbol context #:key elisp? (special-forms '())) "If SYMBOL is a special form in the given CONTEXT, return its number of arguments; otherwise return #f. CONTEXT is a stack of symbols lexically surrounding SYMBOL. If ELISP? is true, return the number of arguments for the Emacs Lisp form matching SYMBOL. If SYMBOL is a key in the alist SPECIAL-FORMS, return the value of the first matching alist entry instead." ;; XXX: A value N in SPECIAL-FORMS is equivalent to a value of N+1 in the ;; `%special-forms' or `%elisp-special-forms' vhashes; this makes ;; SPECIAL-FORMS similar to the `lisp-indent-function' symbol property in ;; Emacs and probably less confusing. (or (assq-ref special-forms symbol) (match (vhash-assq symbol (if elisp? %elisp-special-forms %special-forms)) (#f #f) ((_ . alist) (any (match-lambda ((prefix . level) (and (prefix? prefix context) (- level 1)))) alist))))) (define (newline-form? symbol context) "Return true if parenthesized expressions starting with SYMBOL must be followed by a newline." (let ((matches (vhash-foldq* cons '() symbol %newline-forms))) (find (cut prefix? <> context) matches))) (define (escaped-string str) "Return STR with backslashes and double quotes escaped. Everything else, in particular newlines, is left as is." (list->string `(#\" ,@(string-fold-right (lambda (chr lst) (match chr (#\" (cons* #\\ #\" lst)) (#\\ (cons* #\\ #\\ lst)) (_ (cons chr lst)))) '() str) #\"))) (define %natural-whitespace-string-forms ;; When a string has one of these forms as its parent, only double quotes ;; and backslashes are escaped; newlines, tabs, etc. are left as-is. '(synopsis description G_ N_)) (define %elisp-natural-whitespace-string-forms '(defun)) (define* (printed-string str context #:key elisp?) "Return the read syntax for STR depending on CONTEXT and ELISP?." (define (preserve-newlines? str) (and (> (string-length str) 40) (string-index str #\newline))) (match context (() (if (preserve-newlines? str) (escaped-string str) (object->string str))) ((head . _) (if (or (memq head (if elisp? %elisp-natural-whitespace-string-forms %natural-whitespace-string-forms)) (preserve-newlines? str)) (escaped-string str) (object->string str))))) (define (string-width str) "Return the \"width\" of STR--i.e., the width of the longest line of STR." (apply max (map string-length (string-split str #\newline)))) (define (canonicalize-comment comment indent) "Canonicalize COMMENT, which is to be printed at INDENT, ensuring it has the \"right\" number of leading semicolons." (if (zero? indent) comment ;leave top-level comments unchanged (let ((line (string-trim-both (string-trim (comment->string comment) (char-set #\;))))) (string->comment (string-append (if (comment-margin? comment) ";" (if (string-null? line) ";;" ;no trailing space ";; ")) line "\n") (comment-margin? comment))))) (define %not-newline (char-set-complement (char-set #\newline))) (define (print-multi-line-comment str indent port) "Print to PORT STR as a multi-line comment, with INDENT spaces preceding each line except the first one (they're assumed to be already there)." ;; While 'read-with-comments' only returns one-line comments, user-provided ;; comments might span multiple lines, which is why this is necessary. (let loop ((lst (string-tokenize str %not-newline))) (match lst (() #t) ((last) (display last port) (newline port)) ((head tail ...) (display head port) (newline port) (display (make-string indent #\space) port) (loop tail))))) (define %integer-forms ;; Forms that take an integer as their argument, where said integer should ;; be printed in base other than decimal base. (letrec-syntax ((vhashq (syntax-rules () ((_) vlist-null) ((_ (key value) rest ...) (vhash-consq key value (vhashq rest ...)))))) (vhashq ('chmod 8) ('umask 8) ('mkdir 8) ('mkstemp 8) ('logand 16) ('logior 16) ('logxor 16) ('lognot 16)))) (define (integer->string integer context) "Render INTEGER as a string using a base suitable based on CONTEXT." (define (form-base form) (match (vhash-assq form %integer-forms) (#f 10) ((_ . base) base))) (define (octal? form) (= 8 (form-base form))) (define base (match context ((head . tail) (match (form-base head) (8 8) (16 (if (any octal? tail) 8 16)) (10 10))) (_ 10))) (string-append (match base (10 "") (16 "#x") (8 "#o")) (number->string integer base))) (define %special-non-extended-symbols ;; Special symbols that can be written without the #{...}# notation for ;; extended symbols: 1+, 1-, 123/, etc. (make-regexp "^[0-9]+[[:graph:]]+$" regexp/icase)) (define %elisp-special-symbol-chars ;; Characters that need to be backslash-escaped within an Elisp symbol (see ;; (elisp) Symbol Type). (char-set-complement (char-set-union char-set:letter+digit (char-set #\- #\+ #\= #\( #\/ #\_ #\~ #\! #\@ #\$ #\% #\^ #\& #\: #\< #\> #\{ #\} #\? #\*)))) (define %elisp-confusable-number-symbols ;; Symbols that must begin with a backslash in order to prevent them from ;; being read as Elisp numbers. (make-regexp (string-append "(^[+-]?[0-9]+(\\.[0-9]*[eE]?(\\+NaN|\\+INF|[0-9]+)?)?$)" "|(^[0-9]+[eE][0-9]+$)"))) (define* (symbol->display-string symbol context #:key elisp?) "Return the most appropriate representation of SYMBOL, resorting to extended symbol notation only when strictly necessary." (let ((str (symbol->string symbol))) (if elisp? (let ((str* (list->string (string-fold-right (lambda (chr lst) (if (char-set-contains? %elisp-special-symbol-chars chr) (cons* #\\ chr lst) (cons chr lst))) '() str)))) (if (regexp-exec %elisp-confusable-number-symbols str*) (string-append "\\" str*) str*)) (if (regexp-exec %special-non-extended-symbols str) str ;no need for the #{...}# notation (object->string symbol))))) (define %elisp-basic-chars ;; Characters that can safely be specified using the Elisp character read ;; syntax without backslash-escapes. (char-set-union char-set:letter+digit (char-set #\~ #\! #\@ #\$ #\% #\^ #\& #\* #\- #\_ #\= #\+ #\{ #\} #\/ #\? #\< #\>))) (define %elisp-simple-escape-chars ;; Whitespace, control, and other special characters that can be specified ;; using the `?\X' Elisp read syntax, where X is a single character that has ;; a special meaning. (char-set #\alarm #\backspace #\tab #\newline #\vtab #\page #\return #\esc #\space #\\ #\delete)) (define (atom->elisp-string obj) "Return a string representation of atom OBJ that is suitable for the Emacs Lisp reader. Pairs and arrays should be serialized with `pretty-print-with-comments' instead." (match obj (#t "t") (() "()") ((? nil?) "nil") ((? char?) (cond ((char-set-contains? %elisp-basic-chars obj) (list->string (list #\? obj))) ((char-set-contains? %elisp-simple-escape-chars obj) (list->string (list #\? #\\ (case obj ((#\alarm) #\a) ((#\backspace) #\b) ((#\tab) #\t) ((#\newline) #\n) ((#\vtab) #\v) ((#\page) #\f) ((#\return) #\r) ((#\esc) #\e) ((#\space) #\s) ((#\\) #\\) ((#\delete) #\d))))) (else (let ((num (char->integer obj))) (if (<= num 65535) (format #f "?\\u~4,'0x" num) (format #f "?\\U~:@(~8,'0x~)" num)))))) ((? string?) (printed-string obj '() #:elisp? #t)) ((? symbol?) (symbol->display-string obj '() #:elisp? #t)) ((? keyword?) (string-append ":" (symbol->display-string (keyword->symbol obj) '() #:elisp? #t))) ((? number? num) (match num ((? exact-integer?) ;; E.g., 123 (object->string num)) ((? exact?) ;; E.g., 1/2 (object->string (exact->inexact num))) ((? rational?) ;; E.g., 1.5 (object->string num)) ((? nan?) ;; Not implemented by `read-elisp'. "0.0e+NaN") ((? inf?) ;; Not implemented by `read-elisp'. (if (negative? num) "-1.0e+INF" "1.0e+INF")) (_ ;; Complex numbers (raise (formatted-message (G_ "cannot serialize complex number to Elisp: ~a") num))))) (_ ;; Not an atom. (raise (formatted-message (G_ "Error serializing object to Elisp: ~a") obj))))) (define* (pretty-print-with-comments port obj #:key (format-comment (lambda (comment indent) comment)) (format-vertical-space identity) (indent 0) (max-width 78) (long-list 5) (elisp? #f) (special-forms '())) "Pretty-print OBJ to PORT, attempting to at most MAX-WIDTH character columns and assuming the current column is INDENT. Comments present in OBJ are included in the output. Lists longer than LONG-LIST are written as one element per line. Comments are passed through FORMAT-COMMENT before being emitted; a useful value for FORMAT-COMMENT is 'canonicalize-comment'. Vertical space is passed through FORMAT-VERTICAL-SPACE; a useful value of 'canonicalize-vertical-space'. If ELISP? is true, OBJ is printed as Emacs Lisp, simulating the indentation used by Emacs for many common forms. To specify additional rules for special indentation, use SPECIAL-FORMS, an association list where each entry is a pair of the form (SYMBOL . INDENT). When SYMBOL occurs at the beginning of a list in OBJ, the first INDENT expressions after SYMBOL will be indented as arguments and the rest will be indented as body expressions. When ELISP? is true, arguments that cannot be printed on the same line as SYMBOL will be indented 4 columns beyond the base indentation of the enclosing list, and body expressions will be indented 2 columns beyond the base indentation." (define gexp-syntax? (if (not elisp?) (cut memq <> '(gexp ungexp ungexp-native ungexp-splicing ungexp-native-splicing)) (const #f))) (define elisp-syntax? (if elisp? (cut eq? <> 'function) (const #f))) (define (read-syntax? obj) (or (memq obj '(quote unquote unquote-splicing)) (gexp-syntax? obj) (elisp-syntax? obj))) (define (list-of-lists? head tail) ;; Return true if HEAD and TAIL denote a list of lists--e.g., a list of ;; 'let' bindings or an alist. (match head ((thing . _) (and (not (read-syntax? thing)) (match tail (((? pair?) . _) #t) (_ #f)))) (_ #f))) (define list?* (match-lambda (((not (? read-syntax?)) . _) #t) (_ #f))) (define (starts-with-line-comment? lst) ;; Return true if LST starts with a line comment. (match lst ((x . _) (and (comment? x) (not (comment-margin? x)))) (_ #f))) (define (array?* obj) (and (array? obj) (not (string? obj)))) (define (symbol->display-string* symbol context) (symbol->display-string symbol context #:elisp? elisp?)) (define (printed-string* str context) (printed-string str context #:elisp? elisp?)) (define (length* x) ;; Return the length of list or dotted list X. (let lp ((lst x) (len 0)) (match lst (() len) ((not (? pair?)) (+ len 1)) ((head . tail) (lp tail (+ len 1)))))) (define (dotted-list->list exp) (let lp ((lst exp) (acc '())) (match lst (() (reverse acc)) ((not (? pair?)) (lp '() (cons lst acc))) ((head . tail) (lp tail (cons head acc)))))) (let loop ((indent indent) (column indent) (delimited? #t) ;true if comes after a delimiter (context '()) ;list of "parent" symbols (obj obj)) (define (print-sequence context indent column lst delimited? force-newline?) (define dotted? (dotted-list? lst)) (define long? ;; For lists that are function calls, omit heads from long list count, ;; but include them for lists that aren't function calls. (> (+ (length* lst) (if (or dotted? (match context (((not (? symbol?)) . _) #t) ((_ 'quote . _) #t) (_ #f))) 1 0)) long-list)) (let print ((lst lst) (first? #t) (delimited? delimited?) (column column) (unquote? #f) ;end of list when, e.g., `(a b . ,c) (kw? #f)) ;previous item was a keyword (cond ((null? lst) column) ((blank? lst) ;; Comments or whitespace cannot occur at the end of a dotted list. column) ((or unquote? (not (pair? lst))) ;; End of improper list. (let ((newline? (or long? (sequence-would-protrude? (+ column 2 (if unquote? 1 0)) lst) (read-syntax-would-protrude? (+ column 2 (if unquote? 1 0)) lst)))) (if newline? (begin (newline port) (display (make-string indent #\space) port)) (display " " port)) (display ". " port) (when unquote? (display "," port)) (let ((column (+ (if newline? (+ indent 2) (+ column 3)) (if unquote? 1 0)))) (loop indent column #t context lst)))) (else (match lst (('unquote obj) ;; A form like `(a b . ,OBJ) was expanded into (quasiquote (a b ;; unquote OBJ)), which will still be properly expanded by ;; `quasiquote' into (a b . OBJ). (print obj #f #f column #t kw?)) ((item . tail) (define kw-item?* (if elisp? (cond ((keyword? item) #t) ((symbol? item) (string-prefix? ":" (symbol->string item))) (else #f)) (and (keyword? item) (not (eq? item #:allow-other-keys))))) (define newline? ;; Insert a newline if ITEM is itself a list, or if TAIL is ;; long, but only if ITEM is not the first item. Also insert a ;; newline before a keyword, and before a read syntax (e.g., ;; `'', `#~', '#'') that would protrude. We need to test ;; before invocation of `print-sequence' whether the first ITEM ;; would protrude, since INDENT must then be less than usual. ;; We thread the results of that test to here with ;; FORCE-NEWLINE?. (or (and first? force-newline?) (and (or (list?* item) long? (read-syntax-would-protrude? (+ column 1) item) kw-item?*) (or dotted? ;newline after head of improper list (not first?) (and first? (match context (((and (not (? symbol?)) (not (? keyword?))) . _) ;; Allow newline before first item when ;; head of list is not a symbol. ;; E.g.: ;; (use-package foo ;; :bind (("C-c f f" . foo) ;\n ;; :map foo-map ;; ("C-c f g" . foo-status))) #t) ((_ 'quote _ ...) ;; E.g.: ;; '(a ;\n ;; b) #t) (_ #f)))) (not kw?) ;previous ITEM not a keyword (not delimited?) (not (blank? item))))) (when newline? (newline port) (display (make-string indent #\space) port)) (let ((column (if newline? indent column))) (print tail #f (blank? item) (loop indent column (or newline? delimited?) context item) #f kw-item?*)))))))) (define (sequence-would-protrude? indent lst) ;; Return true if elements of LST written at INDENT would protrude ;; beyond MAX-WIDTH. This is implemented as a cheap test with false ;; negatives to avoid actually rendering all of LST. (find (match-lambda ((? string? str) (>= (+ (string-width (printed-string* str '())) 2 indent) max-width)) ((? symbol? symbol) (>= (+ (string-width (symbol->display-string* symbol context)) indent) max-width)) ((? boolean?) (>= (+ 2 indent) max-width)) (() (>= (+ 2 indent) max-width)) (_ ;don't know #f)) (if (dotted-list? lst) (dotted-list->list lst) lst))) (define (read-syntax-would-protrude? indent lst) (match lst ((or ((? read-syntax? syntax) exp) (((? read-syntax? syntax) exp) . _)) (sequence-would-protrude? (+ indent (case syntax ((quote) 1) ((unquote) 1) ((ungexp-splicing) 3) ((ungexp-native-splicing) 3) (else 2))) exp)) (_ #f))) (define (special-form-lead* head) (special-form-lead head context #:elisp? elisp? #:special-forms special-forms)) (define (special-form? head) (special-form-lead* head)) (match obj ((? comment? comment) (if (comment-margin? comment) (begin (display " " port) (display (comment->string (format-comment comment indent)) port)) (begin ;; When already at the beginning of a line, for example because ;; COMMENT follows a margin comment, no need to emit a newline. (unless (= column indent) (newline port) (display (make-string indent #\space) port)) (print-multi-line-comment (comment->string (format-comment comment indent)) indent port))) (display (make-string indent #\space) port) indent) ((? vertical-space? space) (unless delimited? (newline port)) (let loop ((i (vertical-space-height (format-vertical-space space)))) (unless (zero? i) (newline port) (loop (- i 1)))) (display (make-string indent #\space) port) indent) ((? page-break?) (unless delimited? (newline port)) (display #\page port) (newline port) (display (make-string indent #\space) port) indent) (('quote lst) (unless delimited? (display " " port)) (display "'" port) (loop indent (+ column (if delimited? 1 2)) #t (cons 'quote context) lst)) (('quasiquote lst) (unless delimited? (display " " port)) (display "`" port) (loop indent (+ column (if delimited? 1 2)) #t context lst)) (('unquote lst) (unless delimited? (display " " port)) (display "," port) (loop indent (+ column (if delimited? 1 2)) #t context lst)) (('unquote-splicing lst) (unless delimited? (display " " port)) (display ",@" port) (loop indent (+ column (if delimited? 2 3)) #t context lst)) (((? gexp-syntax? head) obj) (unless delimited? (display " " port)) (match head ('gexp (display "#~" port) (loop indent (+ column (if delimited? 2 3)) #t context obj)) ('ungexp (display "#$" port) (loop indent (+ column (if delimited? 2 3)) #t context obj)) ('ungexp-native (display "#+" port) (loop indent (+ column (if delimited? 2 3)) #t context obj)) ('ungexp-splicing (display "#$@" port) (loop indent (+ column (if delimited? 3 4)) #t context obj)) ('ungexp-native-splicing (display "#+@" port) (loop indent (+ column (if delimited? 3 4)) #t context obj)))) (((? elisp-syntax? head) obj) (unless delimited? (display " " port)) (display "#'" port) (loop indent (+ column (if delimited? 2 3)) #t context obj)) (((? special-form? head) arguments ...) ;; Special-case 'let', 'lambda', 'modify-inputs', etc. so the second ;; and following arguments are less indented. (let* ((lead (special-form-lead* head)) (context (cons head context)) (head (symbol->display-string* head (cdr context))) (total (length arguments)) (body (drop arguments (min lead total)))) (unless delimited? (display " " port)) (display "(" port) (display head port) (unless (zero? lead) (display " " port)) ;; Print the first LEAD arguments. (let* ((indent (+ column 2 (if delimited? 0 1))) (old-column column) (column (+ column 1 (if (zero? lead) 0 1) (if delimited? 0 1) (string-length head))) (initial-indent (if elisp? ;; Indent arguments 4 columns, like Emacs (+ old-column 4 (if delimited? 0 1)) column))) (define new-column (let inner ((n lead) (arguments (take arguments (min lead total))) (column column) (newline? #f)) (if (zero? n) (if (null? body) ;no newline when body is empty column (begin (newline port) (display (make-string indent #\space) port) indent)) (match arguments (() column) ((head . tail) (when newline? ;; Print a newline when previous argument was a list. (newline port) (display (make-string initial-indent #\space) port)) (inner (- n 1) tail (loop initial-indent (if newline? initial-indent column) (or newline? (= n lead)) context head) (list?* head))))))) ;; Print the remaining arguments. (let ((column (print-sequence context indent new-column body #t #f))) (display ")" port) (+ column 1))))) ((? array?* obj) ;; Vectors, arrays, bytevectors, bitvectors. (if elisp? (let* ((lst (array->list obj)) (overflow? (>= column max-width)) (column (if overflow? (+ indent 1) (+ column (if delimited? 1 2))))) (if overflow? (begin (newline port) (display (make-string indent #\space) port)) (unless delimited? (display " " port))) (display "[" port) (let ((column (print-sequence context column column lst #t #f))) (display "]" port) (+ column 1))) ;; For Scheme, `object->string' prints the proper Guile syntax for ;; the specific type of array, but with long arrays on one line. (let* ((str (object->string obj)) (len (string-width str))) (if (and (> (+ column 1 len) max-width) (not delimited?)) (begin (newline port) (display (make-string indent #\space) port) (display str port) (+ indent len)) (begin (unless delimited? (display " " port)) (display str port) (+ column (if delimited? 0 1) len)))))) ((head . tail) ;; Lists and improper lists. (let* ((overflow? (>= column max-width)) (column (if overflow? (+ indent 1) (+ column (if delimited? 1 2)))) ;; Newline for `let' bindings, alists, long lists of constants. (newline? (or (and (not (null? tail)) (or (newline-form? head context) (list-of-lists? head tail))) (starts-with-line-comment? tail))) (context (cons head context))) (if overflow? (begin (newline port) (display (make-string indent #\space) port)) (unless delimited? (display " " port))) (display "(" port) (let* ((new-column (loop column column #t context head)) (force-newline? (and (not newline?) (or (read-syntax-would-protrude? (+ new-column 1) tail) (match tail (((and lst ((not (? read-syntax?)) . _)) . _) ;; Newline before initial list ;; argument with long element(s). (sequence-would-protrude? (+ new-column 1) lst)) (_ #f))))) (indent (if (or (>= new-column max-width) force-newline? newline? (not (symbol? head)) (match context ((_ 'quote _ ...) #t) (_ #f)) (dotted-list? (cons head tail)) (sequence-would-protrude? (+ new-column 1) tail)) column (+ new-column 1)))) (when newline? ;; Insert a newline right after HEAD. (newline port) (display (make-string indent #\space) port)) (let ((column (print-sequence context indent (if newline? indent new-column) tail newline? force-newline?))) (display ")" port) (+ column 1))))) (_ (let* ((str (cond ((string? obj) (printed-string* obj context)) ((integer? obj) (if elisp? (atom->elisp-string obj) (integer->string obj context))) ((symbol? obj) (symbol->display-string* obj context)) (else (if elisp? (atom->elisp-string obj) (object->string obj))))) (len (string-width str))) (if (and (> (+ column 1 len) max-width) (not delimited?)) (begin (newline port) (display (make-string indent #\space) port) (display str port) (+ indent len)) (begin (unless delimited? (display " " port)) (display str port) (+ column (if delimited? 0 1) len)))))))) (define (object->string* obj indent . args) "Pretty-print OBJ with INDENT columns as the initial indent. ARGS are passed as-is to 'pretty-print-with-comments'." (call-with-output-string (lambda (port) (apply pretty-print-with-comments port obj #:indent indent args)))) (define* (pretty-print-with-comments/splice port lst #:rest rest) "Write to PORT the expressions and blanks listed in LST." (for-each (lambda (exp) (apply pretty-print-with-comments port exp rest) (unless (blank? exp) (newline port))) lst))