From 649c6f9c8aa994b992f3353d2ad373461ed24d15 Mon Sep 17 00:00:00 2001
From: Naoya Yamashita
Date: Sat, 27 Feb 2021 02:55:19 +0900
Subject: [PATCH] Interpret #r"..." as a raw string

* src/lread.c (read1): Add new reader syntax #r"...", which denotes
a raw string.  Signal invalid-read-syntax when "#r" is not followed
by a double quote.
* test/src/lread-tests.el (lread-raw-string-1, lread-raw-string-2)
(lread-raw-string-invalid, lread-raw-string-usage-1)
(lread-raw-string-usage-2): Add test cases.
---
 src/lread.c             | 71 +++++++++++++++++++++++++++++++++++++++++
 test/src/lread-tests.el | 41 ++++++++++++++++++++++++
 2 files changed, 112 insertions(+)

diff --git a/src/lread.c b/src/lread.c
index dea1b232ff..d2d7eee407 100644
--- a/src/lread.c
+++ b/src/lread.c
@@ -2835,6 +2835,77 @@ read1 (Lisp_Object readcharfun, int *pch, bool first_in_list)
 
     case '#':
       c = READCHAR;
+      if (c == 'r')
+        {
+          c = READCHAR;
+          if (c == '"')
+            {
+              /* Raw string: every character up to the next double
+                 quote is taken literally, so backslash is not an
+                 escape and the string cannot contain a double quote.  */
+              ptrdiff_t count = SPECPDL_INDEX ();
+              char *read_buffer = stackbuf;
+              ptrdiff_t read_buffer_size = sizeof stackbuf;
+              char *heapbuf = NULL;
+              char *p = read_buffer;
+              char *end = read_buffer + read_buffer_size;
+              int ch;
+              /* True if we saw a non-ASCII character that is not
+                 a raw 8-bit byte.  */
+              bool force_multibyte = false;
+              /* True if we saw a raw 8-bit byte.  */
+              bool force_singlebyte = false;
+              ptrdiff_t nchars = 0;
+
+              while ((ch = READCHAR) >= 0
+                     && ch != '\"')
+                {
+                  /* Keep room for one more multibyte sequence.  */
+                  if (end - p < MAX_MULTIBYTE_LENGTH)
+                    {
+                      ptrdiff_t offset = p - read_buffer;
+                      read_buffer = grow_read_buffer (read_buffer, offset,
+                                                      &heapbuf, &read_buffer_size,
+                                                      count);
+                      p = read_buffer + offset;
+                      end = read_buffer + read_buffer_size;
+                    }
+
+                  p += CHAR_STRING (ch, (unsigned char *) p);
+                  if (CHAR_BYTE8_P (ch))
+                    force_singlebyte = true;
+                  else if (! ASCII_CHAR_P (ch))
+                    force_multibyte = true;
+                  nchars++;
+                }
+
+              /* Input ended before the closing double quote.  */
+              if (ch < 0)
+                end_of_file_error ();
+
+              if (! force_multibyte && force_singlebyte)
+                {
+                  /* READ_BUFFER contains raw 8-bit bytes and no multibyte
+                     forms.  Convert it to unibyte.  */
+                  nchars = str_as_unibyte ((unsigned char *) read_buffer,
+                                           p - read_buffer);
+                  p = read_buffer + nchars;
+                }
+
+              Lisp_Object result
+                = make_specified_string (read_buffer, nchars, p - read_buffer,
+                                         (force_multibyte
+                                          || (p - read_buffer != nchars)));
+              return unbind_to (count, result);
+            }
+
+          /* "#r" not followed by a double quote is not valid syntax.
+             C already holds the character after the "r", so falling
+             through to the "if (c == 's')" test below would misread
+             "#rs..." as "#s...".  Signal an error instead.  */
+          UNREAD (c);
+          xsignal1 (Qinvalid_read_syntax, build_string ("#r"));
+        }
       if (c == 's')
 	{
 	  c = READCHAR;
diff --git a/test/src/lread-tests.el b/test/src/lread-tests.el
index f2a60bcf32..4357c27ee0 100644
--- a/test/src/lread-tests.el
+++ b/test/src/lread-tests.el
@@ -28,6 +28,47 @@
 (require 'ert)
 (require 'ert-x)
 
+(ert-deftest lread-raw-string-1 ()
+  (should (string-equal
+           (read "#r\"\\(?:def\\(?:macro\\|un\\)\\)\"")
+           "\\(?:def\\(?:macro\\|un\\)\\)")))
+
+(ert-deftest lread-raw-string-2 ()
+  (should (string-equal
+           (read "#r\"\\n\"")
+           "\\n")))
+
+(ert-deftest lread-raw-string-invalid ()
+  "\"#r\" must be followed by a double quote."
+  (should-error (read "#rs(a)") :type 'invalid-read-syntax)
+  (should-error (read "#r'car") :type 'invalid-read-syntax))
+
+(ert-deftest lread-raw-string-usage-1 ()
+  (should (equal
+           (let ((str "(defmacro leaf () nil)"))
+             (string-match "(\\(def\\(?:macro\\|un\\)\\) \\([^ ]+\\)" str)
+             (list (match-string 1 str) (match-string 2 str)))
+           '("defmacro" "leaf")))
+
+  (should (equal
+           (let ((str "(defmacro leaf () nil)"))
+             (string-match #r"(\(def\(?:macro\|un\)\) \([^ ]+\)" str)
+             (list (match-string 1 str) (match-string 2 str)))
+           '("defmacro" "leaf"))))
+
+(ert-deftest lread-raw-string-usage-2 ()
+  (should (equal
+           (let ((str "(def\\macro leaf () nil)"))
+             (string-match "(\\(def\\\\macro\\) \\([^ ]+\\)" str)
+             (list (match-string 1 str) (match-string 2 str)))
+           '("def\\macro" "leaf")))
+
+  (should (equal
+           (let ((str "(def\\macro leaf () nil)"))
+             (string-match #r"(\(def\\macro\) \([^ ]+\)" str)
+             (list (match-string 1 str) (match-string 2 str)))
+           '("def\\macro" "leaf"))))
+
 (ert-deftest lread-char-number ()
   (should (equal (read "?\\N{U+A817}") #xA817)))
 
-- 
2.30.1