all messages for Emacs-related lists mirrored at yhetil.org
 help / color / mirror / code / Atom feed
* [PATCH] Interpret #r"..." as a raw string
@ 2021-02-26 18:18 Naoya Yamashita
  2021-02-26 18:27 ` [External] : " Drew Adams
                   ` (5 more replies)
  0 siblings, 6 replies; 75+ messages in thread
From: Naoya Yamashita @ 2021-02-26 18:18 UTC (permalink / raw)
  To: emacs-devel

[-- Attachment #1: Type: Text/Plain, Size: 674 bytes --]

Hi, all.

I wrote a patch that lets the Emacs reader interpret raw strings.

As you know, we already have some special markers using `#` that make
the Emacs reader work in a special way.  For example, we have `#[` to
indicate a byte-compiled object and `#s(` to indicate a hash table.

I introduce raw strings using this mechanism: if users put `#r`
before a string, the Emacs reader interprets it as a raw string.

Many programming languages have a raw string feature[^1], so I would
like to be able to use raw strings in Emacs Lisp.

For more concrete examples, please see the test cases in the attached patch.


^1: https://en.wikipedia.org/wiki/Comparison_of_programming_languages_(strings)#Quoted_raw


Regards,
Naoya

[-- Attachment #2: 0001-Interpret-r-.-as-a-raw-string.patch --]
[-- Type: Text/X-Patch, Size: 4546 bytes --]

From 649c6f9c8aa994b992f3353d2ad373461ed24d15 Mon Sep 17 00:00:00 2001
From: Naoya Yamashita <conao3@gmail.com>
Date: Sat, 27 Feb 2021 02:55:19 +0900
Subject: [PATCH] Interpret #r"..." as a raw string

* src/lread.c (read1): Add new reader symbol, #r", indicates raw string
* test/src/lread-tests.el (lread-raw-string-1, lread-raw-string-2,
lread-raw-string-usage-1, lread-raw-string-usage-2): Add testcases
---
 src/lread.c             | 67 +++++++++++++++++++++++++++++++++++++++++
 test/src/lread-tests.el | 36 ++++++++++++++++++++++
 2 files changed, 103 insertions(+)

diff --git a/src/lread.c b/src/lread.c
index dea1b232ff..d2d7eee407 100644
--- a/src/lread.c
+++ b/src/lread.c
@@ -2835,6 +2835,73 @@ read1 (Lisp_Object readcharfun, int *pch, bool first_in_list)
 
     case '#':
       c = READCHAR;
+      if (c == 'r')
+	{
+	  c = READCHAR;
+	  if (c == '"')
+	    {
+	      ptrdiff_t count = SPECPDL_INDEX ();
+	      char *read_buffer = stackbuf;
+	      ptrdiff_t read_buffer_size = sizeof stackbuf;
+	      char *heapbuf = NULL;
+	      char *p = read_buffer;
+	      char *end = read_buffer + read_buffer_size;
+	      int ch;
+	      /* True if we saw an escape sequence specifying
+		 a multibyte character.  */
+	      bool force_multibyte = false;
+	      /* True if we saw an escape sequence specifying
+		 a single-byte character.  */
+	      bool force_singlebyte = false;
+	      bool cancel = false;
+	      ptrdiff_t nchars = 0;
+
+	      while ((ch = READCHAR) >= 0
+		     && ch != '\"')
+		{
+		  if (end - p < MAX_MULTIBYTE_LENGTH)
+		    {
+		      ptrdiff_t offset = p - read_buffer;
+		      read_buffer = grow_read_buffer (read_buffer, offset,
+						      &heapbuf, &read_buffer_size,
+						      count);
+		      p = read_buffer + offset;
+		      end = read_buffer + read_buffer_size;
+		    }
+
+		  p += CHAR_STRING (ch, (unsigned char *) p);
+		  if (CHAR_BYTE8_P (ch))
+		    force_singlebyte = true;
+		  else if (! ASCII_CHAR_P (ch))
+		    force_multibyte = true;
+		  nchars++;
+		}
+
+	      if (ch < 0)
+		end_of_file_error ();
+
+	      /* If purifying, and string starts with \ newline,
+		 return zero instead.  This is for doc strings
+		 that we are really going to find in etc/DOC.nn.nn.  */
+	      if (!NILP (Vpurify_flag) && NILP (Vdoc_file_name) && cancel)
+		return unbind_to (count, make_fixnum (0));
+
+	      if (! force_multibyte && force_singlebyte)
+		{
+		  /* READ_BUFFER contains raw 8-bit bytes and no multibyte
+		     forms.  Convert it to unibyte.  */
+		  nchars = str_as_unibyte ((unsigned char *) read_buffer,
+					   p - read_buffer);
+		  p = read_buffer + nchars;
+		}
+
+	      Lisp_Object result
+		= make_specified_string (read_buffer, nchars, p - read_buffer,
+					 (force_multibyte
+					  || (p - read_buffer != nchars)));
+	      return unbind_to (count, result);
+	    }
+	}
       if (c == 's')
 	{
 	  c = READCHAR;
diff --git a/test/src/lread-tests.el b/test/src/lread-tests.el
index f2a60bcf32..4357c27ee0 100644
--- a/test/src/lread-tests.el
+++ b/test/src/lread-tests.el
@@ -28,6 +28,42 @@
 (require 'ert)
 (require 'ert-x)
 
+(ert-deftest lread-raw-string-1 ()
+  (should (string-equal
+           (read "#r\"\\(?:def\\(?:macro\\|un\\)\\)\"")
+           "\\(?:def\\(?:macro\\|un\\)\\)")))
+
+(ert-deftest lread-raw-string-2 ()
+  (should (string-equal
+           (read "#r\"\\n\"")
+           "\\n")))
+
+(ert-deftest lread-raw-string-usage-1 ()
+  (should (equal
+           (let ((str "(defmacro leaf () nil)"))
+             (string-match "(\\(def\\(?:macro\\|un\\)\\) \\([^ ]+\\)" str)
+             (list (match-string 1 str) (match-string 2 str)))
+           '("defmacro" "leaf")))
+
+  (should (equal
+           (let ((str "(defmacro leaf () nil)"))
+             (string-match #r"(\(def\(?:macro\|un\)\) \([^ ]+\)" str)
+             (list (match-string 1 str) (match-string 2 str)))
+           '("defmacro" "leaf"))))
+
+(ert-deftest lread-raw-string-usage-2 ()
+  (should (equal
+           (let ((str "(def\\macro leaf () nil)"))
+             (string-match "(\\(def\\\\macro\\) \\([^ ]+\\)" str)
+             (list (match-string 1 str) (match-string 2 str)))
+           '("def\\macro" "leaf")))
+
+  (should (equal
+           (let ((str "(def\\macro leaf () nil)"))
+             (string-match #r"(\(def\macro\) \([^ ]+\)" str)
+             (list (match-string 1 str) (match-string 2 str)))
+           '("def\\macro" "leaf"))))
+
 (ert-deftest lread-char-number ()
   (should (equal (read "?\\N{U+A817}") #xA817)))
 
-- 
2.30.1


^ permalink raw reply related	[flat|nested] 75+ messages in thread

end of thread, other threads:[~2021-03-07 17:20 UTC | newest]

Thread overview: 75+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-02-26 18:18 [PATCH] Interpret #r"..." as a raw string Naoya Yamashita
2021-02-26 18:27 ` [External] : " Drew Adams
2021-02-26 18:53   ` Naoya Yamashita
2021-02-26 19:03     ` Drew Adams
2021-02-26 19:48     ` Stefan Monnier
2021-02-26 20:23       ` Naoya Yamashita
2021-02-26 20:34         ` Andreas Schwab
2021-02-26 20:39           ` Naoya Yamashita
2021-02-26 20:45             ` Andreas Schwab
2021-02-26 20:50               ` Naoya Yamashita
2021-02-26 20:54                 ` Andreas Schwab
2021-02-26 20:03     ` Eli Zaretskii
2021-02-26 20:34       ` Naoya Yamashita
2021-02-26 19:09 ` Andreas Schwab
2021-02-26 20:00 ` Eli Zaretskii
2021-02-27  0:39   ` Daniel Brooks
2021-02-27 16:14     ` Richard Stallman
2021-02-27 16:18       ` Stefan Monnier
2021-03-01  5:19         ` Richard Stallman
2021-03-02  5:45           ` Matt Armstrong
2021-03-03  5:53             ` Richard Stallman
2021-03-03  6:14               ` Daniel Brooks
2021-03-03  7:00               ` Eli Zaretskii
2021-03-04  2:47                 ` Matt Armstrong
2021-03-04 13:49                   ` Eli Zaretskii
2021-03-04 16:55                     ` Matt Armstrong
2021-03-05  5:44                       ` Richard Stallman
2021-03-05  5:39                   ` Richard Stallman
2021-03-05  8:01                     ` Eli Zaretskii
2021-03-06  5:13                       ` Richard Stallman
2021-03-06  6:04                         ` Matt Armstrong
2021-03-07  6:13                           ` Richard Stallman
2021-03-07 17:20                             ` [External] : " Drew Adams
2021-03-06  8:27                         ` Eli Zaretskii
2021-03-06  9:51                           ` Daniel Brooks
2021-03-06 10:24                             ` Eli Zaretskii
2021-03-07  6:08                           ` Richard Stallman
2021-02-27 20:41       ` Daniel Brooks
2021-02-28  6:22 ` Zhu Zihao
2021-03-01  5:26   ` Richard Stallman
2021-03-01 12:06 ` Alan Mackenzie
2021-03-01 12:13   ` Andreas Schwab
2021-03-02  5:59   ` Matt Armstrong
2021-03-02  9:56     ` Daniel Brooks
2021-03-02 10:13       ` Andreas Schwab
2021-03-02 10:55         ` Daniel Brooks
2021-03-02 11:18           ` Andreas Schwab
2021-03-02 11:26             ` Daniel Brooks
2021-03-02 11:14       ` Alan Mackenzie
2021-03-02 11:52         ` Daniel Brooks
2021-03-02 12:01     ` Dmitry Gutov
2021-03-02 14:14       ` Alan Mackenzie
2021-03-02 14:32         ` Dmitry Gutov
2021-03-02 15:06           ` Alan Mackenzie
2021-03-02 11:41 ` Aurélien Aptel
2021-03-02 13:49   ` Stefan Monnier
2021-03-02 14:46     ` Aurélien Aptel
2021-03-02 15:11       ` Stefan Monnier
2021-03-02 16:07         ` Aurélien Aptel
2021-03-03  7:31           ` Alfred M. Szmidt
2021-03-03 16:02           ` Stefan Monnier
2021-03-02 20:36     ` Daniel Brooks
2021-03-03  0:27       ` Stefan Monnier
2021-03-03  0:42         ` Daniel Brooks
2021-03-03  8:16       ` Andreas Schwab
2021-03-03  9:25         ` Daniel Brooks
2021-03-03  9:29           ` Andreas Schwab
2021-03-03 10:02             ` Daniel Brooks
2021-03-03 10:11               ` Daniel Brooks
2021-03-03 10:14                 ` Andreas Schwab
2021-03-03 11:48                   ` Daniel Brooks
2021-03-03 10:12       ` Michael Albinus
2021-03-03 10:42         ` Daniel Brooks
2021-03-03 10:49           ` Michael Albinus
2021-03-03 16:12           ` Stefan Monnier

Code repositories for project(s) associated with this external index

	https://git.savannah.gnu.org/cgit/emacs.git
	https://git.savannah.gnu.org/cgit/emacs/org-mode.git

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.