unofficial mirror of emacs-devel@gnu.org 
 help / color / mirror / code / Atom feed
* [PATCH] Interpret #r"..." as a raw string
@ 2021-02-26 18:18 Naoya Yamashita
  2021-02-26 18:27 ` [External] : " Drew Adams
                   ` (5 more replies)
  0 siblings, 6 replies; 75+ messages in thread
From: Naoya Yamashita @ 2021-02-26 18:18 UTC (permalink / raw)
  To: emacs-devel

[-- Attachment #1: Type: Text/Plain, Size: 674 bytes --]

Hi, all.

I wrote a patch to allow the Emacs reader to interpret raw strings.

As you know, we already have some special markers using `#` that make
the Emacs reader work in a special way.  For example, we have `#[` to
indicate a byte-compiled object and `#s(` to indicate a hash table.

I introduce raw strings using this architecture: if users put `#r`
before a string, the Emacs reader interprets it as a raw string.

Many programming languages have a raw string feature[^1], so I want to
use raw strings in Emacs Lisp.

For more concrete examples, please see the test cases in the attached patch.


^1: https://en.wikipedia.org/wiki/Comparison_of_programming_languages_(strings)#Quoted_raw


Regards,
Naoya

[-- Attachment #2: 0001-Interpret-r-.-as-a-raw-string.patch --]
[-- Type: Text/X-Patch, Size: 4546 bytes --]

From 649c6f9c8aa994b992f3353d2ad373461ed24d15 Mon Sep 17 00:00:00 2001
From: Naoya Yamashita <conao3@gmail.com>
Date: Sat, 27 Feb 2021 02:55:19 +0900
Subject: [PATCH] Interpret #r"..." as a raw string

* src/lread.c (read1): Add new reader symbol, #r", indicates raw string
* test/src/lread-tests.el (lread-raw-string-1, lread-raw-string-2,
lread-raw-string-usage-1, lread-raw-string-usage-2): Add testcases
---
 src/lread.c             | 67 +++++++++++++++++++++++++++++++++++++++++
 test/src/lread-tests.el | 36 ++++++++++++++++++++++
 2 files changed, 103 insertions(+)

diff --git a/src/lread.c b/src/lread.c
index dea1b232ff..d2d7eee407 100644
--- a/src/lread.c
+++ b/src/lread.c
@@ -2835,6 +2835,73 @@ read1 (Lisp_Object readcharfun, int *pch, bool first_in_list)
 
     case '#':
       c = READCHAR;
+      if (c == 'r')
+	{
+	  c = READCHAR;
+	  if (c == '"')
+	    {
+	      ptrdiff_t count = SPECPDL_INDEX ();
+	      char *read_buffer = stackbuf;
+	      ptrdiff_t read_buffer_size = sizeof stackbuf;
+	      char *heapbuf = NULL;
+	      char *p = read_buffer;
+	      char *end = read_buffer + read_buffer_size;
+	      int ch;
+	      /* True if we saw an escape sequence specifying
+		 a multibyte character.  */
+	      bool force_multibyte = false;
+	      /* True if we saw an escape sequence specifying
+		 a single-byte character.  */
+	      bool force_singlebyte = false;
+	      bool cancel = false;
+	      ptrdiff_t nchars = 0;
+
+	      while ((ch = READCHAR) >= 0
+		     && ch != '\"')
+		{
+		  if (end - p < MAX_MULTIBYTE_LENGTH)
+		    {
+		      ptrdiff_t offset = p - read_buffer;
+		      read_buffer = grow_read_buffer (read_buffer, offset,
+						      &heapbuf, &read_buffer_size,
+						      count);
+		      p = read_buffer + offset;
+		      end = read_buffer + read_buffer_size;
+		    }
+
+		  p += CHAR_STRING (ch, (unsigned char *) p);
+		  if (CHAR_BYTE8_P (ch))
+		    force_singlebyte = true;
+		  else if (! ASCII_CHAR_P (ch))
+		    force_multibyte = true;
+		  nchars++;
+		}
+
+	      if (ch < 0)
+		end_of_file_error ();
+
+	      /* If purifying, and string starts with \ newline,
+		 return zero instead.  This is for doc strings
+		 that we are really going to find in etc/DOC.nn.nn.  */
+	      if (!NILP (Vpurify_flag) && NILP (Vdoc_file_name) && cancel)
+		return unbind_to (count, make_fixnum (0));
+
+	      if (! force_multibyte && force_singlebyte)
+		{
+		  /* READ_BUFFER contains raw 8-bit bytes and no multibyte
+		     forms.  Convert it to unibyte.  */
+		  nchars = str_as_unibyte ((unsigned char *) read_buffer,
+					   p - read_buffer);
+		  p = read_buffer + nchars;
+		}
+
+	      Lisp_Object result
+		= make_specified_string (read_buffer, nchars, p - read_buffer,
+					 (force_multibyte
+					  || (p - read_buffer != nchars)));
+	      return unbind_to (count, result);
+	    }
+	}
       if (c == 's')
 	{
 	  c = READCHAR;
diff --git a/test/src/lread-tests.el b/test/src/lread-tests.el
index f2a60bcf32..4357c27ee0 100644
--- a/test/src/lread-tests.el
+++ b/test/src/lread-tests.el
@@ -28,6 +28,42 @@
 (require 'ert)
 (require 'ert-x)
 
+(ert-deftest lread-raw-string-1 ()
+  (should (string-equal
+           (read "#r\"\\(?:def\\(?:macro\\|un\\)\\)\"")
+           "\\(?:def\\(?:macro\\|un\\)\\)")))
+
+(ert-deftest lread-raw-string-2 ()
+  (should (string-equal
+           (read "#r\"\\n\"")
+           "\\n")))
+
+(ert-deftest lread-raw-string-usage-1 ()
+  (should (equal
+           (let ((str "(defmacro leaf () nil)"))
+             (string-match "(\\(def\\(?:macro\\|un\\)\\) \\([^ ]+\\)" str)
+             (list (match-string 1 str) (match-string 2 str)))
+           '("defmacro" "leaf")))
+
+  (should (equal
+           (let ((str "(defmacro leaf () nil)"))
+             (string-match #r"(\(def\(?:macro\|un\)\) \([^ ]+\)" str)
+             (list (match-string 1 str) (match-string 2 str)))
+           '("defmacro" "leaf"))))
+
+(ert-deftest lread-raw-string-usage-2 ()
+  (should (equal
+           (let ((str "(def\\macro leaf () nil)"))
+             (string-match "(\\(def\\\\macro\\) \\([^ ]+\\)" str)
+             (list (match-string 1 str) (match-string 2 str)))
+           '("def\\macro" "leaf")))
+
+  (should (equal
+           (let ((str "(def\\macro leaf () nil)"))
+             (string-match #r"(\(def\macro\) \([^ ]+\)" str)
+             (list (match-string 1 str) (match-string 2 str)))
+           '("def\\macro" "leaf"))))
+
 (ert-deftest lread-char-number ()
   (should (equal (read "?\\N{U+A817}") #xA817)))
 
-- 
2.30.1


^ permalink raw reply related	[flat|nested] 75+ messages in thread

* RE: [External] : [PATCH] Interpret #r"..." as a raw string
  2021-02-26 18:18 [PATCH] Interpret #r"..." as a raw string Naoya Yamashita
@ 2021-02-26 18:27 ` Drew Adams
  2021-02-26 18:53   ` Naoya Yamashita
  2021-02-26 19:09 ` Andreas Schwab
                   ` (4 subsequent siblings)
  5 siblings, 1 reply; 75+ messages in thread
From: Drew Adams @ 2021-02-26 18:27 UTC (permalink / raw)
  To: Naoya Yamashita, emacs-devel@gnu.org

> I introduce raw string using this architecture, if users put `#r`
> before string, Emacs reader interpret it as a raw string.
> 
> Many programming language has a Raw string feature[^1], so I want to
> use raw string in Emacs-lisp.

Why?  Is the reason just because "many programming
languages" have a raw-string data type?  Why would
that, by itself, be a good reason for Emacs Lisp
to have such a data type?

I'm guessing you have some other reasons.  What?



^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [External] : [PATCH] Interpret #r"..." as a raw string
  2021-02-26 18:27 ` [External] : " Drew Adams
@ 2021-02-26 18:53   ` Naoya Yamashita
  2021-02-26 19:03     ` Drew Adams
                       ` (2 more replies)
  0 siblings, 3 replies; 75+ messages in thread
From: Naoya Yamashita @ 2021-02-26 18:53 UTC (permalink / raw)
  To: drew.adams; +Cc: emacs-devel

Thanks drew,

> Why?  Is the reason just because "many programming
> languages" have a raw-string data type?  Why would
> that, by itself, be a good reason for Emacs Lisp
> to have such a data type?

> I'm guessing you have some other reasons.  What?

As I understand it, a raw string is not a data type but a
notation in which escape sequences are not interpreted; the text is
taken exactly as written by the user.

Please see the manual's description of backslash[^1].  This is a reason to support raw strings.

    Note that ‘\’ also has special meaning in the read syntax of Lisp
    strings (see String Type), and must be quoted with ‘\’. For
    example, the regular expression that matches the ‘\’ character is
    ‘\\’. To write a Lisp string that contains the characters ‘\\’,
    Lisp syntax requires you to quote each ‘\’ with another
    ‘\’. Therefore, the read syntax for a regular expression matching
    ‘\’ is "\\\\".

If we have a raw-string notation, we can write a string as is,
without any escaping.

See this testcase,

    (ert-deftest lread-raw-string-usage-2 ()
      (should (equal
               (let ((str "(def\\macro leaf () nil)"))
                 (string-match "(\\(def\\\\macro\\) \\([^ ]+\\)" str)
                 (list (match-string 1 str) (match-string 2 str)))
               '("def\\macro" "leaf")))
    
      (should (equal
               (let ((str "(def\\macro leaf () nil)"))
                 (string-match #r"(\(def\macro\) \([^ ]+\)" str)
                 (list (match-string 1 str) (match-string 2 str)))
               '("def\\macro" "leaf"))))

The first one is normal Emacs Lisp: if you want to match `\`, you
should write `\\\\`, as the manual says.  The second one uses the raw-string
notation I introduce: if you want to match `\`, you can write just `\`.

[^1]: https://www.gnu.org/software/emacs/manual/html_node/elisp/Regexp-Special.html#Regexp-Special

^ permalink raw reply	[flat|nested] 75+ messages in thread

* RE: [External] : [PATCH] Interpret #r"..." as a raw string
  2021-02-26 18:53   ` Naoya Yamashita
@ 2021-02-26 19:03     ` Drew Adams
  2021-02-26 19:48     ` Stefan Monnier
  2021-02-26 20:03     ` Eli Zaretskii
  2 siblings, 0 replies; 75+ messages in thread
From: Drew Adams @ 2021-02-26 19:03 UTC (permalink / raw)
  To: Naoya Yamashita; +Cc: emacs-devel@gnu.org

> Thanks drew,

Thanks for the reasons.

> As my understanding, raw-string is not a data type, but a
> notation that does not interpret escape sequences as written by
> the user.
> 
> Please see backslash info[^1].  This is a reason to support raw string.
> 
> If we have a raw-string notation, we can write string as is
> without any escape.

Is that the only reason?  Why is that important?

I don't mean to argue.  I don't understand why
this would be useful/needed.

I do realize that learning about escaping \ in
a Lisp string is necessary otherwise, but is
that a big bother?

Is there another reason?  After all, as you said,
many programming languages have raw-string support.
Is _their_ reason for that only a reason similar
to what you state for Lisp (e.g. escaping backslash
chars)?



^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH] Interpret #r"..." as a raw string
  2021-02-26 18:18 [PATCH] Interpret #r"..." as a raw string Naoya Yamashita
  2021-02-26 18:27 ` [External] : " Drew Adams
@ 2021-02-26 19:09 ` Andreas Schwab
  2021-02-26 20:00 ` Eli Zaretskii
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 75+ messages in thread
From: Andreas Schwab @ 2021-02-26 19:09 UTC (permalink / raw)
  To: Naoya Yamashita; +Cc: emacs-devel

You also need to teach forward-sexp and emacs-lisp-mode font-locking
about raw strings.

Andreas.

-- 
Andreas Schwab, schwab@linux-m68k.org
GPG Key fingerprint = 7578 EB47 D4E5 4D69 2510  2552 DF73 E780 A9DA AEC1
"And now for something completely different."



^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [External] : [PATCH] Interpret #r"..." as a raw string
  2021-02-26 18:53   ` Naoya Yamashita
  2021-02-26 19:03     ` Drew Adams
@ 2021-02-26 19:48     ` Stefan Monnier
  2021-02-26 20:23       ` Naoya Yamashita
  2021-02-26 20:03     ` Eli Zaretskii
  2 siblings, 1 reply; 75+ messages in thread
From: Stefan Monnier @ 2021-02-26 19:48 UTC (permalink / raw)
  To: Naoya Yamashita; +Cc: drew.adams, emacs-devel

>     (ert-deftest lread-raw-string-usage-2 ()
>       (should (equal
>                (let ((str "(def\\macro leaf () nil)"))
>                  (string-match "(\\(def\\\\macro\\) \\([^ ]+\\)" str)
>                  (list (match-string 1 str) (match-string 2 str)))
>                '("def\\macro" "leaf")))
>     
>       (should (equal
>                (let ((str "(def\\macro leaf () nil)"))
>                  (string-match #r"(\(def\macro\) \([^ ]+\)" str)
>                  (list (match-string 1 str) (match-string 2 str)))
>                '("def\\macro" "leaf"))))

[ Note the above has a bug: the raw-string regexp equivalent to
  "(\\(def\\\\macro\\) \\([^ ]+\\)" is #r"(\(def\\macro\) \([^ ]+\)"
  notice the double backslash between "def" and "macro".  ]

For regexps, this has been discussed to death already, but a better
option is arguably to introduce a macro that converts from the
non-backslashed regexp style to the backslashed regexp style.
That would require even fewer backslashes in most cases (tho not in
this example because of the presence of a literal \ in the regexp).

    (should (equal
             (let ((str "(def\\macro leaf () nil)"))
               (string-match (re "\\((def\\\\macro) ([^ ]+)" str)
               (list (match-string 1 str) (match-string 2 str)))
             '("def\\macro" "leaf"))))

IOW, I think regexps are a poor motivation to introduce raw strings.


        Stefan




^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH] Interpret #r"..." as a raw string
  2021-02-26 18:18 [PATCH] Interpret #r"..." as a raw string Naoya Yamashita
  2021-02-26 18:27 ` [External] : " Drew Adams
  2021-02-26 19:09 ` Andreas Schwab
@ 2021-02-26 20:00 ` Eli Zaretskii
  2021-02-27  0:39   ` Daniel Brooks
  2021-02-28  6:22 ` Zhu Zihao
                   ` (2 subsequent siblings)
  5 siblings, 1 reply; 75+ messages in thread
From: Eli Zaretskii @ 2021-02-26 20:00 UTC (permalink / raw)
  To: Naoya Yamashita; +Cc: emacs-devel

> Date: Sat, 27 Feb 2021 03:18:57 +0900 (JST)
> From: Naoya Yamashita <conao3@gmail.com>
> 
> I write a patch to allow Emacs reader interpret raw string.

What is a "raw string", and how does it differ from regular Lisp
strings?

Thanks.



^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [External] : [PATCH] Interpret #r"..." as a raw string
  2021-02-26 18:53   ` Naoya Yamashita
  2021-02-26 19:03     ` Drew Adams
  2021-02-26 19:48     ` Stefan Monnier
@ 2021-02-26 20:03     ` Eli Zaretskii
  2021-02-26 20:34       ` Naoya Yamashita
  2 siblings, 1 reply; 75+ messages in thread
From: Eli Zaretskii @ 2021-02-26 20:03 UTC (permalink / raw)
  To: Naoya Yamashita; +Cc: drew.adams, emacs-devel

> Date: Sat, 27 Feb 2021 03:53:36 +0900 (JST)
> From: Naoya Yamashita <conao3@gmail.com>
> Cc: emacs-devel@gnu.org
> 
> As my understanding, raw-string is not a data type, but a
> notation that does not interpret escape sequences as written by
> the user.

So this is just to allow a string be specified without escaping a
backslash with more backslashes?



^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [External] : [PATCH] Interpret #r"..." as a raw string
  2021-02-26 19:48     ` Stefan Monnier
@ 2021-02-26 20:23       ` Naoya Yamashita
  2021-02-26 20:34         ` Andreas Schwab
  0 siblings, 1 reply; 75+ messages in thread
From: Naoya Yamashita @ 2021-02-26 20:23 UTC (permalink / raw)
  To: monnier; +Cc: drew.adams, emacs-devel

Thanks Stefan,

>>     (ert-deftest lread-raw-string-usage-2 ()
>>       (should (equal
>>                (let ((str "(def\\macro leaf () nil)"))
>>                  (string-match "(\\(def\\\\macro\\) \\([^ ]+\\)" str)
>>                  (list (match-string 1 str) (match-string 2 str)))
>>                '("def\\macro" "leaf")))
>>     
>>       (should (equal
>>                (let ((str "(def\\macro leaf () nil)"))
>>                  (string-match #r"(\(def\macro\) \([^ ]+\)" str)
>>                  (list (match-string 1 str) (match-string 2 str)))
>>                '("def\\macro" "leaf"))))
> 
> [ Note the above has a bug: the raw-string regexp equivalent to
>   "(\\(def\\\\macro\\) \\([^ ]+\\)" is #r"(\(def\\macro\) \([^ ]+\)"
>   notice the double backslash between "def" and "macro".  ]

I think it has no bug; all the tests I introduced pass on my patched Emacs.

    the raw-string regexp equivalent to
    "(\\(def\\\\macro\\) \\([^ ]+\\)" is #r"(\(def\\macro\) \([^ ]+\)

Yes, I agree, but

    notice the double backslash between "def" and "macro".

I have a question about that.  You 'see' a double backslash between "def"
and "macro", but actually only one backslash is placed between "def" and "macro".

If I write the test string in raw-string notation too, it should be as below.

    (should (equal
             (let ((str #r"(def\macro leaf () nil)"))
               (string-match #r"(\(def\macro\) \([^ ]+\)" str)
               (list (match-string 1 str) (match-string 2 str)))
             '(#r"def\macro" "leaf")))

Wow, this is easy to understand.  We can now write the matching
string as a literal as is!

> For regexps, this has been discussed to death already, but a better
> option is arguably to introduce a macro that converts from the
> non-backslashed regexp style to the backslashed regexp style.
> That would require even fewer backslashes in most cases (tho not in
> this example because of the presence of a literal \ in the regexp).

As I understand it, a macro cannot provide raw-string notation.
Would we need a reader macro instead?  But we don't have reader macros in Elisp.

I think it would be better to introduce reader macros and make the
reader user-extensible; that would be a straightforward way to
define this raw-string notation in user space.  But that job was
too big, so I added just this one case to the reader.

> IOW, I think regexps are a poor motivation to introduce raw strings.

Thanks, but many languages support this sort of notation to make it easy
to write string literals without thinking about escaping.
I still want to be able to use it in Elisp too.



^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [External] : [PATCH] Interpret #r"..." as a raw string
  2021-02-26 20:03     ` Eli Zaretskii
@ 2021-02-26 20:34       ` Naoya Yamashita
  0 siblings, 0 replies; 75+ messages in thread
From: Naoya Yamashita @ 2021-02-26 20:34 UTC (permalink / raw)
  To: eliz; +Cc: drew.adams, emacs-devel

Thanks, Eli.

>> As my understanding, raw-string is not a data type, but a
>> notation that does not interpret escape sequences as written by
>> the user.
> 
> So this is just to allow a string be specified without escaping a
> backslash with more backslashes?

Yes, but not only escaped backslashes: raw string notation
ignores all backslash escaping.



^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [External] : [PATCH] Interpret #r"..." as a raw string
  2021-02-26 20:23       ` Naoya Yamashita
@ 2021-02-26 20:34         ` Andreas Schwab
  2021-02-26 20:39           ` Naoya Yamashita
  0 siblings, 1 reply; 75+ messages in thread
From: Andreas Schwab @ 2021-02-26 20:34 UTC (permalink / raw)
  To: Naoya Yamashita; +Cc: monnier, drew.adams, emacs-devel

On Feb 27 2021, Naoya Yamashita wrote:

> If I wrote test string with raw-string notation, it shold be below.
>
>     (should (equal
>              (let ((str #r"(def\macro leaf () nil)"))
>                (string-match #r"(\(def\macro\) \([^ ]+\)" str)
>                (list (match-string 1 str) (match-string 2 str)))
>              '(#r"def\macro" "leaf")))
>
> Wow, this is easy to understand.  We can now write the matching
> string as a literal as is!

\m is not a valid regex escape.  It surely won't match a backslash.

Andreas.

-- 
Andreas Schwab, schwab@linux-m68k.org
GPG Key fingerprint = 7578 EB47 D4E5 4D69 2510  2552 DF73 E780 A9DA AEC1
"And now for something completely different."



^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [External] : [PATCH] Interpret #r"..." as a raw string
  2021-02-26 20:34         ` Andreas Schwab
@ 2021-02-26 20:39           ` Naoya Yamashita
  2021-02-26 20:45             ` Andreas Schwab
  0 siblings, 1 reply; 75+ messages in thread
From: Naoya Yamashita @ 2021-02-26 20:39 UTC (permalink / raw)
  To: schwab; +Cc: monnier, drew.adams, emacs-devel

Thanks, Andreas.

> \m is not a valid regex escape.  It surely won't match a backslash.

Please test my patch: apply it to your HEAD Emacs and see the test cases pass.
As I mentioned, all the test cases are green on my patched Emacs.

To answer your question: in my raw-string notation, every
backslash is literally a literal character.  If we write #r"def\macro",
it is literally `def\macro`.  It is not an escape.



^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [External] : [PATCH] Interpret #r"..." as a raw string
  2021-02-26 20:39           ` Naoya Yamashita
@ 2021-02-26 20:45             ` Andreas Schwab
  2021-02-26 20:50               ` Naoya Yamashita
  0 siblings, 1 reply; 75+ messages in thread
From: Andreas Schwab @ 2021-02-26 20:45 UTC (permalink / raw)
  To: Naoya Yamashita; +Cc: monnier, drew.adams, emacs-devel

On Feb 27 2021, Naoya Yamashita wrote:

> I try to answer your question, in my raw-string notion, all
> backslash is literaly literal char.  If we write #r"def\macro",
> it is literaly `def\macro`.  It is not escape.

But \m is _not_ a valid regexp.  It has undefined behaviour.

Andreas.

-- 
Andreas Schwab, schwab@linux-m68k.org
GPG Key fingerprint = 7578 EB47 D4E5 4D69 2510  2552 DF73 E780 A9DA AEC1
"And now for something completely different."



^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [External] : [PATCH] Interpret #r"..." as a raw string
  2021-02-26 20:45             ` Andreas Schwab
@ 2021-02-26 20:50               ` Naoya Yamashita
  2021-02-26 20:54                 ` Andreas Schwab
  0 siblings, 1 reply; 75+ messages in thread
From: Naoya Yamashita @ 2021-02-26 20:50 UTC (permalink / raw)
  To: schwab; +Cc: monnier, drew.adams, emacs-devel

> But \m is _not_ a valid regexp.  It has undefined behaviour.

It is not an escaped character.  It is just a literal string.
Please try the snippet below on a patched Emacs
(the patch is attached to my first e-mail).

    (string= #r"def\macro" "def\\macro")
    ;;=> t



^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [External] : [PATCH] Interpret #r"..." as a raw string
  2021-02-26 20:50               ` Naoya Yamashita
@ 2021-02-26 20:54                 ` Andreas Schwab
  0 siblings, 0 replies; 75+ messages in thread
From: Andreas Schwab @ 2021-02-26 20:54 UTC (permalink / raw)
  To: Naoya Yamashita; +Cc: monnier, drew.adams, emacs-devel

On Feb 27 2021, Naoya Yamashita wrote:

>> But \m is _not_ a valid regexp.  It has undefined behaviour.
>
> It is not escaped char.  It just literal string.

You use it as a regexp, so it needs to have regexp syntax.  And \m is
_not_ a valid regexp syntax.

Andreas.

-- 
Andreas Schwab, schwab@linux-m68k.org
GPG Key fingerprint = 7578 EB47 D4E5 4D69 2510  2552 DF73 E780 A9DA AEC1
"And now for something completely different."



^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH] Interpret #r"..." as a raw string
  2021-02-26 20:00 ` Eli Zaretskii
@ 2021-02-27  0:39   ` Daniel Brooks
  2021-02-27 16:14     ` Richard Stallman
  0 siblings, 1 reply; 75+ messages in thread
From: Daniel Brooks @ 2021-02-27  0:39 UTC (permalink / raw)
  To: Eli Zaretskii; +Cc: Naoya Yamashita, emacs-devel

Eli Zaretskii <eliz@gnu.org> writes:

>> Date: Sat, 27 Feb 2021 03:18:57 +0900 (JST)
>> From: Naoya Yamashita <conao3@gmail.com>
>> 
>> I write a patch to allow Emacs reader interpret raw string.
>
> What is a "raw string", and how does it differ from regular Lisp
> strings?
>
> Thanks.

Many languages have multiple string types because they simplify the
process of writing strings that contain quotation characters,
backslashes, or other syntax such as interpolation.

Think of sh, where double–quoted strings allow substitutions, while
single–quoted strings do not. The single–quoted strings are similar to
raw strings. Or Perl, where similar but more complex rules apply,
including strings that look like q{foo} and can be delimited by any
punctuation characters. Or Raku, which allows unicode punctuation as
delimiters such as q«foo». Or Rust, where r"foo" is a raw string that
can be delimited not just by double quotes, but also double quotes plus
an arbitrary number of # characters.

For example, suppose I am writing a shell script and I want to print out
an html anchor:

    echo "<a href=\"https://example.com/\">click here for an example</a>"

vs:

    echo '<a href="https://example.com/">click here for an example</a>'

The single–quoted string is nicer because I don’t have to escape the
quotes. Of course, HTML also allows me to use single quotes in place of
double quotes (and with no change of the semantics of the HTML), so
changing them would also be an option. Perhaps an even better example
would be a shell script that emits elisp, where strings must be
double–quoted.

Of course the primary difference between single– and double–quoted
strings in Shell and Perl is interpolation, rather than escape
characters. In Raku this is extended so that there are half a dozen
different features that can be independently turned on or off for any
given quoted item. Q"foo" is a raw string. q"foo" adds the backslash
escape mechanism for concisely representing various characters such as
tabs, newlines, and so on. qq"foo" adds interpolation on top of
escaping. qw"foo bar" and qqw"foo bar" add word splitting, so that you
get not a single string but a list of the words in the string. qx"foo"
is like the backtick syntax in Shell; it runs the quoted item in a
subshell. qqx"foo" does interpolation on it before running it in the
subshell. Heredocs allow for multiline strings. All of these forms allow
you to use arbitrary punctuation characters as delimiters. Then there is
a whole thing with adjectives where you can pick and choose those
features using an even more uniform syntax. And finally regexes are yet
more fun on top of all of that. Raku even has an unquoting mechanism
that is rather similar to the lisp unquote; it allows the nesting of
different string types.

Most languages don’t go to this extreme, but in languages that have raw
strings they are a way to turn off complicated features that you don’t
want to use in every instance.

As written, Naoya’s raw string patch allows the user to turn off string
escaping, but not to choose alternative delimiters (which has little or
no precedent in elisp) or to turn off string interpolation (which isn’t
built in to the elisp syntax, but is instead implemented by library
functions such as format.)

Naoya, your patch looks fairly good to my unpractised eye, but you might
consider adding an error message for malformed expressions such as
#r'foo', where the character after the r isn’t a double quote character.

Probably best to start thinking about how to document the syntax in the
elisp manual too.

Personally, I quite like the idea. Raw strings are useful for a lot more
than just regular expressions.

db48x



^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH] Interpret #r"..." as a raw string
  2021-02-27  0:39   ` Daniel Brooks
@ 2021-02-27 16:14     ` Richard Stallman
  2021-02-27 16:18       ` Stefan Monnier
  2021-02-27 20:41       ` Daniel Brooks
  0 siblings, 2 replies; 75+ messages in thread
From: Richard Stallman @ 2021-02-27 16:14 UTC (permalink / raw)
  To: Daniel Brooks; +Cc: eliz, conao3, emacs-devel

[[[ To any NSA and FBI agents reading my email: please consider    ]]]
[[[ whether defending the US Constitution against all enemies,     ]]]
[[[ foreign or domestic, requires you to follow Snowden's example. ]]]

  > Many languages have multiple string types because they simplify the
  > process of writing strings that contain quotation characters,
  > backslashes, or other syntax such as interpolation.

I'm trying to understand that message but it's unclear
at a deep level.

Is a "raw string" supposed to be a different data type,
or is it supposed to be a different syntax for the string type?

The comparison with shell syntax does not help, because I don't think
that question is meaningful for shell syntax.  I don't know the other
languages referred to.

-- 
Dr Richard Stallman
Chief GNUisance of the GNU Project (https://gnu.org)
Founder, Free Software Foundation (https://fsf.org)
Internet Hall-of-Famer (https://internethalloffame.org)





^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH] Interpret #r"..." as a raw string
  2021-02-27 16:14     ` Richard Stallman
@ 2021-02-27 16:18       ` Stefan Monnier
  2021-03-01  5:19         ` Richard Stallman
  2021-02-27 20:41       ` Daniel Brooks
  1 sibling, 1 reply; 75+ messages in thread
From: Stefan Monnier @ 2021-02-27 16:18 UTC (permalink / raw)
  To: Richard Stallman; +Cc: Daniel Brooks, eliz, conao3, emacs-devel

> Is a "raw string" supposed to be a different data type,
> or is it supposed to be a different syntax for the string type?

AFAIK it's usually a different syntax for the same underlying type.


        Stefan




^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH] Interpret #r"..." as a raw string
  2021-02-27 16:14     ` Richard Stallman
  2021-02-27 16:18       ` Stefan Monnier
@ 2021-02-27 20:41       ` Daniel Brooks
  1 sibling, 0 replies; 75+ messages in thread
From: Daniel Brooks @ 2021-02-27 20:41 UTC (permalink / raw)
  To: Richard Stallman; +Cc: eliz, conao3, emacs-devel

Richard Stallman <rms@gnu.org> writes:

>   > Many languages have multiple string types because they simplify the
>   > process of writing strings that contain quotation characters,
>   > backslashes, or other syntax such as interpolation.
>
> I'm trying to understand that message but it's unclear
> at a deep level.
>
> Is a "raw string" supposed to be a different data type,
> or is it supposed to be a different syntax for the string type?
>
> The companison with shell syntax does not help, because I don't think
> that question is meaningful for shell syntax.  I don't know the othehr
> languages referred to.

It is definitely just a different syntax for the same data type
(although some languages additionally have syntax that distinguishes
between utf8 string strings and byte strings with no particular
encoding; those generally end up as a different data type).

I used shell as an example primarily because it will be familiar to
everyone. It is however unusual that shell has a string syntax which
disables interpolation but not escaping; most languages with raw strings
use them to turn off both features.

db48x



^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH] Interpret #r"..." as a raw string
  2021-02-26 18:18 [PATCH] Interpret #r"..." as a raw string Naoya Yamashita
                   ` (2 preceding siblings ...)
  2021-02-26 20:00 ` Eli Zaretskii
@ 2021-02-28  6:22 ` Zhu Zihao
  2021-03-01  5:26   ` Richard Stallman
  2021-03-01 12:06 ` Alan Mackenzie
  2021-03-02 11:41 ` Aurélien Aptel
  5 siblings, 1 reply; 75+ messages in thread
From: Zhu Zihao @ 2021-02-28  6:22 UTC (permalink / raw)
  To: Naoya Yamashita; +Cc: emacs-devel

[-- Attachment #1: Type: text/plain, Size: 603 bytes --]

Is #r"..." convenient enough?

If I have a string with lots of double quotes, I still have to escape
every double quote.

Maybe better to change to #r"..."r#, or even #r<<<<"...">>>>r# (user can
insert any number of < and > but they must be paired to avoid the
escape.)

I'm in favor of raw string reader syntax because I often want my program to be
self-documenting, so I write a lot in docstrings.

BTW, Naoya, have you tested raw string syntax in a propertized string?
e.g. #(#r"foo bar" ....)

-- 
Retrieve my PGP public key:

  gpg --recv-keys D47A9C8B2AE3905B563D9135BE42B352A9F6821F

Zihao

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 255 bytes --]

^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH] Interpret #r"..." as a raw string
  2021-02-27 16:18       ` Stefan Monnier
@ 2021-03-01  5:19         ` Richard Stallman
  2021-03-02  5:45           ` Matt Armstrong
  0 siblings, 1 reply; 75+ messages in thread
From: Richard Stallman @ 2021-03-01  5:19 UTC (permalink / raw)
  To: Stefan Monnier; +Cc: db48x, eliz, conao3, emacs-devel

[[[ To any NSA and FBI agents reading my email: please consider    ]]]
[[[ whether defending the US Constitution against all enemies,     ]]]
[[[ foreign or domestic, requires you to follow Snowden's example. ]]]

  > > Is a "raw string" supposed to be a different data type,
  > > or is it supposed to be a different syntax for the string type?

  > AFAIK it's usually a different syntax for the same underlying type.

Thanks.  I don't see any problem with that.

"Raw string" sounds like a different type of string.
I suggest calling it "raw string syntax" for clarity.

-- 
Dr Richard Stallman
Chief GNUisance of the GNU Project (https://gnu.org)
Founder, Free Software Foundation (https://fsf.org)
Internet Hall-of-Famer (https://internethalloffame.org)





^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH] Interpret #r"..." as a raw string
  2021-02-28  6:22 ` Zhu Zihao
@ 2021-03-01  5:26   ` Richard Stallman
  0 siblings, 0 replies; 75+ messages in thread
From: Richard Stallman @ 2021-03-01  5:26 UTC (permalink / raw)
  To: Zhu Zihao; +Cc: conao3, emacs-devel

[[[ To any NSA and FBI agents reading my email: please consider    ]]]
[[[ whether defending the US Constitution against all enemies,     ]]]
[[[ foreign or domestic, requires you to follow Snowden's example. ]]]

  > Is #r"..." convenient enough?

  > If I have a string with lots of double quotes, I still have to escape
  > every double quote.

  > Maybe better to change to #r"..."r#, or even #r<<<<"...">>>>r# (user can
  > insert any number of < and > but they must be paired to avoid the
  > escape.)

I suggest writing the code to move backwards over various possible syntaxes
to determine which ones work well for backward parsing.
Then choose the syntax based on that.

I've done that in the past.

-- 
Dr Richard Stallman
Chief GNUisance of the GNU Project (https://gnu.org)
Founder, Free Software Foundation (https://fsf.org)
Internet Hall-of-Famer (https://internethalloffame.org)





^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH] Interpret #r"..." as a raw string
  2021-02-26 18:18 [PATCH] Interpret #r"..." as a raw string Naoya Yamashita
                   ` (3 preceding siblings ...)
  2021-02-28  6:22 ` Zhu Zihao
@ 2021-03-01 12:06 ` Alan Mackenzie
  2021-03-01 12:13   ` Andreas Schwab
  2021-03-02  5:59   ` Matt Armstrong
  2021-03-02 11:41 ` Aurélien Aptel
  5 siblings, 2 replies; 75+ messages in thread
From: Alan Mackenzie @ 2021-03-01 12:06 UTC (permalink / raw)
  To: Naoya Yamashita; +Cc: emacs-devel

Hello, Naoya.

On Sat, Feb 27, 2021 at 03:18:57 +0900, Naoya Yamashita wrote:
> Hi, all.

> I write a patch to allow Emacs reader interpret raw string.

> As you know, we already has some special marker using `#` to make
> Emacs reader work in a special way.  For example, we have `#[` to
> indicate byte-compiled object and `#s(` to indicate hash-table.

> I introduce raw string using this architecture, if users put `#r`
> before string, Emacs reader interpret it as a raw string.

> Many programming language has a Raw string feature[^1], so I want to
> use raw string in Emacs-lisp.

I'm against introducing raw strings into Emacs Lisp.  There just doesn't
seem to be a need, and there are several disadvantages.

Firstly, it would make the language more complicated, and thus more
difficult to learn - "What does this #r mean?".

Raw strings themselves are complicated beasts, more so than your post
suggests.  (I've implemented them for C++ Mode.)

We'd have to decide which characters are valid inside raw strings - for
example, is a linefeed valid?  If so, we'd have to decide how to fontify
a newly opened raw string, which at first would extend to the end of the
buffer.  We'd have to decide how to do fontification when a " gets
inserted into the middle of an already valid raw string.

The " character can't be inserted into a raw string - that limits their
usefulness quite a lot.  Sooner or later, somebody will suggest some
"enhancement" to allow this, increasing the complication even more.
Indeed, this has already happened, with somebody suggesting using
variable length string delimiters, or suchlike.  Such strings could not
be fontified simply by the syntax routines, as they are now.

Then there is the handling of raw strings by the regexp engine, as
Andreas has already pointed out.  There will be confusion in whether or
not a doubled backslash in a regexp has to be written as \\ or \ inside
a raw string.  Currently, we have the invariable \\\\.

> To see more concrete example, please see the attached patch testcases.

> Regards,
> Naoya

[ .... ]

-- 
Alan Mackenzie (Nuremberg, Germany).



^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH] Interpret #r"..." as a raw string
  2021-03-01 12:06 ` Alan Mackenzie
@ 2021-03-01 12:13   ` Andreas Schwab
  2021-03-02  5:59   ` Matt Armstrong
  1 sibling, 0 replies; 75+ messages in thread
From: Andreas Schwab @ 2021-03-01 12:13 UTC (permalink / raw)
  To: Alan Mackenzie; +Cc: Naoya Yamashita, emacs-devel

On Mär 01 2021, Alan Mackenzie wrote:

> Then there is the handling of raw strings by the regexp engine,

The concept of raw strings only exists at the reader level.  Internally,
at execution time, all strings are alike.

Andreas.

-- 
Andreas Schwab, schwab@linux-m68k.org
GPG Key fingerprint = 7578 EB47 D4E5 4D69 2510  2552 DF73 E780 A9DA AEC1
"And now for something completely different."



^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH] Interpret #r"..." as a raw string
  2021-03-01  5:19         ` Richard Stallman
@ 2021-03-02  5:45           ` Matt Armstrong
  2021-03-03  5:53             ` Richard Stallman
  0 siblings, 1 reply; 75+ messages in thread
From: Matt Armstrong @ 2021-03-02  5:45 UTC (permalink / raw)
  To: rms, Stefan Monnier; +Cc: db48x, eliz, conao3, emacs-devel

Richard Stallman <rms@gnu.org> writes:

> [[[ To any NSA and FBI agents reading my email: please consider    ]]]
> [[[ whether defending the US Constitution against all enemies,     ]]]
> [[[ foreign or domestic, requires you to follow Snowden's example. ]]]
>
>   > > Is a "raw string" supposed to be a different data type,
>   > > or is it supposed to be a different syntax for the string type?
>
>   > AFAIK it's usually a different syntax for the same underlying type.
>
> Thanks.  I don't see any problem with that.
>
> "Raw string" sounds like a different type of string.
> I suggest calling it "raw string syntax" for clarity.

I have seen it called a "raw string literal" in other languages.



^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH] Interpret #r"..." as a raw string
  2021-03-01 12:06 ` Alan Mackenzie
  2021-03-01 12:13   ` Andreas Schwab
@ 2021-03-02  5:59   ` Matt Armstrong
  2021-03-02  9:56     ` Daniel Brooks
  2021-03-02 12:01     ` Dmitry Gutov
  1 sibling, 2 replies; 75+ messages in thread
From: Matt Armstrong @ 2021-03-02  5:59 UTC (permalink / raw)
  To: Alan Mackenzie, Naoya Yamashita; +Cc: emacs-devel

Alan Mackenzie <acm@muc.de> writes:

> I'm against introducing raw strings into Emacs Lisp.  There just doesn't
> seem to be a need, and there are several disadvantages.
>
> Firstly, it would make the language more complicated, and thus more
> difficult to learn - "What does this #r mean?".
>
> Raw strings themselves are complicated beasts, more so than your post
> suggests.  (I've implemented them for C++ Mode.)
>
> We'd have to decide which characters are valid inside raw strings -
> for example, is a linefeed valid?  If so, we'd have to decide how to
> fontify a newly opened raw string, which at first would extend to the
> end of the buffer.  We'd have to decide how to do fontification when a
> " gets inserted into the middle of an already valid raw string.
>
> The " character can't be inserted into a raw string - that limits
> their usefulness quite a lot.  Sooner or later, somebody will suggest
> some "enhancement" to allow this, increasing the complication even
> more.  Indeed, this has already happened, with somebody suggesting
> using variable length string delimiters, or suchlike.  Such strings
> could not be fontified simply by the syntax routines, as they are now.
>
> Then there is the handling of raw strings by the regexp engine, as
> Andreas has already pointed out.  There will be confusion in whether
> or not a doubled backslash in a regexp has to be written as \\ or \
> inside a raw string.  Currently, we have the invariable \\\\.

Yes, it'd be nice to see a clear description of what problems this
proposal is designed to solve.  The name alone is not clear because not
every language has the same "raw string" semantics.  Are these like C++
raw string literals or something closer to Python's single quote?

C++ has probably the most flexible "gold standard" raw string literals.
As Alan I think rightly points out, this makes the language and all
tools that process the language more complex.  This is a high cost, so
the feature should deliver some real value.

For those that don't know, C++'s raw string literals can be as simple as
this for the string "raw-content":

   R"(raw-content)"

But if the content itself contains the character sequence )" then the
programmer can specify any delimiter they want:

   R"DELIMITER(raw-content)"more-raw-content)DELIMITER"

But as you can see above, it isn't always clearer to write a raw string
literal.



^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH] Interpret #r"..." as a raw string
  2021-03-02  5:59   ` Matt Armstrong
@ 2021-03-02  9:56     ` Daniel Brooks
  2021-03-02 10:13       ` Andreas Schwab
  2021-03-02 11:14       ` Alan Mackenzie
  2021-03-02 12:01     ` Dmitry Gutov
  1 sibling, 2 replies; 75+ messages in thread
From: Daniel Brooks @ 2021-03-02  9:56 UTC (permalink / raw)
  To: Matt Armstrong; +Cc: Alan Mackenzie, Naoya Yamashita, emacs-devel

Matt Armstrong <matt@rfc20.org> writes:

> Alan Mackenzie <acm@muc.de> writes:
>
> C++ has probably the most flexible "gold standard" raw string literals.

With respect, I think that Raku “wins” this
fight. https://docs.raku.org/language/quoting is really worth reading;
it's a work of art. You can think of the quote operator as a function
that takes 13 named boolean arguments plus a choice of opening and
closing delimiters.

> As Alan I think rightly points out, this makes the language and all
> tools that process the language more complex.  This is a high cost, so
> the feature should deliver some real value.

Certainly true. As the ordinary Lisp string syntax already allows
multi-line strings, and interpolation is handled by the format function,
the primary benefit is to turn off escaping. We could also offer a
choice of opening and closing delimiters, though the proposed code
didn't implement that.

I think the benefit will be worth it. If we offered a little more choice
of delimiters, then we could gain more benefit when the string must also
contain double quotes. This need have a large complexity cost.

> For those that don't know, C++'s raw string literals can be as simple as
> this for the string "raw-content":
>
>    R"(raw-content)"
>
> But if the content itself contains the character sequence )" then the
> programmer can specify any delimiter they want:
>
>    R"DELIMITER(raw-content)"more-raw-content)DELIMITER"
>
> But as you can see above, it isn't always clearer to write a raw string
> literal.

I would say that there are four ways to choose the delimiters.

The simplest way is just accepting just one specific delimiter, often
with no way to include that character in the string. For example,
Scala's syntax is raw"foo", but without any form of escaping that will
allow a double quote inside the string. C#'s syntax is @"foo", but you
can include a double-quote by repeating it, so @"foo""bar" is the string
”foo"bar”. Most languages are in this category, and this is how the
proposed code works.

Then there is the sed→perl→raku way, where the parser accepts a wide
variety of characters as the opening delimiter, and uses it to compute
which closing delimiter to look for. Raku allows any character not
allowed in identifiers, which is most characters not in the L or N
Unicode categories. Sed and Perl just allow punctuation characters.

There is the Rust way, where the parser looks for a double-quote
preceded by zero or more #'s. The closing delimiter is a double-quote
followed by the same number of #'s.

And finally the C++11 way, where it looks for a double-quote followed by
zero to sixteen source characters (with a few minor exceptions) followed
by an opening parenthesis. The closing delimiter is a closing
parenthesis followed by the same zero to sixteen characters in the same
order as in the opening delimiter followed by a double-quote character.

Of the three, I think Raku's way is the most fun because it allows the
widest choice of characters (q🕶awesome!🕶, for example). I'd be fine with
the current proposal, but if others think that it is important to allow
double-quotes inside the raw string, then I think Rust's syntax is the
next logical step. #r##"foo"## would fit in well with the rest of elisp;
it won't look as out of place as the others, and it's only a small
increment in complexity.

Or maybe we want to invent something completely new. As Emacs buffers
may include images which are treated as if they were characters of
unusual size, perhaps we could use gifs. A string bracketed by a GIF of
a dude putting on sunglasses would really show those other languages up.

As it's nicer when delimiters are paired, we could allow the closing GIF
to be horizontally mirrored so that both dudes are either looking
inwards at the string or outwards at the rest of the world.

db48x

PS: if anyone wants to go the Perl/Raku way, I happen to have built a
list of the paired punctuation characters recently:

var _PiPf = map[rune]rune{
	'«': '»', '‘': '’', '“': '”', '‹': '›', '⸂': '⸃', '⸄': '⸅', '⸉': '⸊',
	'⸌': '⸍', '⸜': '⸝', '⸠': '⸡',
}

var _PsPf = map[rune]rune{
	'‚': '’', '„': '”',
}

var _PsPe = map[rune]rune{
	'(': ')', '[': ']', '{': '}', '༺': '༻', '༼': '༽', '᚛': '᚜', '⁅': '⁆',
	'⁽': '⁾', '₍': '₎', '❨': '❩', '❪': '❫', '❬': '❭', '❮': '❯', '❰': '❱',
	'❲': '❳', '❴': '❵', '⟅': '⟆', '⟦': '⟧', '⟨': '⟩', '⟪': '⟫', '⦃': '⦄',
	'⦅': '⦆', '⦇': '⦈', '⦉': '⦊', '⦋': '⦌', '⦑': '⦒', '⦓': '⦔', '⦕': '⦖',
	'⦗': '⦘', '⧘': '⧙', '⧚': '⧛', '⧼': '⧽', '〈': '〉', '《': '》',
	'「': '」', '『': '』', '【': '】', '〔': '〕', '〖': '〗', '〘': '〙',
	'〚': '〛', '〝': '〞', '︗': '︘', '︵': '︶', '︷': '︸', '︹': '︺',
	'︻': '︼', '︽': '︾', '︿': '﹀', '﹁': '﹂', '﹃': '﹄', '﹇': '﹈',
	'﹙': '﹚', '﹛': '﹜', '﹝': '﹞', '（': '）', '［': '］', '｛': '｝',
	'｟': '｠', '｢': '｣', '⸨': '⸩',
}

var _SmSm = map[rune]rune{
	'<': '>',
}

This is obviously written in Go. My source code is at
https://github.com/db48x/goparsify/blob/master/literals.go#L298-L322.

Feel free to use these tables however you like; I consider them to be a
mere listing of facts and as such they're not copyrightable.

The basic algorithm that Perl uses is that the delimiter may be any
punctuation character, and if the opening delimiter is a key in any of
these tables then the closing delimiter is expected to be the
corresponding value; otherwise the closing delimiter is expected to be
identical to the opening delimiter.

Raku is similar, except that it allows any unicode character that isn't
designated as belonging to identifiers rather than just punctuation.

For speed you'll obviously prefer to do a single lookup into one hash
table, but for organizational purposes it's nicer to have them grouped
by unicode category. This will help you update them when new characters
are added in the future.



^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH] Interpret #r"..." as a raw string
  2021-03-02  9:56     ` Daniel Brooks
@ 2021-03-02 10:13       ` Andreas Schwab
  2021-03-02 10:55         ` Daniel Brooks
  2021-03-02 11:14       ` Alan Mackenzie
  1 sibling, 1 reply; 75+ messages in thread
From: Andreas Schwab @ 2021-03-02 10:13 UTC (permalink / raw)
  To: Daniel Brooks
  Cc: Matt Armstrong, emacs-devel, Naoya Yamashita, Alan Mackenzie

There is also the tcl way, where {} delimits kind of raw strings, but
allows properly nested {} pairs inside.

Andreas.

-- 
Andreas Schwab, schwab@linux-m68k.org
GPG Key fingerprint = 7578 EB47 D4E5 4D69 2510  2552 DF73 E780 A9DA AEC1
"And now for something completely different."



^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH] Interpret #r"..." as a raw string
  2021-03-02 10:13       ` Andreas Schwab
@ 2021-03-02 10:55         ` Daniel Brooks
  2021-03-02 11:18           ` Andreas Schwab
  0 siblings, 1 reply; 75+ messages in thread
From: Daniel Brooks @ 2021-03-02 10:55 UTC (permalink / raw)
  To: Andreas Schwab
  Cc: Matt Armstrong, Alan Mackenzie, Naoya Yamashita, emacs-devel

Andreas Schwab <schwab@linux-m68k.org> writes:

> There is also the tcl way, where {} delimits kind of raw strings, but
> allows properly nested {} pairs inside.

That's an unusual choice; I'll have to keep it in mind for the next time
this topic comes up. Apparently you can escape the curly braces if you
don't want them to be paired up inside this type of string.

db48x



^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH] Interpret #r"..." as a raw string
  2021-03-02  9:56     ` Daniel Brooks
  2021-03-02 10:13       ` Andreas Schwab
@ 2021-03-02 11:14       ` Alan Mackenzie
  2021-03-02 11:52         ` Daniel Brooks
  1 sibling, 1 reply; 75+ messages in thread
From: Alan Mackenzie @ 2021-03-02 11:14 UTC (permalink / raw)
  To: Daniel Brooks; +Cc: Matt Armstrong, Naoya Yamashita, emacs-devel

Hello, Daniel.

On Tue, Mar 02, 2021 at 01:56:43 -0800, Daniel Brooks wrote:
> Matt Armstrong <matt@rfc20.org> writes:

> > Alan Mackenzie <acm@muc.de> writes:

> > C++ has probably the most flexible "gold standard" raw string literals.

> With respect, I think that Raku “wins” this
> fight. https://docs.raku.org/language/quoting is really worth reading;
> it's a work of art. You can think of the quote operator as a function
> that takes 13 named boolean arguments plus a choice of opening and
> closing delimiters.

I haven't looked at raku, but I imagine that this "quoting" is something
radically different from what we do in Emacs Lisp.

> > As Alan I think rightly points out, this makes the language and all
> > tools that process the language more complex.  This is a high cost, so
> > the feature should deliver some real value.

> Certainly true. As the ordinary Lisp string syntax already allows
> multi-line strings, and interpolation is handled by the format function,
> the primary benefit is to turn off escaping. We could also offer a
> choice of opening and closing delimiters, though the proposed code
> didn't implement that.

> I think the benefit will be worth it. If we offered a little more choice
> of delimiters, then we could gain more benefit when the string must also
> contain double quotes. This need have a large complexity cost.

I think you meant to have a "not" in that last sentence, but also think
it is correct as it stands.

One of the things I didn't say explicitly in my last post was that with
any form of raw string, lisp would need to put a syntax-table text
property on each \ in such a string.  This needs to be done in an
after-change function, possibly assisted by a before-change function.
Any device to allow double quotes inside a raw string involves putting
syntax-table properties on these, too.

Having a choice of string delimiters makes things more complicated, too.

And all the while, some functionality needs to guard against such a "
becoming, or ceasing to be a raw string delimiter.

I can think of two ways to do these things: One is to clear the entire
raw string of all its syntax-table text properties at each change within
(or near) it, then reapply them all.  This could be slow in a big raw
string at normal typing speed.  The other way is to analyse carefully the
text in the vicinity of a change and alter the text properties minimally,
as needed.  C++ Mode takes this latter approach; it is complicated and
difficult to get right.

Currently, Emacs Lisp Mode doesn't need such change hooks.  Introducing
them would be a significant increase in complexity, and I think this
isn't worth it just to avoid having to quote backslashes in strings.

[ .... ]

-- 
Alan Mackenzie (Nuremberg, Germany).



^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH] Interpret #r"..." as a raw string
  2021-03-02 10:55         ` Daniel Brooks
@ 2021-03-02 11:18           ` Andreas Schwab
  2021-03-02 11:26             ` Daniel Brooks
  0 siblings, 1 reply; 75+ messages in thread
From: Andreas Schwab @ 2021-03-02 11:18 UTC (permalink / raw)
  To: Daniel Brooks
  Cc: Matt Armstrong, Alan Mackenzie, Naoya Yamashita, emacs-devel

On Mär 02 2021, Daniel Brooks wrote:

> Andreas Schwab <schwab@linux-m68k.org> writes:
>
>> There is also the tcl way, where {} delimits kind of raw strings, but
>> allows properly nested {} pairs inside.
>
> That's an unusual choice; I'll have to keep it in mind for the next time
> this topic comes up. Apparently you can escape the curly braces if you
> don't want them to be paired up inside this type of string.

The backslash is not removed, though, so it is impossible to include a
lone brace in a brace enclosed string.

Andreas.

-- 
Andreas Schwab, schwab@linux-m68k.org
GPG Key fingerprint = 7578 EB47 D4E5 4D69 2510  2552 DF73 E780 A9DA AEC1
"And now for something completely different."



^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH] Interpret #r"..." as a raw string
  2021-03-02 11:18           ` Andreas Schwab
@ 2021-03-02 11:26             ` Daniel Brooks
  0 siblings, 0 replies; 75+ messages in thread
From: Daniel Brooks @ 2021-03-02 11:26 UTC (permalink / raw)
  To: Andreas Schwab
  Cc: Matt Armstrong, emacs-devel, Naoya Yamashita, Alan Mackenzie

Andreas Schwab <schwab@linux-m68k.org> writes:

> On Mär 02 2021, Daniel Brooks wrote:
>
>> Andreas Schwab <schwab@linux-m68k.org> writes:
>>
>>> There is also the tcl way, where {} delimits kind of raw strings, but
>>> allows properly nested {} pairs inside.
>>
>> That's an unusual choice; I'll have to keep it in mind for the next time
>> this topic comes up. Apparently you can escape the curly braces if you
>> don't want them to be paired up inside this type of string.
>
> The backslash is not removed, though, so it is impossible to include a
> lone brace in a brace enclosed string.

Ok, that's even weirder.

db48x



^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH] Interpret #r"..." as a raw string
  2021-02-26 18:18 [PATCH] Interpret #r"..." as a raw string Naoya Yamashita
                   ` (4 preceding siblings ...)
  2021-03-01 12:06 ` Alan Mackenzie
@ 2021-03-02 11:41 ` Aurélien Aptel
  2021-03-02 13:49   ` Stefan Monnier
  5 siblings, 1 reply; 75+ messages in thread
From: Aurélien Aptel @ 2021-03-02 11:41 UTC (permalink / raw)
  To: Naoya Yamashita; +Cc: Emacs development discussions

On Fri, Feb 26, 2021 at 7:22 PM Naoya Yamashita <conao3@gmail.com> wrote:
>
> Hi, all.
>
> I write a patch to allow Emacs reader interpret raw string.

Hi Naoya,

I have done a patch like that a few years ago and it wasn't accepted
then; see past discussion:
https://mail.gnu.org/archive/html/emacs-devel/2012-08/msg00071.html

My notes and patch on implementing it:
http://diobla.info/blog-archive/raw-strings.html

Cheers,



^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH] Interpret #r"..." as a raw string
  2021-03-02 11:14       ` Alan Mackenzie
@ 2021-03-02 11:52         ` Daniel Brooks
  0 siblings, 0 replies; 75+ messages in thread
From: Daniel Brooks @ 2021-03-02 11:52 UTC (permalink / raw)
  To: Alan Mackenzie; +Cc: Matt Armstrong, Naoya Yamashita, emacs-devel

Alan Mackenzie <acm@muc.de> writes:

> Hello, Daniel.
>
> On Tue, Mar 02, 2021 at 01:56:43 -0800, Daniel Brooks wrote:
>> Matt Armstrong <matt@rfc20.org> writes:
>
>> > Alan Mackenzie <acm@muc.de> writes:
>
>> > C++ has probably the most flexible "gold standard" raw string literals.
>
>> With respect, I think that Raku “wins” this
>> fight. https://docs.raku.org/language/quoting is really worth reading;
>> it's a work of art. You can think of the quote operator as a function
>> that takes 13 named boolean arguments plus a choice of opening and
>> closing delimiters.
>
> I haven't looked at raku, but I imagine that this "quoting" is something
> radically different from what we do in Emacs Lisp.

One of the things you can turn on or off is interpolation of values into
quoted strings, which is a lot like what elisp uses the backquote for.

>> I think the benefit will be worth it. If we offered a little more choice
>> of delimiters, then we could gain more benefit when the string must also
>> contain double quotes. This need have a large complexity cost.
>
> I think you meant to have a "not" in that last sentence, but also think
> it is correct as it stands.

Yes, I did mean that it shouldn't add much complexity.

> One of the things I didn't say explicitly in my last post was that with
> any form of raw string, lisp would need to put a syntax-table text
> property on each \ in such a string.  This needs to be done in an
> after-change function, possibly assisted by a before-change function.
> Any device to allow double quotes inside a raw string involves putting
> syntax-table properties on these, too.
>
> Having a choice of string delimiters makes things more complicated, too.
>
> And all the while, some functionality needs to guard against such a "
> becoming, or ceasing to be a raw string delimiter.
>
> I can think of two ways to do these things: One is to clear the entire
> raw string of all its syntax-table text properties at each change within
> (or near) it, then reapply them all.  This could be slow in a big raw
> string at normal typing speed.  The other way is to analyse carefully the
> text in the vicinity of a change and alter the text properties minimally,
> as needed.  C++ Mode takes this latter approach; it is complicated and
> difficult to get right.
>
> Currently, Emacs Lisp Mode doesn't need such change hooks.  Introducing
> them would be a significant increase in complexity, and I think this
> isn't worth it just to avoid having to quote backslashes in strings.

Hmm. I don't know much about the internals of how modes work, so I'll
just take all of that as a given.

The question then is do we as humans adapt ourselves to the limitations
of our editor, or do we adapt our editor to us? Extending lisp-mode to
handle raw string literals in elisp code has a one-time cost to a few of
us, but counting those backslashes in our regexes has an ongoing cost to
all of us. We're not going to ditch regexes any time soon.

db48x



^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH] Interpret #r"..." as a raw string
  2021-03-02  5:59   ` Matt Armstrong
  2021-03-02  9:56     ` Daniel Brooks
@ 2021-03-02 12:01     ` Dmitry Gutov
  2021-03-02 14:14       ` Alan Mackenzie
  1 sibling, 1 reply; 75+ messages in thread
From: Dmitry Gutov @ 2021-03-02 12:01 UTC (permalink / raw)
  To: Matt Armstrong, Alan Mackenzie, Naoya Yamashita; +Cc: emacs-devel

On 02.03.2021 07:59, Matt Armstrong wrote:
> C++ has probably the most flexible "gold standard" raw string literals.
> As Alan I think rightly points out, this makes the language and all
> tools that process the language more complex.  This is a high cost, so
> the feature should deliver some real value.
> 
> For those that don't know, C++'s raw string literals can be as simple as
> this for the string "raw-content":
> 
>     R"(raw-content)"
> 
> But if the content itself contains the character sequence )" then the
> programmer can specify any delimiter they want:
> 
>     R"DELIMITER(raw-content)"more-raw-content)DELIMITER"

Sounds very similar to Ruby's heredocs or "Percent Strings" (and both of 
those have their own extra complexity because of allowed nesting).

Both are supported by ruby-mode with syntax-propertize-function without 
too much trouble.



^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH] Interpret #r"..." as a raw string
  2021-03-02 11:41 ` Aurélien Aptel
@ 2021-03-02 13:49   ` Stefan Monnier
  2021-03-02 14:46     ` Aurélien Aptel
  2021-03-02 20:36     ` Daniel Brooks
  0 siblings, 2 replies; 75+ messages in thread
From: Stefan Monnier @ 2021-03-02 13:49 UTC (permalink / raw)
  To: Aurélien Aptel; +Cc: Naoya Yamashita, Emacs development discussions

> I have done a patch like that a few years ago and it wasn't accepted
> then; see past discussion:
> https://mail.gnu.org/archive/html/emacs-devel/2012-08/msg00071.html
>
> My notes and patch on implementing it:
> http://diobla.info/blog-archive/raw-strings.html

Thanks, that's quite helpful.
At the end you state:

    I personally think raw strings have their use outside of regexes and
    would be a nice addition to the Emacs Lisp language.

I'm willing to believe it, but so far the only concrete examples I've
seen where raw string literals could be helpful are regexps.

I'm clearly leaning against the addition of raw string literal (just
like I'm leaning against the addition of multiline comments, BTW)
because I feel the benefits are quite limited.


        Stefan




^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH] Interpret #r"..." as a raw string
  2021-03-02 12:01     ` Dmitry Gutov
@ 2021-03-02 14:14       ` Alan Mackenzie
  2021-03-02 14:32         ` Dmitry Gutov
  0 siblings, 1 reply; 75+ messages in thread
From: Alan Mackenzie @ 2021-03-02 14:14 UTC (permalink / raw)
  To: Dmitry Gutov; +Cc: Matt Armstrong, Naoya Yamashita, emacs-devel

Hello, Dmitry

On Tue, Mar 02, 2021 at 14:01:10 +0200, Dmitry Gutov wrote:
> On 02.03.2021 07:59, Matt Armstrong wrote:
> > C++ has probably the most flexible "gold standard" raw string literals.
> > As Alan I think rightly points out, this makes the language and all
> > tools that process the language more complex.  This is a high cost, so
> > the feature should deliver some real value.

> > For those that don't know, C++'s raw string literals can be as simple as
> > this for the string "raw-content":

> >     R"(raw-content)"

> > But if the content itself contains the character sequence )" then the
> > programmer can specify any delimiter they want:

> >     R"DELIMITER(raw-content)"more-raw-content)DELIMITER"

> Sounds very similar to Ruby's heredocs or "Percent Strings" (and both of 
> those have their own extra complexity because of allowed nesting).

> Both are supported by ruby-mode with syntax-propertize-function without 
> too much trouble.

I've just tried this out, looking up some ruby syntax on Wikipedia.
Adapting its example, start out with this in ruby-mode:

#########################################################################
a = <<-BLOCK

This is a double-quoted string
BLCK
BLOCK
b
#########################################################################

Now everything down to and including the second BLOCK has string face.
This seems correct.  Delete the O from the first BLOCK, so that the
string is now terminated by BLCK.  The second BLOCK still has string
face, although it is no longer in the string.  Is this a bug?

-- 
Alan Mackenzie (Nuremberg, Germany).



^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH] Interpret #r"..." as a raw string
  2021-03-02 14:14       ` Alan Mackenzie
@ 2021-03-02 14:32         ` Dmitry Gutov
  2021-03-02 15:06           ` Alan Mackenzie
  0 siblings, 1 reply; 75+ messages in thread
From: Dmitry Gutov @ 2021-03-02 14:32 UTC (permalink / raw)
  To: Alan Mackenzie; +Cc: Matt Armstrong, Naoya Yamashita, emacs-devel

On 02.03.2021 16:14, Alan Mackenzie wrote:
>> Both are supported by ruby-mode with syntax-propertize-function without
>> too much trouble.
> I've just tried this out, looking up some ruby syntax on Wikipedia.
> Adapting its example, start out with this in ruby-mode:
> 
> #########################################################################
> a = <<-BLOCK
> 
> This is a double-quoted string
> BLCK
> BLOCK
> b
> #########################################################################
> 
> Now everything down to and including the second BLOCK has string face.
> This seems correct.  Delete the O from the first BLOCK, so that the
> string is now terminated by BLCK.  The second BLOCK still has string
> face, although it is no longer in the string.  Is this a bug?

Would be a bug, but I can't exactly reproduce it.

I delete the O, then wait 2 seconds (because of 
jit-lock-antiblink-grace, I guess?), and the second block gets fontified 
with font-lock-type-face.

Set jit-lock-antiblink-grace to nil to make this happen faster. Still 
dependent on jit-lock-context-time, though.



^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH] Interpret #r"..." as a raw string
  2021-03-02 13:49   ` Stefan Monnier
@ 2021-03-02 14:46     ` Aurélien Aptel
  2021-03-02 15:11       ` Stefan Monnier
  2021-03-02 20:36     ` Daniel Brooks
  1 sibling, 1 reply; 75+ messages in thread
From: Aurélien Aptel @ 2021-03-02 14:46 UTC (permalink / raw)
  To: Stefan Monnier; +Cc: Naoya Yamashita, Emacs development discussions

Hi Stefan,

On Tue, Mar 2, 2021 at 2:49 PM Stefan Monnier <monnier@iro.umontreal.ca> wrote:
> Thanks, that's quite helpful.
> At the end you state:
>
>     I personally think raw strings have their use outside of regexes and
>     would be a nice addition to the Emacs Lisp language.
>
> I'm willing to believe it, but so far the only concrete examples I've
> seen where raw string literals could be helpful are regexps.

It's the most common use-case. The other is embedding conf files or
source files in other languages (which is the same really, except
there's no rx equivalent for those). It's pretty nice to be able to
just dump some text verbatim (especially when copy pasting it from
somewhere) with some special quotes at the start and at the end and
not have to worry about it.

> I'm clearly leaning against the addition of raw string literal (just
> like I'm leaning against the addition of multiline comments, BTW)
> because I feel the benefits are quite limited.

I understand. I see some benefits with no cons. But I'm not
maintaining Emacs so ultimately it's up to you, Eli and the rest of
the crew (thx for all those years of work btw!).

Cheers,



^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH] Interpret #r"..." as a raw string
  2021-03-02 14:32         ` Dmitry Gutov
@ 2021-03-02 15:06           ` Alan Mackenzie
  0 siblings, 0 replies; 75+ messages in thread
From: Alan Mackenzie @ 2021-03-02 15:06 UTC (permalink / raw)
  To: Dmitry Gutov; +Cc: Matt Armstrong, Naoya Yamashita, emacs-devel

Hello, Dmitry.

On Tue, Mar 02, 2021 at 16:32:45 +0200, Dmitry Gutov wrote:
> On 02.03.2021 16:14, Alan Mackenzie wrote:
> >> Both are supported by ruby-mode with syntax-propertize-function without
> >> too much trouble.
> > I've just tried this out, looking up some ruby syntax on Wikipedia.
> > Adapting its example, start out with this in ruby-mode:

> > #########################################################################
> > a = <<-BLOCK

> > This is a double-quoted string
> > BLCK
> > BLOCK
> > b
> > #########################################################################

> > Now everything down to and including the second BLOCK has string face.
> > This seems correct.  Delete the O from the first BLOCK, so that the
> > string is now terminated by BLCK.  The second BLOCK still has string
> > face, although it is no longer in the string.  Is this a bug?

> Would be a bug, but I can't exactly reproduce it.

Sorry, my mistake.  On my terminal, string face and type face are both
green.  Hence the confusion.

> I delete the O, then wait 2 seconds (because of 
> jit-lock-antiblink-grace, I guess?), and the second block gets fontified 
> with font-lock-type-face.

> Set jit-lock-antiblink-grace to nil to make this happen faster. Still 
> dependent on jit-lock-context-time, though.

-- 
Alan Mackenzie (Nuremberg, Germany).



^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH] Interpret #r"..." as a raw string
  2021-03-02 14:46     ` Aurélien Aptel
@ 2021-03-02 15:11       ` Stefan Monnier
  2021-03-02 16:07         ` Aurélien Aptel
  0 siblings, 1 reply; 75+ messages in thread
From: Stefan Monnier @ 2021-03-02 15:11 UTC (permalink / raw)
  To: Aurélien Aptel; +Cc: Naoya Yamashita, Emacs development discussions

>> I'm willing to believe it, but so far the only concrete examples I've
>> seen where raw string literals could be helpful are regexps.
>
> It's the most common use-case. The other is embedding conf files or
> source files in other languages (which is the same really, except
> there's no rx equivalent for those). It's pretty nice to be able to
> just dump some text verbatim (especially when copy pasting it from
> somewhere) with some special quotes at the start and at the end and
> not have to worry about it.

I'm sorry, but I can't quite see what you're talking about.  Can you
point to existing code where there is such embedded "conf files or
source files" so we can better judge the potential benefit?
[ And in my experience "not have to worry about it" is an illusion:
  you can never truly escape the need for escaping.  ]

> I understand. I see some benefits with no cons.

If you don't see the cons, then indeed the tradeoff is clear ;-)


        Stefan




^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH] Interpret #r"..." as a raw string
  2021-03-02 15:11       ` Stefan Monnier
@ 2021-03-02 16:07         ` Aurélien Aptel
  2021-03-03  7:31           ` Alfred M. Szmidt
  2021-03-03 16:02           ` Stefan Monnier
  0 siblings, 2 replies; 75+ messages in thread
From: Aurélien Aptel @ 2021-03-02 16:07 UTC (permalink / raw)
  To: Stefan Monnier; +Cc: Naoya Yamashita, Emacs development discussions

On Tue, Mar 2, 2021 at 4:11 PM Stefan Monnier <monnier@iro.umontreal.ca> wrote:
> I'm sorry, but I can't quite see what you're talking about.  Can you
> point to existing code where there is such embedded "conf files or
> source files" so we can better judge the potential benefit?

Just from my init.el:

     (:name "newbug" :query "to:samba-maintainers  subject:\"New:\" " :key "n")
...
  (shell-command-to-string "perl -i -pE 's/(li><a
href=\"#\\S+?\">)(\\d|\\.)+ /$1/' ~/TODO.html")
...
         (shell-command-to-string "notmuch search --limit=1
--sort=newest-first --output=messages 'tag:sent subject:\"work
report\"'")))
...

      (shell-command-to-string
       (format (concat "cd %s && git show -s '--pretty=format:%%h
(\"%%s\")' %s;"
               "echo -e '\n'$?")

...

(defvar moz-useful-functions "
function add_or_reload_url (url) {
    var i = find_tab_with_url(url)
    var t = \"\"
    if (i < 0) {
    t = gBrowser.addTab(url)
    } else {
    gBrowser.browsers[i].reload()
        t = gBrowser.tabs[i]
    }
    select_tab(t)
}
")
...
    (moz-send-string (concat "add_or_reload_url(\"file://"
(expand-file-name fn) "\");\n"))))
...
           (insert "<style type=\"text/css\">\n<!--/*--><![CDATA[/*><!--*/\n")
...

> If you don't see the cons, then indeed the tradeoff is clear ;-)

I've given a list of my use cases and I genuinely don't see cons,
assuming it gets properly implemented. I'm sorry if you listed them
earlier; I must have skipped over them.

Cheers,



^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH] Interpret #r"..." as a raw string
  2021-03-02 13:49   ` Stefan Monnier
  2021-03-02 14:46     ` Aurélien Aptel
@ 2021-03-02 20:36     ` Daniel Brooks
  2021-03-03  0:27       ` Stefan Monnier
                         ` (2 more replies)
  1 sibling, 3 replies; 75+ messages in thread
From: Daniel Brooks @ 2021-03-02 20:36 UTC (permalink / raw)
  To: Stefan Monnier
  Cc: Aurélien Aptel, Naoya Yamashita,
	Emacs development discussions

Stefan Monnier <monnier@iro.umontreal.ca> writes:

>> I have done a patch like that few years ago and it wasn't accepted
>> then see past discussion:
>> https://mail.gnu.org/archive/html/emacs-devel/2012-08/msg00071.html
>>
>> My notes and patch on implementing it:
>> http://diobla.info/blog-archive/raw-strings.html
>
> Thanks, that's quite helpful.
> At the end you state:
>
>     I personally think raw strings have their use outside of regexes and
>     would be a nice addition to the Emacs Lisp language.
>
> I'm willing to believe it, but so far the only concrete examples I've
> seen where raw string literals could be helpful are regexps.
>
> I'm clearly leaning against the addition of raw string literal (just
> like I'm leaning against the addition of multiline comments, BTW)
> because I feel the benefits are quite limited.

Luckily we have a readily–available corpus of elisp code that we can
measure.

The two–character sequence «\"» occurs on twelve and a half thousand
lines in just the Emacs lisp source code, often more than once per
line. 「\\」 is used on just over 27k lines, and “\\\\” 1604 times.

There are 101 lines using ⟦\\\\\\\\⟧.

     rg '\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\'
    textmodes/reftex-vars.el
    564:      \"\\\\\\\\begin{eqnarray}\\\\|\\\\\\\\\\\\\\\\\" works for eqnarrays.
    
    net/tramp-sh.el
    962:    quoted=`echo \"$file\" | sed -e \"s/\\\"/\\\\\\\\\\\\\\\\\\\"/\"`

Look at that! 19 backslashes followed by a double–quote. That’s just
stupid. I wonder if it’s a record? Should we use git blame to find the
names of every person who touched that line and ask them what they
think? I can guess what they would say.

I think the benefits would add up pretty quickly.

db48x



^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH] Interpret #r"..." as a raw string
  2021-03-02 20:36     ` Daniel Brooks
@ 2021-03-03  0:27       ` Stefan Monnier
  2021-03-03  0:42         ` Daniel Brooks
  2021-03-03  8:16       ` Andreas Schwab
  2021-03-03 10:12       ` Michael Albinus
  2 siblings, 1 reply; 75+ messages in thread
From: Stefan Monnier @ 2021-03-03  0:27 UTC (permalink / raw)
  To: Daniel Brooks
  Cc: Aurélien Aptel, Naoya Yamashita,
	Emacs development discussions

> There are 101 lines using ⟦\\\\\\\\⟧.
>
>      rg '\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\'

In the best case with raw string literals this would be brought down to:

      rg '\\\\\\\\\\\\\\\\'

>     textmodes/reftex-vars.el
>     564:      \"\\\\\\\\begin{eqnarray}\\\\|\\\\\\\\\\\\\\\\\" works for eqnarrays.

And this would be:

     564:      "\\\\begin{eqnarray}\\|\\\\\\\\" works for eqnarrays.

While it's clearly a bit better, it's clearly no silver bullet.

> Look at that! 19 backslashes followed by a double–quote. That’s just
> stupid.

The new version has fewer backslashes, but I'm not sure it'd be few
enough to get below the threshold for "stupid".

> I think the benefits would add up pretty quickly.

I'm not convinced.


        Stefan




^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH] Interpret #r"..." as a raw string
  2021-03-03  0:27       ` Stefan Monnier
@ 2021-03-03  0:42         ` Daniel Brooks
  0 siblings, 0 replies; 75+ messages in thread
From: Daniel Brooks @ 2021-03-03  0:42 UTC (permalink / raw)
  To: Stefan Monnier
  Cc: Aurélien Aptel, Naoya Yamashita,
	Emacs development discussions

Stefan Monnier <monnier@iro.umontreal.ca> writes:

>>     textmodes/reftex-vars.el
>>     564:      \"\\\\\\\\begin{eqnarray}\\\\|\\\\\\\\\\\\\\\\\" works for eqnarrays.
>
> And this would be:
>
>      564:      "\\\\begin{eqnarray}\\|\\\\\\\\" works for eqnarrays.
>
> While it's clearly a bit better, it's clearly no silver bullet.
>
>> Look at that! 19 backslashes followed by a double–quote. That’s just
>> stupid.
>
> The new version has fewer backslashes, but I'm not sure it'd be few
> enough to get below the threshold for "stupid".
>
>> I think the benefits would add up pretty quickly.
>
> I'm not convinced.

So what you're saying is that because there are multiple layers of
escaping involved, we can't improve any one of those layers because it
doesn't eliminate the problem?

I say that halving the number of backslashes in cases like this is a
good thing, and very much worth the minor incremental cost of a few
dozen lines of extra code.

db48x



^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH] Interpret #r"..." as a raw string
  2021-03-02  5:45           ` Matt Armstrong
@ 2021-03-03  5:53             ` Richard Stallman
  2021-03-03  6:14               ` Daniel Brooks
  2021-03-03  7:00               ` Eli Zaretskii
  0 siblings, 2 replies; 75+ messages in thread
From: Richard Stallman @ 2021-03-03  5:53 UTC (permalink / raw)
  To: Matt Armstrong; +Cc: db48x, eliz, conao3, monnier, emacs-devel

[[[ To any NSA and FBI agents reading my email: please consider    ]]]
[[[ whether defending the US Constitution against all enemies,     ]]]
[[[ foreign or domestic, requires you to follow Snowden's example. ]]]

  > I have seen it called a "raw string literal" in other languages.

The concept of "literal" does not fit Lisp very well.  It describes the
situation with other languages, where expressions are not objects,
only syntax.  In those languages, you don't put a string into your
program, only a "literal" which represents a string value.

In Lisp, what you put in the program _is_ a string.  What you write
in the textual code is not a "literal", it is the textual representation
of the string to put in the program.

I just saw that the term "literal" appears occasionally in the Emacs
Lisp Ref Manual.  Would someone like to rewrite those parts so that they
fit the concepts of Lisp better?


-- 
Dr Richard Stallman
Chief GNUisance of the GNU Project (https://gnu.org)
Founder, Free Software Foundation (https://fsf.org)
Internet Hall-of-Famer (https://internethalloffame.org)





^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH] Interpret #r"..." as a raw string
  2021-03-03  5:53             ` Richard Stallman
@ 2021-03-03  6:14               ` Daniel Brooks
  2021-03-03  7:00               ` Eli Zaretskii
  1 sibling, 0 replies; 75+ messages in thread
From: Daniel Brooks @ 2021-03-03  6:14 UTC (permalink / raw)
  To: Richard Stallman; +Cc: Matt Armstrong, eliz, conao3, monnier, emacs-devel

Richard Stallman <rms@gnu.org> writes:

>   > I have seen it called a "raw string literal" in other languages.
>
> The concept of "literal" does not fit Lisp very well.  It describes the
> situation with other languages, where expressions are not objects,
> only syntax.  In those languages, you don't put a string into your
> program, only a "literal" which represents a string value.
>
> In Lisp, what you put in the program _is_ a string.  What you write
> in the textual code is not a "literal", it is the textual representation
> of the string to put in the program.
>
> I just saw that the term "literal" appears occasionally in the Emacs
> Lisp Ref Manual.  Would someone like to rewrite those parts so that they
> fit the concepts of Lisp better?

Are you sure? I could be wrong, but I think that the idea of literals
for types other than just numbers and strings comes from Lisp.

For example, if Javascript didn't have array literals then code like
this would be common:

    var foo = new Array();
    foo[0] = 42;
    foo[1] = 24;

Compare that with "var foo = [42, 24]" and "(setq foo '(42 24))".

I've not written much Pascal, but in section 19 of TeX: The Program we
see an array declared:

    xchr: array [ASCII_code] of text_char;

Followed by initialization in section 20:

    xchr[’40] ← ' ';
    xchr[’41] ← '!';
    xchr[’42] ← '"';

and so on.

But the terminology is something that probably doesn't come from Lisp,
since Lisp doesn't make any distinction between a list used for data and
a list used for code.

db48x



^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH] Interpret #r"..." as a raw string
  2021-03-03  5:53             ` Richard Stallman
  2021-03-03  6:14               ` Daniel Brooks
@ 2021-03-03  7:00               ` Eli Zaretskii
  2021-03-04  2:47                 ` Matt Armstrong
  1 sibling, 1 reply; 75+ messages in thread
From: Eli Zaretskii @ 2021-03-03  7:00 UTC (permalink / raw)
  To: rms; +Cc: db48x, matt, conao3, monnier, emacs-devel

> From: Richard Stallman <rms@gnu.org>
> Cc: monnier@iro.umontreal.ca, db48x@db48x.net, eliz@gnu.org,
> 	conao3@gmail.com, emacs-devel@gnu.org
> Date: Wed, 03 Mar 2021 00:53:55 -0500
> 
> In Lisp, what you put in the program _is_ a string.  What you write
> in the textual code is not a "literal", it is the textual representation
> of the string to put in the program.

What is the difference between "literal string" and "textual
representation of a string"?  I don't think I understand the
difference from what you wrote.

> I just saw that the term "literal" appears occasionally in the Emacs
> Lisp Ref Manual.  Would someone like to rewrite those parts so that they
> fit the concepts of Lisp better?

To rewrite those parts, one needs to understand what's wrong with them
and how to express that better.  For example, we also say "literal
characters" in a few places -- is that wrong as well, and if so, how?



^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH] Interpret #r"..." as a raw string
  2021-03-02 16:07         ` Aurélien Aptel
@ 2021-03-03  7:31           ` Alfred M. Szmidt
  2021-03-03 16:02           ` Stefan Monnier
  1 sibling, 0 replies; 75+ messages in thread
From: Alfred M. Szmidt @ 2021-03-03  7:31 UTC (permalink / raw)
  Cc: conao3, monnier, emacs-devel

   ...
     (shell-command-to-string "perl -i -pE 's/(li><a
   href=\"#\\S+?\">)(\\d|\\.)+ /$1/' ~/TODO.html")
   ...
	    (shell-command-to-string "notmuch search --limit=1
   --sort=newest-first --output=messages 'tag:sent subject:\"work
   report\"'")))
   ...

	 (shell-command-to-string
	  (format (concat "cd %s && git show -s '--pretty=format:%%h
   (\"%%s\")' %s;"
		  "echo -e '\n'$?")

The above seem like they could be written in Emacs Lisp instead of
calling to shell-command-to-string.

   (defvar moz-useful-functions "
   function add_or_reload_url (url) {
       var i = find_tab_with_url(url)
       var t = \"\"
       if (i < 0) {
       t = gBrowser.addTab(url)
       } else {
       gBrowser.browsers[i].reload()
	   t = gBrowser.tabs[i]
       }
       select_tab(t)
   }
   ")

For large swaths of code, it is generally better to put the snippet
into a separate file and then read it in.



^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH] Interpret #r"..." as a raw string
  2021-03-02 20:36     ` Daniel Brooks
  2021-03-03  0:27       ` Stefan Monnier
@ 2021-03-03  8:16       ` Andreas Schwab
  2021-03-03  9:25         ` Daniel Brooks
  2021-03-03 10:12       ` Michael Albinus
  2 siblings, 1 reply; 75+ messages in thread
From: Andreas Schwab @ 2021-03-03  8:16 UTC (permalink / raw)
  To: Daniel Brooks
  Cc: Aurélien Aptel, Naoya Yamashita, Stefan Monnier,
	Emacs development discussions

On Mär 02 2021, Daniel Brooks wrote:

>     net/tramp-sh.el
>     962:    quoted=`echo \"$file\" | sed -e \"s/\\\"/\\\\\\\\\\\\\\\\\\\"/\"`
>
> Look at that! 19 backslashes followed by a double–quote.

That can easily be halved by using '' instead of "".

Andreas.

-- 
Andreas Schwab, schwab@linux-m68k.org
GPG Key fingerprint = 7578 EB47 D4E5 4D69 2510  2552 DF73 E780 A9DA AEC1
"And now for something completely different."



^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH] Interpret #r"..." as a raw string
  2021-03-03  8:16       ` Andreas Schwab
@ 2021-03-03  9:25         ` Daniel Brooks
  2021-03-03  9:29           ` Andreas Schwab
  0 siblings, 1 reply; 75+ messages in thread
From: Daniel Brooks @ 2021-03-03  9:25 UTC (permalink / raw)
  To: Andreas Schwab
  Cc: Aurélien Aptel, Naoya Yamashita, Stefan Monnier,
	Emacs development discussions

Andreas Schwab <schwab@linux-m68k.org> writes:

> On Mär 02 2021, Daniel Brooks wrote:
>
>>     net/tramp-sh.el
>>     962:    quoted=`echo \"$file\" | sed -e \"s/\\\"/\\\\\\\\\\\\\\\\\\\"/\"`
>>
>> Look at that! 19 backslashes followed by a double–quote.
>
> That can easily be halved by using '' instead of "".

That is absolutely true. We could then halve it again by using a raw
string literal. (Or we could eliminate it entirely with a suitable
abstraction, but that's for a different conversation.)

As escaping is an exponential function, anything we can do to reduce
that exponent is equally valuable.

db48x



^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH] Interpret #r"..." as a raw string
  2021-03-03  9:25         ` Daniel Brooks
@ 2021-03-03  9:29           ` Andreas Schwab
  2021-03-03 10:02             ` Daniel Brooks
  0 siblings, 1 reply; 75+ messages in thread
From: Andreas Schwab @ 2021-03-03  9:29 UTC (permalink / raw)
  To: Daniel Brooks
  Cc: Aurélien Aptel, Naoya Yamashita, Stefan Monnier,
	Emacs development discussions

On Mär 03 2021, Daniel Brooks wrote:

> That is absolutely true. We could then halve it again by using a raw
> string literal.

The question remains whether the complexity increase is warranted just
for a few occurrences of such cases.

Andreas.

-- 
Andreas Schwab, schwab@linux-m68k.org
GPG Key fingerprint = 7578 EB47 D4E5 4D69 2510  2552 DF73 E780 A9DA AEC1
"And now for something completely different."



^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH] Interpret #r"..." as a raw string
  2021-03-03  9:29           ` Andreas Schwab
@ 2021-03-03 10:02             ` Daniel Brooks
  2021-03-03 10:11               ` Daniel Brooks
  0 siblings, 1 reply; 75+ messages in thread
From: Daniel Brooks @ 2021-03-03 10:02 UTC (permalink / raw)
  To: Andreas Schwab
  Cc: Aurélien Aptel, Naoya Yamashita, Stefan Monnier,
	Emacs development discussions

Andreas Schwab <schwab@linux-m68k.org> writes:

> On Mär 03 2021, Daniel Brooks wrote:
>
>> That is absolutely true. We could then halve it again by using a raw
>> string literal.
>
> The question remains whether the complexity increase is warranted just
> for a few occurrences of such cases.

I included that example because it is egregious, and I was astounded
when I saw it. The real benefit comes from the far more frequent cases
where people find themselves using «\\» or «\\\\». And if or when we
implement a syntax that allows double-quotes in a raw string literal, it
will save people even more time, because there are yet more tens of
thousands of cases of «\"» in the code.

db48x



^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH] Interpret #r"..." as a raw string
  2021-03-03 10:02             ` Daniel Brooks
@ 2021-03-03 10:11               ` Daniel Brooks
  2021-03-03 10:14                 ` Andreas Schwab
  0 siblings, 1 reply; 75+ messages in thread
From: Daniel Brooks @ 2021-03-03 10:11 UTC (permalink / raw)
  To: Andreas Schwab
  Cc: Aurélien Aptel, Naoya Yamashita, Stefan Monnier,
	Emacs development discussions

Daniel Brooks <db48x@db48x.net> writes:

> Andreas Schwab <schwab@linux-m68k.org> writes:
>
>> On Mär 03 2021, Daniel Brooks wrote:
>>
>>> That is absolutely true. We could then halve it again by using a raw
>>> string literal.
>>
>> The question remains whether the complexity increase is warranted just
>> for a few occurrences of such cases.
>
> I included that example because it is egregious, and I was astounded
> when I saw it. The real benefit comes from the far more frequent cases
> where people find themselves using «\\» or «\\\\». And if or when we
> implement a syntax that allows double-quotes in a raw string literal, it
> will save people even more time, because there are yet more tens of
> thousands of cases of «\"» in the code.

Also, I think it's pretty funny that you suggest we should be using
Bash's raw strings because they are useful, and simultaneously that it
would not be useful for Emacs to have raw strings.

They would be useful for the same things, and complement each other very
well.

db48x



^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH] Interpret #r"..." as a raw string
  2021-03-02 20:36     ` Daniel Brooks
  2021-03-03  0:27       ` Stefan Monnier
  2021-03-03  8:16       ` Andreas Schwab
@ 2021-03-03 10:12       ` Michael Albinus
  2021-03-03 10:42         ` Daniel Brooks
  2 siblings, 1 reply; 75+ messages in thread
From: Michael Albinus @ 2021-03-03 10:12 UTC (permalink / raw)
  To: Daniel Brooks
  Cc: Aurélien Aptel, Naoya Yamashita, Stefan Monnier,
	Emacs development discussions

Daniel Brooks <db48x@db48x.net> writes:

Hi Daniel,

>     net/tramp-sh.el
>     962:    quoted=`echo \"$file\" | sed -e \"s/\\\"/\\\\\\\\\\\\\\\\\\\"/\"`
>
> Look at that! 19 backslashes followed by a double–quote. That’s just
> stupid. I wonder if it’s a record? Should we use git blame to find the
> names of every person who touched that line and ask them what they
> think? I can guess what they would say.

Really? I'm the only person who has updated this line, and I'm quite
happy with the state of affairs.

And in any case, Tramp won't use raw string literals for the next 10
years or so, because it must be backward compatible.

> db48x

Best regards, Michael.



^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH] Interpret #r"..." as a raw string
  2021-03-03 10:11               ` Daniel Brooks
@ 2021-03-03 10:14                 ` Andreas Schwab
  2021-03-03 11:48                   ` Daniel Brooks
  0 siblings, 1 reply; 75+ messages in thread
From: Andreas Schwab @ 2021-03-03 10:14 UTC (permalink / raw)
  To: Daniel Brooks
  Cc: Aurélien Aptel, Naoya Yamashita, Stefan Monnier,
	Emacs development discussions

On Mär 03 2021, Daniel Brooks wrote:

> Also, I think it's pretty funny that you suggest we should be using
> Bash's raw strings because they are useful, and simultaneously that it
> would not be useful for Emacs to have raw strings.

There is nothing wrong with using existing solutions.

Andreas.

-- 
Andreas Schwab, schwab@linux-m68k.org
GPG Key fingerprint = 7578 EB47 D4E5 4D69 2510  2552 DF73 E780 A9DA AEC1
"And now for something completely different."



^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH] Interpret #r"..." as a raw string
  2021-03-03 10:12       ` Michael Albinus
@ 2021-03-03 10:42         ` Daniel Brooks
  2021-03-03 10:49           ` Michael Albinus
  2021-03-03 16:12           ` Stefan Monnier
  0 siblings, 2 replies; 75+ messages in thread
From: Daniel Brooks @ 2021-03-03 10:42 UTC (permalink / raw)
  To: Michael Albinus
  Cc: Aurélien Aptel, Naoya Yamashita, Stefan Monnier,
	Emacs development discussions

Michael Albinus <michael.albinus@gmx.de> writes:

> Daniel Brooks <db48x@db48x.net> writes:
>
> Hi Daniel,
>
>>     net/tramp-sh.el
>>     962:    quoted=`echo \"$file\" | sed -e \"s/\\\"/\\\\\\\\\\\\\\\\\\\"/\"`
>>
>> Look at that! 19 backslashes followed by a double–quote. That’s just
>> stupid. I wonder if it’s a record? Should we use git blame to find the
>> names of every person who touched that line and ask them what they
>> think? I can guess what they would say.
>
> Really? I'm the only person who has updated this line, and I'm quite
> happy with the state of affairs.

Then I guessed wrong. Will you be insulted if I assume that you have a
slight case of Stockholm syndrome? ;D

> And in any case, Tramp won't use raw string literals for the next 10
> years or so, because it must be backward compatible.

That's fine. I'm not suggesting that anyone should rewrite the thirty or
forty thousand lines of lisp in Emacs that have excess backslashes
either. They already work! They're fine.

I think a raw string syntax is a good idea because of the time it will
save us all in the future, writing the next thirty or forty thousand
lines. And then, after that, people will write another thirty or forty
thousand lines. A generation after that will write yet more code that
has syntax nested inside of syntax, and the time saved will continue to
add up. As the millennia pass by, more and more time will be saved, to be
spent on more useful things. Humans all across the Milky Way and even as
far as its satellite dwarf galaxies, will all happily use raw string
syntax never knowing that one of their distant ancestors blithely wrote,
tested, and debugged a line with 19 backslashes in a row merely because
raw strings hadn't been properly invented yet. As the first ships depart
for Andromeda carrying the seeds of new civilizations, computers filled
to the brim with software…

You get the idea. The cost is small, and the benefit is large.

db48x



^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH] Interpret #r"..." as a raw string
  2021-03-03 10:42         ` Daniel Brooks
@ 2021-03-03 10:49           ` Michael Albinus
  2021-03-03 16:12           ` Stefan Monnier
  1 sibling, 0 replies; 75+ messages in thread
From: Michael Albinus @ 2021-03-03 10:49 UTC (permalink / raw)
  To: Daniel Brooks
  Cc: Aurélien Aptel, Naoya Yamashita, Stefan Monnier,
	Emacs development discussions

Daniel Brooks <db48x@db48x.net> writes:

Hi Daniel,

> Then I guessed wrong. Will you be insulted if I assume that you have a
> slight case of Stockholm syndrome? ;D

Likely, it is true. Tramp is about running shell commands, and quoting
the arguments is one of my major jobs.

> db48x

Best regards, Michael.



^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH] Interpret #r"..." as a raw string
  2021-03-03 10:14                 ` Andreas Schwab
@ 2021-03-03 11:48                   ` Daniel Brooks
  0 siblings, 0 replies; 75+ messages in thread
From: Daniel Brooks @ 2021-03-03 11:48 UTC (permalink / raw)
  To: Andreas Schwab
  Cc: Aurélien Aptel, Naoya Yamashita, Stefan Monnier,
	Emacs development discussions

Andreas Schwab <schwab@linux-m68k.org> writes:

> On Mär 03 2021, Daniel Brooks wrote:
>
>> Also, I think it's pretty funny that you suggest we should be using
>> Bash's raw strings because they are useful, and simultaneously that it
>> would not be useful for Emacs to have raw strings.
>
> There is nothing wrong with using existing solutions.

That's a rather good point.

Bash already exists, and someone else is already doing the
maintenance. The implementation effort was spent decades ago, and the
burden of maintaining that implementation appears to fall on a single
person, Chet Ramey. Possibly there are a few others who help out (plus
there are the maintainers of compatible shells, who must do similar
maintenance work). The benefit is spread out over millions of people
world–wide. It is certainly good for us to take advantage of Bash
features like single–quoted strings, since they benefit us and cost us
nothing.

Meanwhile, we have dozens of people here who regularly help maintain
Emacs, so I don't think it would be too hard for us to provide the same
benefit to our users, and ourselves.

db48x



^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH] Interpret #r"..." as a raw string
  2021-03-02 16:07         ` Aurélien Aptel
  2021-03-03  7:31           ` Alfred M. Szmidt
@ 2021-03-03 16:02           ` Stefan Monnier
  1 sibling, 0 replies; 75+ messages in thread
From: Stefan Monnier @ 2021-03-03 16:02 UTC (permalink / raw)
  To: Aurélien Aptel; +Cc: Naoya Yamashita, Emacs development discussions

>> If you don't see the cons, then indeed the tradeoff is clear ;-)
> I've given a list of my use cases and I genuinely don't see cons,
> assuming it gets properly implemented. I'm sorry if you listed them
> earlier I must have skipped over.

The cons are the extra complexity in the language, reflected by extra
code in lread.c, extra code in elisp-mode.el, extra text in the lispref,
which will probably be followed by bug reports about some cases where
using a raw string literal doesn't work as it should (for code that's
analyzed at the text level rather than the sexp level, I'm thinking
here of code that does things like `grep '(foo "' ...`.  I can't think
offhand of such things, but I wouldn't be completely surprised if it
shows up maybe when we collect docstrings for the etc/DOC file, I also
suspect changes will be needed in edebug.el, possibly in pp.el, ...).


        Stefan




^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH] Interpret #r"..." as a raw string
  2021-03-03 10:42         ` Daniel Brooks
  2021-03-03 10:49           ` Michael Albinus
@ 2021-03-03 16:12           ` Stefan Monnier
  1 sibling, 0 replies; 75+ messages in thread
From: Stefan Monnier @ 2021-03-03 16:12 UTC (permalink / raw)
  To: Daniel Brooks
  Cc: Aurélien Aptel, Naoya Yamashita, Michael Albinus,
	Emacs development discussions

> Then I guessed wrong. Will you be insulted if I assume that you have a
> slight case of stockholm syndrome? ;D

Except that escaping is not going anywhere.  You can use various tricks
to try and avoid it, but the *main* issue with escaping is not to double
those backslashes, it's just to be aware of the need for it and know
when and how to handle it (witness the number of commits where we just
add a missing call to `shell-quote-argument`).

Introducing raw string literals doesn't actually help in this regard, AFAICT.


        Stefan




^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH] Interpret #r"..." as a raw string
  2021-03-03  7:00               ` Eli Zaretskii
@ 2021-03-04  2:47                 ` Matt Armstrong
  2021-03-04 13:49                   ` Eli Zaretskii
  2021-03-05  5:39                   ` Richard Stallman
  0 siblings, 2 replies; 75+ messages in thread
From: Matt Armstrong @ 2021-03-04  2:47 UTC (permalink / raw)
  To: Eli Zaretskii, rms; +Cc: db48x, conao3, monnier, emacs-devel

Eli Zaretskii <eliz@gnu.org> writes:

>> From: Richard Stallman <rms@gnu.org>
>> Cc: monnier@iro.umontreal.ca, db48x@db48x.net, eliz@gnu.org,
>> 	conao3@gmail.com, emacs-devel@gnu.org
>> Date: Wed, 03 Mar 2021 00:53:55 -0500
>> 
>> In Lisp, what you put in the program _is_ a string.  What you write
>> in the textual code is not a "literal", it is the textual representation
>> of the string to put in the program.
>
> What is the difference between "literal string" and "textual
> representation of a string"?  I don't think I understand the
> difference from what you wrote.
>
>> I just saw that the term "literal" appears occasionally in the Emacs
>> Lisp Ref Manual.  Would someone like to rewrite those parts so that they
>> fit the concepts of Lisp better?
>
> To rewrite those parts, one needs to understand what's wrong with them
> and how to express that better.  For example, we also say "literal
> characters" in a few places -- is that wrong as well, and if so, how?

I have noticed that most Lisp reference material does not use the word
"literal" to describe source code elements.  Which raises the question:
how does Lisp documentation typically talk about these things?

In my experience, most references typically don't name them at all.
E.g. when talking about strings it might say "the read syntax of a
string is ..." but things that satisfy "the read syntax of a string" are
never given a name like "string literal."



^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH] Interpret #r"..." as a raw string
  2021-03-04  2:47                 ` Matt Armstrong
@ 2021-03-04 13:49                   ` Eli Zaretskii
  2021-03-04 16:55                     ` Matt Armstrong
  2021-03-05  5:39                   ` Richard Stallman
  1 sibling, 1 reply; 75+ messages in thread
From: Eli Zaretskii @ 2021-03-04 13:49 UTC (permalink / raw)
  To: Matt Armstrong; +Cc: db48x, emacs-devel, conao3, rms, monnier

> From: Matt Armstrong <matt@rfc20.org>
> Cc: monnier@iro.umontreal.ca, db48x@db48x.net, conao3@gmail.com,
>  emacs-devel@gnu.org
> Date: Wed, 03 Mar 2021 18:47:10 -0800
> 
> I have noticed that most Lisp reference material does not use the word
> "literal" to describe source code elements.  Which raises the question:
> how does Lisp documentation typically talk about these things?

What is a "source code element"?



^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH] Interpret #r"..." as a raw string
  2021-03-04 13:49                   ` Eli Zaretskii
@ 2021-03-04 16:55                     ` Matt Armstrong
  2021-03-05  5:44                       ` Richard Stallman
  0 siblings, 1 reply; 75+ messages in thread
From: Matt Armstrong @ 2021-03-04 16:55 UTC (permalink / raw)
  To: Eli Zaretskii; +Cc: db48x, emacs-devel, conao3, rms, monnier

Eli Zaretskii <eliz@gnu.org> writes:

>> From: Matt Armstrong <matt@rfc20.org>
>> Cc: monnier@iro.umontreal.ca, db48x@db48x.net, conao3@gmail.com,
>>  emacs-devel@gnu.org
>> Date: Wed, 03 Mar 2021 18:47:10 -0800
>> 
>> I have noticed that most Lisp reference material does not use the word
>> "literal" to describe source code elements.  Which raises the question:
>> how does Lisp documentation typically talk about these things?
>
> What is a "source code element"?

Like you I'm trying to figure out why, in Richard's words, the concept
of "literal" does not fit Lisp very well.

One definition of "literal" is "Of, in, or expressed by a letter or
the letters of the alphabet." and this is the sense I believe most
people mean in programming contexts.  With my current understanding, I
think this meaning applies to Lisp as well as it does in any other
language.  I don't take this to imply that the particular word "literal"
is necessarily the preferred way to describe the concept in Lisp.


Let's go back to Richard's original text:

>  The concept of "literal" does not fit Lisp very well.  It describes
>  the situation with other languages, where expressions are not
>  objects, only syntax.  In those languages, you don't put a string
>  into your program, only a "literal" which represents a string value.
>
>  In Lisp, what you put in the program _is_ a string.  What you write
>  in the textual code is not a "literal", it is the textual
>  representation of the string to put in the program.

My observation is that most descriptions of source code syntax for Lisp
languages do not use the term "literal".  Instead, they describe the
"read syntax" of each object.  Similar for printing, with language such
as "`prin1' produces output suitable for input to `read'" and phrases
like "printed representation of ...".

When I first learned Lisp I was left wondering why these long phrases
were repeated over and over throughout the documentation.  Then I had the
thought: By "printed representation of a string" and "read syntax of a
string" they are just describing "string literals"  :-)

In Richard's text above he uses language that I consider confusing: "In
Lisp, what you put in the program _is_ a string."  Why confusing?
Because if "put in program" is talking about source code, the statement
isn't true.  What you put in the program is a quoted sequence of
characters, perhaps in US-ASCII, perhaps in UTF-8, perhaps some other
encoding, depending on the encoding of the entire file.  This character
sequence is parsed by the reader according to certain syntactic rules.
The running program represents the equivalent string object entirely
differently: as an array of characters, perhaps multi-byte, perhaps not,
always without the outermost quotes, etc.  Most notably, the program can
then very conveniently mutate the string, whereas Lisp provides no
primitives to conveniently mutate the original source code.

So, given that there is a difference between a string in source code and
a string at run time, a concise phrase that designates the "printed
representation of objects of type X as written in source code" feels
useful.

I do agree that elisp.info could be improved in some uses of the term
"literal."  For example, in this description of `eq':

     The Emacs Lisp byte compiler may collapse identical literal
     objects, such as literal strings, into references to the same
     object, with the effect that the byte-compiled code will compare
     such objects as ‘eq’, while the interpreted version of the same
     code will not.  Therefore, your code should never rely on objects
     with the same literal contents being either ‘eq’ or not ‘eq’, it
     should instead use functions that compare object contents such as
     ‘equal’, described below.  Similarly, your code should not modify
     literal objects (e.g., put text properties on literal strings),
     since doing that might affect other literal objects of the same
     contents, if the byte compiler collapses them.

For one, I have not found a place in the reference where the terms
"literal object" and "literal string" are defined.  A new programmer
could be easily confused.  It may be useful to describe the general
issue being described here in a more prominent location, since its
relevance to `eq' is just one of possibly many issues.  E.g. mutating
any quoted object can cause the object to be different each time a
function is called, even if the "literal object" remains `eq' across
calls.



^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH] Interpret #r"..." as a raw string
  2021-03-04  2:47                 ` Matt Armstrong
  2021-03-04 13:49                   ` Eli Zaretskii
@ 2021-03-05  5:39                   ` Richard Stallman
  2021-03-05  8:01                     ` Eli Zaretskii
  1 sibling, 1 reply; 75+ messages in thread
From: Richard Stallman @ 2021-03-05  5:39 UTC (permalink / raw)
  To: Matt Armstrong; +Cc: db48x, eliz, conao3, monnier, emacs-devel

[[[ To any NSA and FBI agents reading my email: please consider    ]]]
[[[ whether defending the US Constitution against all enemies,     ]]]
[[[ foreign or domestic, requires you to follow Snowden's example. ]]]

  > I have noticed that most Lisp reference material does not use the word
  > "literal" to describe source code elements.  Which raises the question:
  > how does Lisp documentation typically talk about these things?

  > In my experience, most references typically don't name them at all.
  > E.g. when talking about strings it might say "the read syntax of a
  > string is ..." but things that satisfy "the read syntax of a string" are
  > never given a name like "string literal."

There is no need to give them a name, because their role in Lisp is
limited and only one section in the manual needs to talk about it.

In documenting Lisp, the printed representation for an object is
a side issue.  We only need to talk about it in one place.

In most programming languages, an expression is text and a program is
text.  That text can contain string literals.  A string literal is the
text in a text that represents a string in an expression.

The manual has to describe the rules for that text at every level.
Including how to write and use string literals.  It may need to discuss
using a string literal in a certain place in an expression.

In Lisp, a program is a data structure.  It does not contain string
literals -- it contains strings.  Thus, various sections of the manual
can talk about what happens if you use a string in a certain
expression, but they don't need to talk about the printed representation
of that expression.

-- 
Dr Richard Stallman
Chief GNUisance of the GNU Project (https://gnu.org)
Founder, Free Software Foundation (https://fsf.org)
Internet Hall-of-Famer (https://internethalloffame.org)





^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH] Interpret #r"..." as a raw string
  2021-03-04 16:55                     ` Matt Armstrong
@ 2021-03-05  5:44                       ` Richard Stallman
  0 siblings, 0 replies; 75+ messages in thread
From: Richard Stallman @ 2021-03-05  5:44 UTC (permalink / raw)
  To: Matt Armstrong; +Cc: db48x, eliz, conao3, monnier, emacs-devel

[[[ To any NSA and FBI agents reading my email: please consider    ]]]
[[[ whether defending the US Constitution against all enemies,     ]]]
[[[ foreign or domestic, requires you to follow Snowden's example. ]]]

  > In Richard's text above he uses language that I consider confusing: "In
  > Lisp, what you put in the program _is_ a string."  Why confusing?
  > Because if "put in program" is talking about source code, the statement
  > isn't true.

A Lisp program, conceptually, is not the series of characters in a file.
It is a series of Lisp expressions, of which those characters are the
printed representation.

-- 
Dr Richard Stallman
Chief GNUisance of the GNU Project (https://gnu.org)
Founder, Free Software Foundation (https://fsf.org)
Internet Hall-of-Famer (https://internethalloffame.org)





^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH] Interpret #r"..." as a raw string
  2021-03-05  5:39                   ` Richard Stallman
@ 2021-03-05  8:01                     ` Eli Zaretskii
  2021-03-06  5:13                       ` Richard Stallman
  0 siblings, 1 reply; 75+ messages in thread
From: Eli Zaretskii @ 2021-03-05  8:01 UTC (permalink / raw)
  To: rms; +Cc: db48x, matt, conao3, monnier, emacs-devel

> From: Richard Stallman <rms@gnu.org>
> Cc: eliz@gnu.org, db48x@db48x.net, conao3@gmail.com,
> 	monnier@iro.umontreal.ca, emacs-devel@gnu.org
> Date: Fri, 05 Mar 2021 00:39:27 -0500
> 
> In Lisp, a program is a data structure.  It does not contain string
> literals -- it contains strings.  Thus, various sections of the manual
> can talk about what happens if you use a string in a certain
> expression, but they don't need to talk about the printed representation
> of that expression.

I understand what you are saying, but still there is a difference
between

   (concat foo bar)

and

   (concat foo "what we call a literal string")

Even if 'bar's value is the same string as the one that appears
literally in the second example, there's at least a visual difference.
And in fact, the difference is not only visual, because the
byte-compiler is allowed to treat such "literal" strings specially in
some situations.  This is one reason why the ELisp manual mentions
literal strings: it needs to describe those special situations and the
pitfalls they bring with them.

Another reason is that many (most?) readers understand "literal
string" in the sense of the above example, so it is a convenient way
of making sure the reader understands what is being discussed.

Why is it harmful to use this terminology in conjunction with Lisp,
even though its semantics in Lisp is somewhat different?



^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH] Interpret #r"..." as a raw string
  2021-03-05  8:01                     ` Eli Zaretskii
@ 2021-03-06  5:13                       ` Richard Stallman
  2021-03-06  6:04                         ` Matt Armstrong
  2021-03-06  8:27                         ` Eli Zaretskii
  0 siblings, 2 replies; 75+ messages in thread
From: Richard Stallman @ 2021-03-06  5:13 UTC (permalink / raw)
  To: Eli Zaretskii; +Cc: db48x, matt, conao3, monnier, emacs-devel

[[[ To any NSA and FBI agents reading my email: please consider    ]]]
[[[ whether defending the US Constitution against all enemies,     ]]]
[[[ foreign or domestic, requires you to follow Snowden's example. ]]]

  > I understand what you are saying, but still there is a difference
  > between

  >    (concat foo bar)

  > and

  >    (concat foo "what we call a literal string")

I don't see a deep conceptual difference between them.
The second uses a constant where the first uses a variable.
That is a difference in detail, but not a deep difference.
At least, I don't see a deep difference.

  > And in fact, the difference is not only visual, because the
  > byte-compiler is allowed to treat such "literal" strings specially in
  > some situations.

I am not entirely sure what that refers to; I am sort-of guessing.
The thing it is treating specially is a string in the expression being
compiled, if I understand what you mean.

This discussion is not about the facts of what happens, if I understand.
It's about the way to conceptualize them.

  > Another reason is that many (most?) readers understand "literal
  > string" in the sense of the above example, so it is a convenient way
  > of making sure the reader understands what is being discussed.

Yes and no.  Readers who know other languages will get an immediate
understanding from "literal string".  But that understanding is not
exactly the right understanding.  So we ought to correct it
to get to the right understanding.


-- 
Dr Richard Stallman
Chief GNUisance of the GNU Project (https://gnu.org)
Founder, Free Software Foundation (https://fsf.org)
Internet Hall-of-Famer (https://internethalloffame.org)





^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH] Interpret #r"..." as a raw string
  2021-03-06  5:13                       ` Richard Stallman
@ 2021-03-06  6:04                         ` Matt Armstrong
  2021-03-07  6:13                           ` Richard Stallman
  2021-03-06  8:27                         ` Eli Zaretskii
  1 sibling, 1 reply; 75+ messages in thread
From: Matt Armstrong @ 2021-03-06  6:04 UTC (permalink / raw)
  To: rms, Eli Zaretskii; +Cc: db48x, conao3, monnier, emacs-devel

Richard Stallman <rms@gnu.org> writes:

>   > And in fact, the difference is not only visual, because the
>   > byte-compiler is allowed to treat such "literal" strings specially in
>   > some situations.
>
> I am not entirely sure what that refers to; I am sort-of guessing.
> The thing it is treating specially is a string in the expression being
> compiled, if I understand what you mean.
>
> This discussion is not about the facts of what happens, if I
> understand.  It's about the way to conceptualize them.
>
>   > Another reason is that many (most?) readers understand "literal
>   > string" in the sense of the above example, so it is a convenient way
>   > of making sure the reader understands what is being discussed.
>
> Yes and no.  Readers who know other languages will get an immediate
> understanding from "literal string".  But that understanding is not
> exactly the right understanding.  So we ought to correct it to get to
> the right understanding.

The place Eli was referring to, I believe, is this from (info
"(elisp)Equality Predicates"):

     The Emacs Lisp byte compiler may collapse identical literal
     objects, such as literal strings, into references to the same
     object, with the effect that the byte-compiled code will compare
     such objects as ‘eq’, while the interpreted version of the same
     code will not.  Therefore, your code should never rely on objects
     with the same literal contents being either ‘eq’ or not ‘eq’, it
     should instead use functions that compare object contents such as
     ‘equal’, described below.  Similarly, your code should not modify
     literal objects (e.g., put text properties on literal strings),
     since doing that might affect other literal objects of the same
     contents, if the byte compiler collapses them.

How might this paragraph be rephrased in a way that doesn't use the term
"literal", yet remain clear?

I think I follow this discussion, but I'm not at the point where I could
rewrite that paragraph myself.  I have too much ingrained understanding
of static programming languages and their literals and not enough
exposure to the way these concepts are described for Lisp languages.



^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH] Interpret #r"..." as a raw string
  2021-03-06  5:13                       ` Richard Stallman
  2021-03-06  6:04                         ` Matt Armstrong
@ 2021-03-06  8:27                         ` Eli Zaretskii
  2021-03-06  9:51                           ` Daniel Brooks
  2021-03-07  6:08                           ` Richard Stallman
  1 sibling, 2 replies; 75+ messages in thread
From: Eli Zaretskii @ 2021-03-06  8:27 UTC (permalink / raw)
  To: rms; +Cc: db48x, matt, conao3, monnier, emacs-devel

> From: Richard Stallman <rms@gnu.org>
> Cc: db48x@db48x.net, matt@rfc20.org, conao3@gmail.com,
> 	monnier@iro.umontreal.ca, emacs-devel@gnu.org
> Date: Sat, 06 Mar 2021 00:13:36 -0500
> 
>   > I understand what you are saying, but still there is a difference
>   > between
> 
>   >    (concat foo bar)
> 
>   > and
> 
>   >    (concat foo "what we call a literal string")
> 
> I don't see a deep conceptual difference between them.
> The second uses a constant where the first uses a variable.

So it is okay to talk about a "string constant" or a "constant string"
instead of "string literal"?  And likewise with "literal vectors"?

We generally treat these as equivalent terms.  Here's an example:

    When similar constants occur as parts of a program, the Lisp
  interpreter might save time or space by reusing existing constants or
  their components.  For example, @code{(eq "abc" "abc")} returns
  @code{t} if the interpreter creates only one instance of the string
  literal @code{"abc"}, and returns @code{nil} if it creates two
  instances.  Lisp programs should be written so that they work
  regardless of whether this optimization is in use.

As you see, "constants" and "literals" are used here interchangeably.

>   > And in fact, the difference is not only visual, because the
>   > byte-compiler is allowed to treat such "literal" strings specially in
>   > some situations.
> 
> I am not entirely sure what that refers to; I am sort-of guessing.
> The thing it is treating specially is a string in the expression being
> compiled, if I understand what you mean.

Yes, see above (and in general, see the "Mutability" node in the ELisp
manual).

> This discussion is not about the facts of what happens, if I understand.
> It's about the way to conceptualize them.

Well, in a way it is about what happens, because almost all instances
where we mention "literals" are eventually related to the pitfalls
with using those in Lisp code that is byte-compiled.

>   > Another reason is that many (most?) readers understand "literal
>   > string" in the sense of the above example, so it is a convenient way
>   > of making sure the reader understands what is being discussed.
> 
> Yes and no.  Readers who know other languages will get an immediate
> understanding from "literal string".  But that understanding is not
> exactly the right understanding.  So we ought to correct it
> to get to the right understanding.

I'm still not sure I understand how to correct that.  If using "string
constant" is what is needed, then it's easy to switch to that
terminology throughout.  But I'm not yet sure this is the way.



^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH] Interpret #r"..." as a raw string
  2021-03-06  8:27                         ` Eli Zaretskii
@ 2021-03-06  9:51                           ` Daniel Brooks
  2021-03-06 10:24                             ` Eli Zaretskii
  2021-03-07  6:08                           ` Richard Stallman
  1 sibling, 1 reply; 75+ messages in thread
From: Daniel Brooks @ 2021-03-06  9:51 UTC (permalink / raw)
  To: Eli Zaretskii; +Cc: matt, emacs-devel, conao3, rms, monnier

Eli Zaretskii <eliz@gnu.org> writes:

> Yes, see above (and in general, see the "Mutability" node in the ELisp
> manual).

> I'm still not sure I understand how to correct that.  If using "string
> constant" is what is needed, then it's easy to switch to that
> terminology throughout.  But I'm not yet sure this is the way.

I don't think that "constant" is the right word to use. After all, the
string that you get from a string literal can be modified just like a
string from any other source.

I think that "literal" is the right word, because it correctly describes
the provenance of the string. The alternatives to a string literal are
to call a constructor function such as make-string, or to fetch a string
from a buffer, or from input, or some other source. All of these result
in a string, but the source of the string can matter a lot.

Likewise a "list literal" is the syntax for creating a list by quoting
it rather than by calling list. The #s syntax for literal hash
tables gives you the same results as if you called make-hash-table.

db48x



^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH] Interpret #r"..." as a raw string
  2021-03-06  9:51                           ` Daniel Brooks
@ 2021-03-06 10:24                             ` Eli Zaretskii
  0 siblings, 0 replies; 75+ messages in thread
From: Eli Zaretskii @ 2021-03-06 10:24 UTC (permalink / raw)
  To: Daniel Brooks; +Cc: matt, emacs-devel, conao3, rms, monnier

> From: Daniel Brooks <db48x@db48x.net>
> Cc: rms@gnu.org,  matt@rfc20.org,  conao3@gmail.com,
>   monnier@iro.umontreal.ca,  emacs-devel@gnu.org
> Date: Sat, 06 Mar 2021 01:51:02 -0800
> 
> Eli Zaretskii <eliz@gnu.org> writes:
> 
> > Yes, see above (and in general, see the "Mutability" node in the ELisp
> > manual).
> 
> > I'm still not sure I understand how to correct that.  If using "string
> > constant" is what is needed, then it's easy to switch to that
> > terminology throughout.  But I'm not yet sure this is the way.
> 
> I don't think that "constant" is the right word to use. After all, the
> string that you get from a string literal can be modified just like a
> string from any other source.

That's a separate issue, and that horse has been beaten to death
already, with the current ELisp manual's text that talks of
"immutable" objects being the best result we could come up with that
leaves everybody with at least a partial satisfaction.  Please let's
not start that discussion again.

Let's agree that the word "constant" in the context of this discussion
is a purely syntactic term, it has nothing to do with the object's
mutability.



^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH] Interpret #r"..." as a raw string
  2021-03-06  8:27                         ` Eli Zaretskii
  2021-03-06  9:51                           ` Daniel Brooks
@ 2021-03-07  6:08                           ` Richard Stallman
  1 sibling, 0 replies; 75+ messages in thread
From: Richard Stallman @ 2021-03-07  6:08 UTC (permalink / raw)
  To: Eli Zaretskii; +Cc: db48x, matt, conao3, monnier, emacs-devel

[[[ To any NSA and FBI agents reading my email: please consider    ]]]
[[[ whether defending the US Constitution against all enemies,     ]]]
[[[ foreign or domestic, requires you to follow Snowden's example. ]]]

  > So it is okay to talk about a "string constant" or a "constant string"
  > instead of "string literal"?  And likewise with "literal vectors"?

We can use the expression "constant string" to mean a string that
is part of the code and stands for itself.  A constant object can have
any data type that has a read syntax, perhaps with the help of quote.
With the help of a macro, it can have any type.

But there is no distinction between "constant" and "variable", and
what the code actually contains is a string.  A "constant string"
is simply a string contained in the code.

  > We generally treat these as equivalent terms.  Here's an example:

  >     When similar constants occur as parts of a program, the Lisp
  >   interpreter might save time or space by reusing existing constants or
  >   their components.

In another message, I fixed up that text.

-- 
Dr Richard Stallman
Chief GNUisance of the GNU Project (https://gnu.org)
Founder, Free Software Foundation (https://fsf.org)
Internet Hall-of-Famer (https://internethalloffame.org)





^ permalink raw reply	[flat|nested] 75+ messages in thread

* Re: [PATCH] Interpret #r"..." as a raw string
  2021-03-06  6:04                         ` Matt Armstrong
@ 2021-03-07  6:13                           ` Richard Stallman
  2021-03-07 17:20                             ` [External] : " Drew Adams
  0 siblings, 1 reply; 75+ messages in thread
From: Richard Stallman @ 2021-03-07  6:13 UTC (permalink / raw)
  To: Matt Armstrong; +Cc: db48x, eliz, conao3, monnier, emacs-devel

[[[ To any NSA and FBI agents reading my email: please consider    ]]]
[[[ whether defending the US Constitution against all enemies,     ]]]
[[[ foreign or domestic, requires you to follow Snowden's example. ]]]

  > The place Eli was referring to, I believe, is this from (info
  > "(elisp)Equality Predicates"):

  >      The Emacs Lisp byte compiler may collapse identical literal
  >      objects, such as literal strings, into references to the same
  >      object, with the effect that the byte-compiled code will compare
  >      such objects as ‘eq’, while the interpreted version of the same
  >      code will not.  Therefore, your code should never rely on objects
  >      with the same literal contents being either ‘eq’ or not ‘eq’, it
  >      should instead use functions that compare object contents such as
  >      ‘equal’, described below.  Similarly, your code should not modify
  >      literal objects (e.g., put text properties on literal strings),
  >      since doing that might affect other literal objects of the same
  >      contents, if the byte compiler collapses them.

Here's how I would write it.  It does not use "literal".
It does use "constant", meaning that the object appears
directly in the expressions and will be used at run time.

A constant does not have to be written in the source.
It could be generated by a macro.

I've assumed that this applies to quoted lists, too, but 
I don't know for sure -- is it so?

I've made several other improvements in the punctuation and usage.
I suggest installing those changes too.

======================================================================
The Emacs Lisp byte compiler may unify identical constant objects,
including strings (which evaluate to themselves), and quoted objects,
including lists, into references to one single object.  This has the
effect that in the byte-compiled code the objects that were unified
are ‘eq’, while when interpreting the same code they are not.

Therefore, your code should never rely on objects that are equal in
contents to be either ‘eq’ or not ‘eq’; it should instead compare them
using functions that compare object contents, such as ‘equal’,
described below.  Similarly, your code should not modify an object that
appears in the code---for instance, it should not put text properties
on strings---since such alteration might affect other objects that
have been unified with it.

======================================================================

-- 
Dr Richard Stallman
Chief GNUisance of the GNU Project (https://gnu.org)
Founder, Free Software Foundation (https://fsf.org)
Internet Hall-of-Famer (https://internethalloffame.org)





^ permalink raw reply	[flat|nested] 75+ messages in thread

* RE: [External] : Re: [PATCH] Interpret #r"..." as a raw string
  2021-03-07  6:13                           ` Richard Stallman
@ 2021-03-07 17:20                             ` Drew Adams
  0 siblings, 0 replies; 75+ messages in thread
From: Drew Adams @ 2021-03-07 17:20 UTC (permalink / raw)
  To: rms@gnu.org, Matt Armstrong
  Cc: db48x@db48x.net, eliz@gnu.org, conao3@gmail.com,
	monnier@iro.umontreal.ca, emacs-devel@gnu.org

> Here's how I would write it.  It does not use "literal".
> It does use "constant", meaning that the object appears
> directly in the expressions and will be used at run time.
> 
> A constant does not have to be written in the source.
> It could be generated by a macro.
> 
> I've assumed that this applies to quoted lists, too, but
> I don't know for sure -- is it so?
> 
> I've made several other improvements in the punctuation and usage.
> I suggest installing those changes too.
> 
> ======================================================================
> The Emacs Lisp byte compiler may unify identical constant objects,
> including strings (which evaluate to themselves), and quoted objects,
> including lists, into references to one single object.  This has the
> effect that in the byte-compiled code the objects that were unified
> are ‘eq’, while when interpreting the same code they are not.
> 
> Therefore, your code should never rely on objects that are equal in
> contents to be either ‘eq’ or not ‘eq’; it should instead compare them
> using functions that compare object contents, such as ‘equal’,
> described below.  Similarly, your code should not modify an object that
> appears in the code---for instance, it should not put text properties
> on strings---since such alteration might affect other objects that
> have been unified with it.
> 
> ======================================================================

FWIW, I generally like, and agree with, your take on
this general subject.

I guess the only thing I'd suggest (if it's not already
the case) is that we say explicitly (somewhere, where
this topic is covered) something like what you said in
your other mail today:

  But there is no distinction between "constant" and
  "variable", and what the code actually contains is
  a string.  A "constant string" is simply a string
  contained in the code.

This too is something not obvious to people not used
to Lisp.

A "constant" object _can_ in general be modified (e.g.
by users/code).  And in some cases (quite common) that's
not a good idea - it's instead a gotcha to beware of.

And the byte-compiler (or even the reader) can sometimes
create a "constant" object, and you might not be aware
of that happening.

(I said "can in general" because Lisp does sometimes
prevent some modification of some constant objects, such
as nil and t.)

The gotcha of inadvertently modifying an object that's,
in effect, "constant" is something to make users aware
of.  How best to do that, I'm not sure.  (That's been
discussed at length in the last year or so.)

Again, I think speaking about "constant" code objects
instead of "literals" is a step forward, provided we
make clear that in some cases Lisp doesn't prevent you
from modifying such a "constant" object (intentionally
or not).

Your main point, about Lisp programs being data in
memory, not source code in files or buffers or on
paper, is an important one - to be emphasized.

^ permalink raw reply	[flat|nested] 75+ messages in thread

end of thread, other threads:[~2021-03-07 17:20 UTC | newest]

Thread overview: 75+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-02-26 18:18 [PATCH] Interpret #r"..." as a raw string Naoya Yamashita
2021-02-26 18:27 ` [External] : " Drew Adams
2021-02-26 18:53   ` Naoya Yamashita
2021-02-26 19:03     ` Drew Adams
2021-02-26 19:48     ` Stefan Monnier
2021-02-26 20:23       ` Naoya Yamashita
2021-02-26 20:34         ` Andreas Schwab
2021-02-26 20:39           ` Naoya Yamashita
2021-02-26 20:45             ` Andreas Schwab
2021-02-26 20:50               ` Naoya Yamashita
2021-02-26 20:54                 ` Andreas Schwab
2021-02-26 20:03     ` Eli Zaretskii
2021-02-26 20:34       ` Naoya Yamashita
2021-02-26 19:09 ` Andreas Schwab
2021-02-26 20:00 ` Eli Zaretskii
2021-02-27  0:39   ` Daniel Brooks
2021-02-27 16:14     ` Richard Stallman
2021-02-27 16:18       ` Stefan Monnier
2021-03-01  5:19         ` Richard Stallman
2021-03-02  5:45           ` Matt Armstrong
2021-03-03  5:53             ` Richard Stallman
2021-03-03  6:14               ` Daniel Brooks
2021-03-03  7:00               ` Eli Zaretskii
2021-03-04  2:47                 ` Matt Armstrong
2021-03-04 13:49                   ` Eli Zaretskii
2021-03-04 16:55                     ` Matt Armstrong
2021-03-05  5:44                       ` Richard Stallman
2021-03-05  5:39                   ` Richard Stallman
2021-03-05  8:01                     ` Eli Zaretskii
2021-03-06  5:13                       ` Richard Stallman
2021-03-06  6:04                         ` Matt Armstrong
2021-03-07  6:13                           ` Richard Stallman
2021-03-07 17:20                             ` [External] : " Drew Adams
2021-03-06  8:27                         ` Eli Zaretskii
2021-03-06  9:51                           ` Daniel Brooks
2021-03-06 10:24                             ` Eli Zaretskii
2021-03-07  6:08                           ` Richard Stallman
2021-02-27 20:41       ` Daniel Brooks
2021-02-28  6:22 ` Zhu Zihao
2021-03-01  5:26   ` Richard Stallman
2021-03-01 12:06 ` Alan Mackenzie
2021-03-01 12:13   ` Andreas Schwab
2021-03-02  5:59   ` Matt Armstrong
2021-03-02  9:56     ` Daniel Brooks
2021-03-02 10:13       ` Andreas Schwab
2021-03-02 10:55         ` Daniel Brooks
2021-03-02 11:18           ` Andreas Schwab
2021-03-02 11:26             ` Daniel Brooks
2021-03-02 11:14       ` Alan Mackenzie
2021-03-02 11:52         ` Daniel Brooks
2021-03-02 12:01     ` Dmitry Gutov
2021-03-02 14:14       ` Alan Mackenzie
2021-03-02 14:32         ` Dmitry Gutov
2021-03-02 15:06           ` Alan Mackenzie
2021-03-02 11:41 ` Aurélien Aptel
2021-03-02 13:49   ` Stefan Monnier
2021-03-02 14:46     ` Aurélien Aptel
2021-03-02 15:11       ` Stefan Monnier
2021-03-02 16:07         ` Aurélien Aptel
2021-03-03  7:31           ` Alfred M. Szmidt
2021-03-03 16:02           ` Stefan Monnier
2021-03-02 20:36     ` Daniel Brooks
2021-03-03  0:27       ` Stefan Monnier
2021-03-03  0:42         ` Daniel Brooks
2021-03-03  8:16       ` Andreas Schwab
2021-03-03  9:25         ` Daniel Brooks
2021-03-03  9:29           ` Andreas Schwab
2021-03-03 10:02             ` Daniel Brooks
2021-03-03 10:11               ` Daniel Brooks
2021-03-03 10:14                 ` Andreas Schwab
2021-03-03 11:48                   ` Daniel Brooks
2021-03-03 10:12       ` Michael Albinus
2021-03-03 10:42         ` Daniel Brooks
2021-03-03 10:49           ` Michael Albinus
2021-03-03 16:12           ` Stefan Monnier

Code repositories for project(s) associated with this public inbox

	https://git.savannah.gnu.org/cgit/emacs.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).