unofficial mirror of bug-gnu-emacs@gnu.org 
 help / color / mirror / code / Atom feed
* bug#32939: 27.0.50; Don't reject confusable quotes, just fontify & mention them in error messages
@ 2018-10-04 23:58 Noam Postavsky
  2018-10-05  8:44 ` Eli Zaretskii
  0 siblings, 1 reply; 9+ messages in thread
From: Noam Postavsky @ 2018-10-04 23:58 UTC (permalink / raw)
  To: 32939

[-- Attachment #1: Type: text/plain, Size: 386 bytes --]

Severity: wishlist

As a followup from [1], here is a patchset which replaces the master's
current rejection of confusable quotes on `read', and instead adds a
hint to the error message when evaluating an expression with a
confusable quote leads to an error.

And also highlight such quotes with warning-face.

[1]: https://lists.gnu.org/archive/html/emacs-devel/2018-02/msg00093.html


[-- Attachment #2: patch --]
[-- Type: text/plain, Size: 6145 bytes --]

From 330f6072c3dc614936199f1775679328c5452416 Mon Sep 17 00:00:00 2001
From: Noam Postavsky <npostavs@gmail.com>
Date: Sat, 10 Mar 2018 18:20:45 -0500
Subject: [PATCH v1 1/2] Stop signaling an error when reading "smart quotes" in
 symbols

Revert commits from 2018-01-28 "Fix round tripping of read->print for
symbols with strange quotes", and 2017-07-22 "Signal error for symbol
names with strange quotes (Bug#2967)".
* etc/NEWS: Remove corresponding entries.
* src/character.c (confusable_symbol_character_p):
* test/src/lread-tests.el (lread-tests--old-style-backquotes): Remove.
* src/lread.c (read1): Don't signal error on confusable character.
* src/print.c (print_object): Don't escape confusable characters.
---
 etc/NEWS                |  9 ---------
 src/character.c         | 26 --------------------------
 src/character.h         |  2 --
 src/lread.c             |  7 -------
 src/print.c             |  3 +--
 test/src/lread-tests.el | 17 -----------------
 6 files changed, 1 insertion(+), 63 deletions(-)

diff --git a/etc/NEWS b/etc/NEWS
index daacf49e62..1ca4956c96 100644
--- a/etc/NEWS
+++ b/etc/NEWS
@@ -1073,15 +1073,6 @@ and if the new behavior breaks your code please email
 32252@debbugs.gnu.org.  Because %o and %x can now format signed
 integers, they now support the + and space flags.
 
-** To avoid confusion caused by "smart quotes", the reader signals an
-error when reading Lisp symbols which begin with one of the following
-quotation characters: ‘’‛“”‟〞"'.  A symbol beginning with such a
-character can be written by escaping the quotation character with a
-backslash.  For example:
-
-    (read "‘smart") => (invalid-read-syntax "strange quote" "‘")
-    (read "\\‘smart") == (intern "‘smart")
-
 +++
 ** Omitting variables after '&optional' and '&rest' is now allowed.
 For example (defun foo (&optional)) is no longer an error.  This is
diff --git a/src/character.c b/src/character.c
index 0b14e476c1..edaeca0a35 100644
--- a/src/character.c
+++ b/src/character.c
@@ -1056,32 +1056,6 @@ blankp (int c)
   return XFIXNUM (category) == UNICODE_CATEGORY_Zs; /* separator, space */
 }
 
-
-/* Return true for characters that would read as symbol characters,
-   but graphically may be confused with some kind of punctuation.  We
-   require an escaping backslash, when such characters begin a
-   symbol.  */
-bool
-confusable_symbol_character_p (int ch)
-{
-  switch (ch)
-    {
-    case 0x2018: /* LEFT SINGLE QUOTATION MARK */
-    case 0x2019: /* RIGHT SINGLE QUOTATION MARK */
-    case 0x201B: /* SINGLE HIGH-REVERSED-9 QUOTATION MARK */
-    case 0x201C: /* LEFT DOUBLE QUOTATION MARK */
-    case 0x201D: /* RIGHT DOUBLE QUOTATION MARK */
-    case 0x201F: /* DOUBLE HIGH-REVERSED-9 QUOTATION MARK */
-    case 0x301E: /* DOUBLE PRIME QUOTATION MARK */
-    case 0xFF02: /* FULLWIDTH QUOTATION MARK */
-    case 0xFF07: /* FULLWIDTH APOSTROPHE */
-      return true;
-
-    default:
-      return false;
-    }
-}
-
 signed char HEXDIGIT_CONST hexdigit[UCHAR_MAX + 1] =
   {
 #if HEXDIGIT_IS_CONST
diff --git a/src/character.h b/src/character.h
index 5dff85aed4..6d5349e07d 100644
--- a/src/character.h
+++ b/src/character.h
@@ -683,8 +683,6 @@ char_surrogate_p (int c)
 extern bool printablep (int);
 extern bool blankp (int);
 
-extern bool confusable_symbol_character_p (int ch);
-
 /* Return a translation table of id number ID.  */
 #define GET_TRANSLATION_TABLE(id) \
   (XCDR (XVECTOR (Vtranslation_table_vector)->contents[(id)]))
diff --git a/src/lread.c b/src/lread.c
index 73e38d8995..fa3b38c312 100644
--- a/src/lread.c
+++ b/src/lread.c
@@ -3539,13 +3539,6 @@ read1 (Lisp_Object readcharfun, int *pch, bool first_in_list)
 	    if (! NILP (result))
 	      return unbind_to (count, result);
 	  }
-        if (!quoted && multibyte)
-          {
-            int ch = STRING_CHAR ((unsigned char *) read_buffer);
-            if (confusable_symbol_character_p (ch))
-              xsignal2 (Qinvalid_read_syntax, build_string ("strange quote"),
-                        CALLN (Fstring, make_fixnum (ch)));
-          }
 	{
 	  Lisp_Object result;
 	  ptrdiff_t nbytes = p - read_buffer;
diff --git a/src/print.c b/src/print.c
index c0c90bc7e9..c112d806cc 100644
--- a/src/print.c
+++ b/src/print.c
@@ -2049,8 +2049,7 @@ print_object (Lisp_Object obj, Lisp_Object printcharfun, bool escapeflag)
 		    || c == ';' || c == '#' || c == '(' || c == ')'
 		    || c == ',' || c == '.' || c == '`'
 		    || c == '[' || c == ']' || c == '?' || c <= 040
-                    || confusing
-		    || (i == 1 && confusable_symbol_character_p (c)))
+		    || confusing)
 		  {
 		    printchar ('\\', printcharfun);
 		    confusing = false;
diff --git a/test/src/lread-tests.el b/test/src/lread-tests.el
index f19d98320a..0d7cd962bd 100644
--- a/test/src/lread-tests.el
+++ b/test/src/lread-tests.el
@@ -142,23 +142,6 @@ lread-tests--last-message
                            "unescaped character literals "
                            "`?\"', `?(', `?)', `?;', `?[', `?]' detected!")))))
 
-(ert-deftest lread-tests--funny-quote-symbols ()
-  "Check that 'smart quotes' or similar trigger errors in symbol names."
-  (dolist (quote-char
-           '(#x2018 ;; LEFT SINGLE QUOTATION MARK
-             #x2019 ;; RIGHT SINGLE QUOTATION MARK
-             #x201B ;; SINGLE HIGH-REVERSED-9 QUOTATION MARK
-             #x201C ;; LEFT DOUBLE QUOTATION MARK
-             #x201D ;; RIGHT DOUBLE QUOTATION MARK
-             #x201F ;; DOUBLE HIGH-REVERSED-9 QUOTATION MARK
-             #x301E ;; DOUBLE PRIME QUOTATION MARK
-             #xFF02 ;; FULLWIDTH QUOTATION MARK
-             #xFF07 ;; FULLWIDTH APOSTROPHE
-             ))
-    (let ((str (format "%cfoo" quote-char)))
-     (should-error (read str) :type 'invalid-read-syntax)
-     (should (eq (read (concat "\\" str)) (intern str))))))
-
 (ert-deftest lread-test-bug26837 ()
   "Test for https://debbugs.gnu.org/26837 ."
   (let ((load-path (cons
-- 
2.11.0


[-- Attachment #3: patch --]
[-- Type: text/plain, Size: 6060 bytes --]

From d45f4f8aafb3caaf411d8bb5e434f42bc2d5f56a Mon Sep 17 00:00:00 2001
From: Noam Postavsky <npostavs@gmail.com>
Date: Sat, 10 Mar 2018 18:12:55 -0500
Subject: [PATCH v1 2/2] Give suggestions on errors about var names with
 confusable chars

Add some hints to the message for byte compiler 'free variable'
warnings and 'void-variable' errors where the variable has confusable
characters in it.
* lisp/help.el (uni-confusables), uni-confusables-regexp): New
constants.
(help-command-error-confusable-suggestions): New function, added to
`command-error-function'.
(help-uni-confusable-suggestions): New function.
* lisp/emacs-lisp/bytecomp.el (byte-compile-variable-ref): Use it.

* lisp/emacs-lisp/lisp-mode.el
(lisp--match-confusable-symbol-character): New function.
(lisp-fdefs): Use it to fontify confusable characters with
font-lock-warning-face when they occur in symbol names.
---
 lisp/emacs-lisp/bytecomp.el  | 10 +++++++--
 lisp/emacs-lisp/lisp-mode.el | 16 ++++++++++++++-
 lisp/help.el                 | 48 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 71 insertions(+), 3 deletions(-)

diff --git a/lisp/emacs-lisp/bytecomp.el b/lisp/emacs-lisp/bytecomp.el
index 8b67e1007d..31d589ab02 100644
--- a/lisp/emacs-lisp/bytecomp.el
+++ b/lisp/emacs-lisp/bytecomp.el
@@ -3407,7 +3407,10 @@ byte-compile-variable-ref
 		  (boundp var)
 		  (memq var byte-compile-bound-variables)
 		  (memq var byte-compile-free-references))
-	(byte-compile-warn "reference to free variable `%S'" var)
+        (let* ((varname (prin1-to-string var))
+               (suggestions (help-uni-confusable-suggestions varname)))
+          (byte-compile-warn "reference to free variable `%s'%s" varname
+                             (if suggestions (concat "\n  " suggestions) "")))
 	(push var byte-compile-free-references))
       (byte-compile-dynamic-variable-op 'byte-varref var))))
 
@@ -3423,7 +3426,10 @@ byte-compile-variable-set
 		  (boundp var)
 		  (memq var byte-compile-bound-variables)
 		  (memq var byte-compile-free-assignments))
-	(byte-compile-warn "assignment to free variable `%s'" var)
+        (let* ((varname (prin1-to-string var))
+               (suggestions (help-uni-confusable-suggestions varname)))
+          (byte-compile-warn "assignment to free variable `%s'%s" varname
+                             (if suggestions (concat "\n  " suggestions) "")))
 	(push var byte-compile-free-assignments))
       (byte-compile-dynamic-variable-op 'byte-varset var))))
 
diff --git a/lisp/emacs-lisp/lisp-mode.el b/lisp/emacs-lisp/lisp-mode.el
index f31b4d4dd2..87bdafa976 100644
--- a/lisp/emacs-lisp/lisp-mode.el
+++ b/lisp/emacs-lisp/lisp-mode.el
@@ -280,6 +280,17 @@ elisp--font-lock-backslash
          `(face ,font-lock-warning-face
                 help-echo "This \\ has no effect"))))
 
+(defun lisp--match-confusable-symbol-character  (limit)
+  ;; Match a confusable character within a Lisp symbol.
+  (catch 'matched
+    (while t
+      (if (not (re-search-forward uni-confusables-regexp limit t))
+          (throw 'matched nil)
+        ;; Skip confusables inside strings or comments.
+        (save-match-data
+          (if (not (nth 8 (syntax-ppss)))
+              (throw 'matched t)))))))
+
 (let-when-compile
     ((lisp-fdefs '("defmacro" "defun"))
      (lisp-vdefs '("defvar"))
@@ -463,7 +474,10 @@ elisp--font-lock-backslash
            (3 'font-lock-regexp-grouping-construct prepend))
          (lisp--match-hidden-arg
           (0 '(face font-lock-warning-face
-               help-echo "Hidden behind deeper element; move to another line?")))
+                    help-echo "Hidden behind deeper element; move to another line?")))
+         (lisp--match-confusable-symbol-character
+          0 '(face font-lock-warning-face
+                    help-echo "Confusable character"))
          ))
       "Gaudy level highlighting for Emacs Lisp mode.")
 
diff --git a/lisp/help.el b/lisp/help.el
index 6e06fc9e1c..66d191f863 100644
--- a/lisp/help.el
+++ b/lisp/help.el
@@ -1489,6 +1489,54 @@ help--make-usage-docstring
     (help--docstring-quote (format "%S" (help--make-usage fn arglist)))))
 
 \f
+
+;; Just some quote-like characters for now.  TODO: generate this stuff
+;; from official Unicode data.
+(defconst uni-confusables
+  '((#x2018 . "'") ;; LEFT SINGLE QUOTATION MARK
+    (#x2019 . "'") ;; RIGHT SINGLE QUOTATION MARK
+    (#x201B . "'") ;; SINGLE HIGH-REVERSED-9 QUOTATION MARK
+    (#x201C . "\"") ;; LEFT DOUBLE QUOTATION MARK
+    (#x201D . "\"") ;; RIGHT DOUBLE QUOTATION MARK
+    (#x201F . "\"") ;; DOUBLE HIGH-REVERSED-9 QUOTATION MARK
+    (#x301E . "\"") ;; DOUBLE PRIME QUOTATION MARK
+    (#xFF02 . "'") ;; FULLWIDTH QUOTATION MARK
+    (#xFF07 . "'") ;; FULLWIDTH APOSTROPHE
+    ))
+
+(defconst uni-confusables-regexp
+  (concat "[" (mapcar #'car uni-confusables) "]"))
+
+(defun help-uni-confusable-suggestions (string)
+  "Return a message describing confusables in STRING."
+  (let ((i 0)
+        (confusables nil))
+    (while (setq i (string-match uni-confusables-regexp string i))
+      (let ((replacement (alist-get (aref string i) uni-confusables)))
+        (push (aref string i) confusables)
+        (setq string (replace-match replacement t t string))
+        (setq i (+ i (length replacement)))))
+    (when confusables
+      (format-message
+       (if (> (length confusables) 1)
+           "Found confusable characters: %s; perhaps you meant: `%s'?"
+         "Found confusable character: %s, perhaps you meant: `%s'?")
+       (mapconcat (lambda (c) (format-message "`%c'" c))
+                  confusables ", ")
+       string))))
+
+(defun help-command-error-confusable-suggestions (data _context _signal)
+  (pcase data
+    (`(void-variable ,var)
+     (princ (concat "\n  " (help-uni-confusable-suggestions
+                            (symbol-name var)))
+            t))
+    (_ nil)))
+
+(add-function :after command-error-function
+              #'help-command-error-confusable-suggestions)
+
+\f
 (provide 'help)
 
 ;;; help.el ends here
-- 
2.11.0


^ permalink raw reply related	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2019-11-28 23:25 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-10-04 23:58 bug#32939: 27.0.50; Don't reject confusable quotes, just fontify & mention them in error messages Noam Postavsky
2018-10-05  8:44 ` Eli Zaretskii
2018-10-24 22:25   ` Noam Postavsky
2018-10-27  9:48     ` Eli Zaretskii
2018-10-30  1:30       ` Noam Postavsky
2018-10-30  7:12         ` Eli Zaretskii
2019-11-17  8:42           ` Lars Ingebrigtsen
2019-11-27 19:42             ` Noam Postavsky
2019-11-28 23:25               ` Noam Postavsky

Code repositories for project(s) associated with this public inbox

	https://git.savannah.gnu.org/cgit/emacs.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).