From 95cf4580d238148070f7e80a2078e169079064ab Mon Sep 17 00:00:00 2001 From: Laurence Warne Date: Tue, 9 Aug 2022 08:33:18 +0100 Subject: [PATCH] Fix python escape code fontification for multi-line literals * lisp/progmodes/python.el (python--string-bytes-literal-matcher): Go backward one char after a match so that consecutive escape codes are highlighted. (python--not-raw-string-literal-start-regexp): Make regular expression more comprehensive, so multi-line bytes literals are not caught. (python-rx): Accept one to three octal digits in octal escape codes instead of always three. --- lisp/progmodes/python.el | 22 ++++--- test/lisp/progmodes/python-tests.el | 95 ++++++++++++++++++++++++++--- 2 files changed, 102 insertions(+), 15 deletions(-) diff --git a/lisp/progmodes/python.el b/lisp/progmodes/python.el index 5edd6e7df5..96f9d14832 100644 --- a/lisp/progmodes/python.el +++ b/lisp/progmodes/python.el @@ -432,7 +432,7 @@ python-rx (seq (not "\\") (group (or "\\\\" "\\'" "\\a" "\\b" "\\f" "\\n" "\\r" "\\t" "\\v" - (seq "\\" (= 3 (in "0-7"))) + (seq "\\" (** 1 3 (in "0-7"))) (seq "\\x" hex hex))))) (string-escape-sequence (or bytes-escape-sequence @@ -556,7 +556,14 @@ python--not-raw-bytes-literal-start-regexp "A regular expression matching the start of a not-raw bytes literal.") (defconst python--not-raw-string-literal-start-regexp - (rx (or bos (not alnum)) (? (or "u" "U" "F" "f")) (or "\"" "\"\"\"" "'" "'''") eos) + (rx bos (or + ;; Multi-line string literals + (seq (? (? (not alnum)) (or "u" "U" "F" "f")) (or "\"\"\"" "'''")) + (seq (? anychar) (not alnum) (or "\"\"\"" "'''")) + ;; Single line string literals + (seq (? (** 0 2 anychar) (not alnum)) (or "u" "U" "F" "f") (or "'" "\"")) + (seq (? 
(** 0 3 anychar) (not (any "'\"" alnum))) (or "'" "\""))) + eos) "A regular expression matching the start of a not-raw string literal.") (defun python--string-bytes-literal-matcher (regexp start-regexp) @@ -565,11 +572,12 @@ python--string-bytes-literal-matcher (cl-loop for result = (re-search-forward regexp limit t) for result-valid = (and result - (let* ((pos (nth 8 (syntax-ppss))) - (before-quote - (buffer-substring-no-properties - (max (- pos 5) (point-min)) - (min (+ pos 1) (point-max))))) + (when-let* ((pos (nth 8 (syntax-ppss))) + (before-quote + (buffer-substring-no-properties + (max (- pos 4) (point-min)) + (min (+ pos 1) (point-max))))) + (backward-char) (string-match-p start-regexp before-quote))) until (or (not result) result-valid) finally return (and result-valid result)))) diff --git a/test/lisp/progmodes/python-tests.el b/test/lisp/progmodes/python-tests.el index e3c8d5554a..d303050fad 100644 --- a/test/lisp/progmodes/python-tests.el +++ b/test/lisp/progmodes/python-tests.el @@ -407,6 +407,81 @@ python-font-lock-escape-sequence-string-newline (31 . font-lock-constant-face) (33 . font-lock-string-face)))) +(ert-deftest python-font-lock-escape-sequence-multiline-string () + (python-tests-assert-faces + (let ((escape-sequences "\\x12 \123 \\n \\u1234 \\U00010348 \\N{Plus-Minus Sign}")) + (cl-loop for string-prefix in '("" "f" "rf" "fr" "r" "rb" "br" "b") + concat (cl-loop for quote-string in '("\"\"\"" "'''") + concat (concat string-prefix + quote-string + escape-sequences + quote-string + "\n")))) + '((1 . font-lock-doc-face) + (4 . font-lock-constant-face) + (8 . font-lock-doc-face) + (11 . font-lock-constant-face) + (13 . font-lock-doc-face) + (14 . font-lock-constant-face) + (20 . font-lock-doc-face) + (21 . font-lock-constant-face) + (31 . font-lock-doc-face) + (32 . font-lock-constant-face) + (51 . font-lock-doc-face) (54) + (55 . font-lock-doc-face) + (58 . font-lock-constant-face) + (62 . font-lock-doc-face) + (65 . 
font-lock-constant-face) + (67 . font-lock-doc-face) + (68 . font-lock-constant-face) + (74 . font-lock-doc-face) + (75 . font-lock-constant-face) + (85 . font-lock-doc-face) + (86 . font-lock-constant-face) + (105 . font-lock-doc-face) (108) + (110 . font-lock-string-face) + (113 . font-lock-constant-face) + (117 . font-lock-string-face) + (120 . font-lock-constant-face) + (122 . font-lock-string-face) + (123 . font-lock-constant-face) + (129 . font-lock-string-face) + (130 . font-lock-constant-face) + (140 . font-lock-string-face) + (141 . font-lock-constant-face) + (160 . font-lock-string-face) (163) + (165 . font-lock-string-face) + (168 . font-lock-constant-face) + (172 . font-lock-string-face) + (175 . font-lock-constant-face) + (177 . font-lock-string-face) + (178 . font-lock-constant-face) + (184 . font-lock-string-face) + (185 . font-lock-constant-face) + (195 . font-lock-string-face) + (196 . font-lock-constant-face) + (215 . font-lock-string-face) (218) + (221 . font-lock-string-face) (274) + (277 . font-lock-string-face) (330) + (333 . font-lock-string-face) (386) + (389 . font-lock-string-face) (442) + (444 . font-lock-string-face) (497) + (499 . font-lock-string-face) (552) + (555 . font-lock-string-face) (608) + (611 . font-lock-string-face) (664) + (667 . font-lock-string-face) (720) + (723 . font-lock-string-face) (776) + (778 . font-lock-string-face) + (781 . font-lock-constant-face) + (785 . font-lock-string-face) + (788 . font-lock-constant-face) + (790 . font-lock-string-face) (831) + (833 . font-lock-string-face) + (836 . font-lock-constant-face) + (840 . font-lock-string-face) + (843 . font-lock-constant-face) + (845 . 
font-lock-string-face) (886)))) + (ert-deftest python-font-lock-escape-sequence-bytes-newline () (python-tests-assert-faces "b'\\n' @@ -421,19 +496,23 @@ python-font-lock-escape-sequence-bytes-newline (ert-deftest python-font-lock-escape-sequence-hex-octal () (python-tests-assert-faces - "b'\\x12 \\777' -'\\x12 \\777'" + "b'\\x12 \\777 \\1\\23' +'\\x12 \\777 \\1\\23'" '((1) (2 . font-lock-doc-face) (3 . font-lock-constant-face) (7 . font-lock-doc-face) (8 . font-lock-constant-face) - (12 . font-lock-doc-face) (13) - (14 . font-lock-doc-face) - (15 . font-lock-constant-face) - (19 . font-lock-doc-face) - (20 . font-lock-constant-face) - (24 . font-lock-doc-face)))) + (12 . font-lock-doc-face) + (13 . font-lock-constant-face) + (18 . font-lock-doc-face) (19) + (20 . font-lock-doc-face) + (21 . font-lock-constant-face) + (25 . font-lock-doc-face) + (26 . font-lock-constant-face) + (30 . font-lock-doc-face) + (31 . font-lock-constant-face) + (36 . font-lock-doc-face)))) (ert-deftest python-font-lock-escape-sequence-unicode () (python-tests-assert-faces -- 2.30.2