From dd4863ad55310b084839df8f508bf57364d489b7 Mon Sep 17 00:00:00 2001 From: Denis Zubarev Date: Sat, 11 Nov 2023 04:55:44 +0300 Subject: [PATCH] Improve syntax highlighting for python-ts-mode Fix fontification of strings inside f-string interpolation, e.g. for f"beg {'nested'}" - 'nested' was not fontified as a string. Do not override the face of builtin functions (all, bytes etc.) with the function call face. Add missing assignment expressions (:= *=). Highlight variables defined in a for loop (for var1, var2 in ...). Fontify built-ins (dict,list,etc.) as types when they are used in type hints. Highlight union types (type1|type2). Highlight base class names in the class definition. Fontify class patterns in case statements. Highlight the second argument as a type in isinstance/issubclass call. Highlight dotted decorator names. * lisp/progmodes/python.el (python--treesit-keywords): Add compound keyword "is not". (python--treesit-fontify-string): Fix f-string interpolation. Enable interpolation highlighting only if string-interpolation is present in the enabled levels of treesit-font-lock-feature-list. (python--treesit-fontify-string-interpolation): Remove function. (python--treesit-fontify-union-types): Fontify nested union types. (python--treesit-fontify-dotted-decorator): Fontify all parts of dotted decorator name. (python--treesit-settings): Change/add rules. * test/lisp/progmodes/python-tests.el (python-ts-tests-with-temp-buffer): New macro for setting up the test buffer. 
(python-ts-mode-compound-keywords-face) (python-ts-mode-named-assignement-face-1) (python-ts-mode-assignement-face-2) (python-ts-mode-nested-types-face-1) (python-ts-mode-union-types-face-1) (python-ts-mode-union-types-face-2) (python-ts-mode-types-face-1) (python-ts-mode-types-face-2) (python-ts-mode-types-face-3) (python-ts-mode-isinstance-type-face-1) (python-ts-mode-isinstance-type-face-2) (python-ts-mode-superclass-type-face) (python-ts-mode-class-patterns-face) (python-ts-mode-dotted-decorator-face-1) (python-ts-mode-dotted-decorator-face-2) (python-ts-mode-builtin-call-face) (python-ts-mode-interpolation-nested-string) (python-ts-mode-level-fontification-wo-interpolation) (python-ts-mode-disabled-string-interpolation) (python-ts-mode-interpolation-doc-string): Add tests. --- lisp/progmodes/python.el | 183 +++++++++++++----- test/lisp/progmodes/python-tests.el | 285 ++++++++++++++++++++++++++++ 2 files changed, 420 insertions(+), 48 deletions(-) diff --git a/lisp/progmodes/python.el b/lisp/progmodes/python.el index ab3bf1b4ec..1b33a45965 100644 --- a/lisp/progmodes/python.el +++ b/lisp/progmodes/python.el @@ -979,7 +979,7 @@ python--treesit-keywords "raise" "return" "try" "while" "with" "yield" ;; These are technically operators, but we fontify them as ;; keywords. - "and" "in" "is" "not" "or" "not in")) + "and" "in" "is" "not" "or" "not in" "is not")) (defvar python--treesit-builtins '("abs" "all" "any" "ascii" "bin" "bool" "breakpoint" "bytearray" @@ -1042,9 +1042,7 @@ python--treesit-fontify-string f-strings. OVERRIDE is the override flag described in `treesit-font-lock-rules'. START and END mark the region to be fontified." 
- (let* ((string-beg (treesit-node-start node)) - (string-end (treesit-node-end node)) - (maybe-expression (treesit-node-parent node)) + (let* ((maybe-expression (treesit-node-parent node)) (grandparent (treesit-node-parent (treesit-node-parent maybe-expression))) @@ -1072,28 +1070,78 @@ python--treesit-fontify-string (equal (treesit-node-type maybe-expression) "expression_statement")) 'font-lock-doc-face - 'font-lock-string-face))) - ;; Don't highlight string prefixes like f/r/b. - (save-excursion - (goto-char string-beg) - (when (re-search-forward "[\"']" string-end t) - (setq string-beg (match-beginning 0)))) - (treesit-fontify-with-override - string-beg string-end face override start end))) - -(defun python--treesit-fontify-string-interpolation - (node _ start end &rest _) - "Fontify string interpolation. -NODE is the string node. Do not fontify the initial f for -f-strings. START and END mark the region to be + 'font-lock-string-face)) + + (ignore-interpolation (not + (seq-some + (lambda (feats) (memq 'string-interpolation feats)) + (seq-take treesit-font-lock-feature-list treesit-font-lock-level)))) + ;; If interpolation is enabled, highlight only + ;; string_start/string_content/string_end children. Do not + ;; touch interpolation node that can occur inside of the + ;; string. + (string-nodes (if ignore-interpolation + (list node) + (treesit-filter-child + node + (lambda (ch) (member (treesit-node-type ch) + '("string_start" + "string_content" + "string_end"))) + t)))) + + (dolist (string-node string-nodes) + (let ((string-beg (treesit-node-start string-node)) + (string-end (treesit-node-end string-node))) + (when (or ignore-interpolation + (equal (treesit-node-type string-node) "string_start")) + ;; Don't highlight string prefixes like f/r/b. 
+ (save-excursion + (goto-char string-beg) + (when (re-search-forward "[\"']" string-end t) + (setq string-beg (match-beginning 0))))) + + (treesit-fontify-with-override + string-beg string-end face override start end))))) + +(defun python--treesit-fontify-union-types (node override start end &rest _) + "Fontify nested union types in the type hints. +For example, Lvl1 | Lvl2[Lvl3[Lvl4[Lvl5 | None]], Lvl2]. This +structure is represented via nesting binary_operator and +subscript nodes. This function iterates over all levels and +highlights identifier nodes. NODE is the binary_operator +node. OVERRIDE is the override flag described in +`treesit-font-lock-rules'. START and END mark the region to be fontified." - ;; This is kind of a hack, it basically removes the face applied by - ;; the string feature, so that following features can apply their - ;; face. - (let ((n-start (treesit-node-start node)) - (n-end (treesit-node-end node))) - (remove-text-properties - (max start n-start) (min end n-end) '(face)))) + (dolist (child (treesit-node-children node t)) + (pcase (treesit-node-type child) + ((or "identifier" "none") + (treesit-fontify-with-override + (treesit-node-start child) (treesit-node-end child) + 'font-lock-type-face override start end)) + ("attribute" + (when-let ((type-node (treesit-node-child-by-field-name child "attribute"))) + (treesit-fontify-with-override + (treesit-node-start type-node) (treesit-node-end type-node) + 'font-lock-type-face override start end))) + ((or "binary_operator" "subscript") + (python--treesit-fontify-union-types child override start end))))) + +(defun python--treesit-fontify-dotted-decorator (node override start end &rest _) + "Fontify dotted decorators. +For example @pytest.mark.skip. Iterate over all nested attribute +nodes and highlight identifier nodes. NODE is the first attribute +node. OVERRIDE is the override flag described in +`treesit-font-lock-rules'. START and END mark the region to be +fontified." 
+ (dolist (child (treesit-node-children node t)) + (pcase (treesit-node-type child) + ("identifier" + (treesit-fontify-with-override + (treesit-node-start child) (treesit-node-end child) + 'font-lock-type-face override start end)) + ("attribute" + (python--treesit-fontify-dotted-decorator child override start end))))) (defvar python--treesit-settings (treesit-font-lock-rules @@ -1103,14 +1151,9 @@ python--treesit-settings :feature 'string :language 'python - '((string) @python--treesit-fontify-string) + '((string) @python--treesit-fontify-string + (interpolation ["{" "}"] @font-lock-misc-punctuation-face)) - ;; HACK: This feature must come after the string feature and before - ;; other features. Maybe we should make string-interpolation an - ;; option rather than a feature. - :feature 'string-interpolation - :language 'python - '((interpolation) @python--treesit-fontify-string-interpolation) :feature 'keyword :language 'python @@ -1126,12 +1169,6 @@ python--treesit-settings name: (identifier) @font-lock-type-face) (parameters (identifier) @font-lock-variable-name-face)) - :feature 'function - :language 'python - '((call function: (identifier) @font-lock-function-call-face) - (call function: (attribute - attribute: (identifier) @font-lock-function-call-face))) - :feature 'builtin :language 'python `(((identifier) @font-lock-builtin-face @@ -1142,6 +1179,12 @@ python--treesit-settings eol)) @font-lock-builtin-face))) + :feature 'function + :language 'python + '((call function: (identifier) @font-lock-function-call-face) + (call function: (attribute + attribute: (identifier) @font-lock-function-call-face))) + :feature 'constant :language 'python '([(true) (false) (none)] @font-lock-constant-face) @@ -1153,30 +1196,74 @@ python--treesit-settings @font-lock-variable-name-face) (assignment left: (attribute attribute: (identifier) - @font-lock-property-use-face)) - (pattern_list (identifier) + @font-lock-variable-name-face)) + (augmented_assignment left: (identifier) + 
@font-lock-variable-name-face) + (named_expression name: (identifier) + @font-lock-variable-name-face) + (pattern_list [(identifier) + (list_splat_pattern (identifier))] @font-lock-variable-name-face) - (tuple_pattern (identifier) + (tuple_pattern [(identifier) + (list_splat_pattern (identifier))] @font-lock-variable-name-face) - (list_pattern (identifier) - @font-lock-variable-name-face) - (list_splat_pattern (identifier) - @font-lock-variable-name-face)) + (list_pattern [(identifier) + (list_splat_pattern (identifier))] + @font-lock-variable-name-face)) :feature 'decorator :language 'python + ;; Override function call face. + :override t '((decorator "@" @font-lock-type-face) (decorator (call function: (identifier) @font-lock-type-face)) - (decorator (identifier) @font-lock-type-face)) + (decorator (identifier) @font-lock-type-face) + (decorator [(attribute) (call (attribute))] @python--treesit-fontify-dotted-decorator)) :feature 'type :language 'python + ;; Override built-in faces when dict/list are used for type hints. + :override t `(((identifier) @font-lock-type-face (:match ,(rx-to-string `(seq bol (or ,@python--treesit-exceptions) - eol)) + eol)) @font-lock-type-face)) - (type (identifier) @font-lock-type-face)) + (type [(identifier) (none)] @font-lock-type-face) + (type (attribute attribute: (identifier) @font-lock-type-face)) + ;; We don't want to highlight a package of the type + ;; (e.g. pack.ClassName). So explicitly exclude patterns with + ;; attribute, since we handle dotted type name in the previous + ;; rule. The following rule handle + ;; generic_type/list/tuple/splat_type nodes. + (type (_ !attribute [[(identifier) (none)] @font-lock-type-face + (attribute attribute: (identifier) @font-lock-type-face) ])) + ;; collections.abc.Iterator[T] case. + (type (subscript (attribute attribute: (identifier) @font-lock-type-face))) + ;; Nested optional type hints, e.g. val: Lvl1 | Lvl2[Lvl3[Lvl4]]. 
+ (type (binary_operator) @python--treesit-fontify-union-types) + ;; class Type(Base1, Sequence[T]). + (class_definition + superclasses: + (argument_list [(identifier) @font-lock-type-face + (attribute attribute: (identifier) @font-lock-type-face) + (subscript (identifier) @font-lock-type-face) + (subscript (attribute attribute: (identifier) @font-lock-type-face))])) + + ;; Pattern matching: case [str(), pack0.Type0()]. Take only the + ;; last identifier. + (class_pattern (dotted_name (identifier) @font-lock-type-face :anchor)) + + + ;; Highlight the second argument as a type in isinstance/issubclass. + ((call function: (identifier) @func-name + (argument_list :anchor (_) + [(identifier) @font-lock-type-face + (attribute attribute: (identifier) @font-lock-type-face) + (tuple (identifier) @font-lock-type-face) + (tuple (attribute attribute: (identifier) @font-lock-type-face)) + (binary_operator) @python--treesit-fontify-union-types])) + (:match "^is\\(?:instance\\|subclass\\)$" @func-name))) :feature 'escape-sequence :language 'python diff --git a/test/lisp/progmodes/python-tests.el b/test/lisp/progmodes/python-tests.el index a44a11896f..fd4d593613 100644 --- a/test/lisp/progmodes/python-tests.el +++ b/test/lisp/progmodes/python-tests.el @@ -7299,6 +7299,291 @@ python-tests--flymake-command-output-pattern "Unused import a.b.c (unused-import)" "W0611: Unused import a.b.c (unused-import)")))))) +;;; python-ts-mode font-lock tests + +(defmacro python-ts-tests-with-temp-buffer (contents &rest body) + "Create a `python-ts-mode' enabled temp buffer with CONTENTS. +BODY is code to be executed within the temp buffer. Point is +always located at the beginning of buffer." 
+ (declare (indent 1) (debug t)) + `(with-temp-buffer + (skip-unless (treesit-ready-p 'python)) + (require 'python) + (let ((python-indent-guess-indent-offset nil)) + (python-ts-mode) + (setopt treesit-font-lock-level 3) + (insert ,contents) + (font-lock-ensure) + (goto-char (point-min)) + ,@body))) + +(ert-deftest python-ts-mode-compound-keywords-face () + (dolist (test '("is not" "not in")) + (python-ts-tests-with-temp-buffer + (concat "t " test " t") + (forward-to-word) + (should (eq (face-at-point) font-lock-keyword-face)) + (forward-to-word) + (should (eq (face-at-point) font-lock-keyword-face))))) + +(ert-deftest python-ts-mode-named-assignement-face-1 () + (python-ts-tests-with-temp-buffer + "var := 3" + (should (eq (face-at-point) font-lock-variable-name-face)))) + +(ert-deftest python-ts-mode-assignement-face-2 () + (python-ts-tests-with-temp-buffer + "var, *rest = call()" + (dolist (test '("var" "rest")) + (search-forward test) + (goto-char (match-beginning 0)) + (should (eq (face-at-point) font-lock-variable-name-face)))) + + (python-ts-tests-with-temp-buffer + "def func(*args):" + (dolist (test '("args")) + (search-forward test) + (goto-char (match-beginning 0)) + (should (not (eq (face-at-point) font-lock-variable-name-face)))))) + +(ert-deftest python-ts-mode-nested-types-face-1 () + (python-ts-tests-with-temp-buffer + "def func(v:dict[ list[ tuple[str] ], int | None] | None):" + (dolist (test '("dict" "list" "tuple" "str" "int" "None" "None")) + (search-forward test) + (goto-char (match-beginning 0)) + (should (eq (face-at-point) font-lock-type-face))))) + +(ert-deftest python-ts-mode-union-types-face-1 () + (python-ts-tests-with-temp-buffer + "def f(val: tuple[tuple, list[Lvl1 | Lvl2[Lvl3[Lvl4[Lvl5 | None]], Lvl2]]]):" + (dolist (test '("tuple" "tuple" "list" "Lvl1" "Lvl2" "Lvl3" "Lvl4" "Lvl5" "None" "Lvl2")) + (search-forward test) + (goto-char (match-beginning 0)) + (should (eq (face-at-point) font-lock-type-face))))) + +(ert-deftest 
python-ts-mode-union-types-face-2 () + (python-ts-tests-with-temp-buffer + "def f(val: Type0 | Type1[Type2, pack0.Type3] | pack1.pack2.Type4 | None):" + (dolist (test '("Type0" "Type1" "Type2" "Type3" "Type4" "None")) + (search-forward test) + (goto-char (match-beginning 0)) + (should (eq (face-at-point) font-lock-type-face))) + + (goto-char (point-min)) + (dolist (test '("pack0" "pack1" "pack2")) + (search-forward test) + (goto-char (match-beginning 0)) + (should (not (eq (face-at-point) font-lock-type-face)))))) + +(ert-deftest python-ts-mode-types-face-1 () + (python-ts-tests-with-temp-buffer + "def f(val: Callable[[Type0], (Type1, Type2)]):" + (dolist (test '("Callable" "Type0" "Type1" "Type2")) + (search-forward test) + (goto-char (match-beginning 0)) + (should (eq (face-at-point) font-lock-type-face))))) + +(ert-deftest python-ts-mode-types-face-2 () + (python-ts-tests-with-temp-buffer + "def annot3(val:pack0.Type0)->pack1.pack2.pack3.Type1:" + (dolist (test '("Type0" "Type1")) + (search-forward test) + (goto-char (match-beginning 0)) + (should (eq (face-at-point) font-lock-type-face))) + (goto-char (point-min)) + (dolist (test '("pack0" "pack1" "pack2" "pack3")) + (search-forward test) + (goto-char (match-beginning 0)) + (should (not (eq (face-at-point) font-lock-type-face)))))) + +(ert-deftest python-ts-mode-types-face-3 () + (python-ts-tests-with-temp-buffer + "def annot3(val:collections.abc.Iterator[Type0]):" + (dolist (test '("Iterator" "Type0")) + (search-forward test) + (goto-char (match-beginning 0)) + (should (eq (face-at-point) font-lock-type-face))) + (goto-char (point-min)) + (dolist (test '("collections" "abc")) + (search-forward test) + (goto-char (match-beginning 0)) + (should (not (eq (face-at-point) font-lock-type-face)))))) + +(ert-deftest python-ts-mode-isinstance-type-face-1 () + (python-ts-tests-with-temp-buffer + "isinstance(var1, pkg.Type0) + isinstance(var2, (str, dict, Type1, type(None))) + isinstance(var3, my_type())" + + (dolist 
(test '("var1" "pkg" "var2" "type" "None" "var3" "my_type")) + (let ((case-fold-search nil)) + (search-forward test)) + (goto-char (match-beginning 0)) + (should (not (eq (face-at-point) font-lock-type-face)))) + + (goto-char (point-min)) + (dolist (test '("Type0" "str" "dict" "Type1")) + (search-forward test) + (goto-char (match-beginning 0)) + (should (eq (face-at-point) font-lock-type-face))))) + +(ert-deftest python-ts-mode-isinstance-type-face-2 () + (python-ts-tests-with-temp-buffer + "issubclass(mytype, int|list|collections.abc.Iterable)" + (dolist (test '("int" "list" "Iterable")) + (search-forward test) + (goto-char (match-beginning 0)) + (should (eq (face-at-point) font-lock-type-face))))) + +(ert-deftest python-ts-mode-superclass-type-face () + (python-ts-tests-with-temp-buffer + "class Temp(Base1, pack0.Base2, Sequence[T1, T2]):" + + (dolist (test '("Base1" "Base2" "Sequence" "T1" "T2")) + (search-forward test) + (goto-char (match-beginning 0)) + (should (eq (face-at-point) font-lock-type-face))) + + (goto-char (point-min)) + (dolist (test '("pack0")) + (search-forward test) + (goto-char (match-beginning 0)) + (should (not (eq (face-at-point) font-lock-type-face)))))) + +(ert-deftest python-ts-mode-class-patterns-face () + (python-ts-tests-with-temp-buffer + "match tt: + case str(): + pass + case [Type0() | bytes(b) | pack0.pack1.Type1()]: + pass + case {'i': int(i), 'f': float() as f}: + pass" + + (dolist (test '("str" "Type0" "bytes" "Type1" "int" "float")) + (search-forward test) + (goto-char (match-beginning 0)) + (should (eq (face-at-point) font-lock-type-face))) + + (goto-char (point-min)) + (dolist (test '("pack0" "pack1")) + (search-forward test) + (goto-char (match-beginning 0)) + (should (not (eq (face-at-point) font-lock-type-face)))))) + +(ert-deftest python-ts-mode-dotted-decorator-face-1 () + (python-ts-tests-with-temp-buffer + "@pytest.mark.skip + @pytest.mark.skip(reason='msg') + def test():" + + (dolist (test '("pytest" "mark" "skip" 
"pytest" "mark" "skip")) + (search-forward test) + (goto-char (match-beginning 0)) + (should (eq (face-at-point) font-lock-type-face))))) + +(ert-deftest python-ts-mode-dotted-decorator-face-2 () + (python-ts-tests-with-temp-buffer + "@pytest.mark.skip(reason='msg') + def test():" + + (setopt treesit-font-lock-level 4) + (dolist (test '("pytest" "mark" "skip")) + (search-forward test) + (goto-char (match-beginning 0)) + (should (eq (face-at-point) font-lock-type-face))))) + +(ert-deftest python-ts-mode-builtin-call-face () + (python-ts-tests-with-temp-buffer + "all()" + ;; enable 'function' feature from 4th level + (setopt treesit-font-lock-level 4) + (should (eq (face-at-point) font-lock-builtin-face)))) + +(ert-deftest python-ts-mode-interpolation-nested-string () + (python-ts-tests-with-temp-buffer + "t = f\"beg {True + 'string'}\"" + + (search-forward "True") + (goto-char (match-beginning 0)) + (should (eq (face-at-point) font-lock-constant-face)) + + (goto-char (point-min)) + (dolist (test '("f" "{" "+" "}")) + (search-forward test) + (goto-char (match-beginning 0)) + (should (not (eq (face-at-point) font-lock-string-face)))) + + + (goto-char (point-min)) + (dolist (test '("beg" "'string'" "\"")) + (search-forward test) + (goto-char (match-beginning 0)) + (should (eq (face-at-point) font-lock-string-face))))) + +(ert-deftest python-ts-mode-level-fontification-wo-interpolation () + (python-ts-tests-with-temp-buffer + "t = f\"beg {True + var}\"" + + (setopt treesit-font-lock-level 2) + (search-forward "f") + (goto-char (match-beginning 0)) + (should (not (eq (face-at-point) font-lock-string-face))) + + (dolist (test '("\"" "beg" "{" "True" "var" "}" "\"")) + (search-forward test) + (goto-char (match-beginning 0)) + (should (eq (face-at-point) font-lock-string-face))))) + +(ert-deftest python-ts-mode-disabled-string-interpolation () + (python-ts-tests-with-temp-buffer + "t = f\"beg {True + var}\"" + + (unwind-protect + (progn + (setf (nth 2 
treesit-font-lock-feature-list) + (remq 'string-interpolation (nth 2 treesit-font-lock-feature-list))) + (setopt treesit-font-lock-level 3) + + (search-forward "f") + (goto-char (match-beginning 0)) + (should (not (eq (face-at-point) font-lock-string-face))) + + (dolist (test '("\"" "beg" "{" "True" "var" "}" "\"")) + (search-forward test) + (goto-char (match-beginning 0)) + (should (eq (face-at-point) font-lock-string-face)))) + + (setf (nth 2 treesit-font-lock-feature-list) + (append (nth 2 treesit-font-lock-feature-list) '(string-interpolation)))))) + +(ert-deftest python-ts-mode-interpolation-doc-string () + (python-ts-tests-with-temp-buffer + "f\"\"\"beg {'s1' + True + 's2'} end\"\"\"" + + (search-forward "True") + (goto-char (match-beginning 0)) + (should (eq (face-at-point) font-lock-constant-face)) + + (goto-char (point-min)) + (dolist (test '("f" "{" "+" "}")) + (search-forward test) + (goto-char (match-beginning 0)) + (should (not (eq (face-at-point) font-lock-string-face)))) + + (goto-char (point-min)) + (dolist (test '("\"\"\"" "beg" "end" "\"\"\"")) + (search-forward test) + (goto-char (match-beginning 0)) + (should (eq (face-at-point) font-lock-doc-face))) + + (goto-char (point-min)) + (dolist (test '("'s1'" "'s2'")) + (search-forward test) + (goto-char (match-beginning 0)) + (should (eq (face-at-point) font-lock-string-face))))) + (provide 'python-tests) ;;; python-tests.el ends here -- 2.34.1