all messages for Emacs-related lists mirrored at yhetil.org
 help / color / mirror / code / Atom feed
From: Yuan Fu <casouri@gmail.com>
To: Dmitry Gutov <dmitry@gutov.dev>
Cc: Eli Zaretskii <eliz@gnu.org>, 62951@debbugs.gnu.org
Subject: bug#62951: 29.0.90; c-ts-mode: Incorrect fontification due to FOR_EACH_TAIL_SAFE
Date: Wed, 26 Apr 2023 20:14:45 -0700	[thread overview]
Message-ID: <36F36D54-E56C-42C8-B686-448C8BF938EA@gmail.com> (raw)
In-Reply-To: <90E66143-A3E2-4B76-AC0A-01DC5E841AFE@gmail.com>

[-- Attachment #1: Type: text/plain, Size: 444 bytes --]

> 
> My idea right now is to use the range feature in tree-sitter. Since the “body” of FOR_EACH_TAIL is valid C, I can either set the ranges for the parser so it ignores FOR_EACH_TAIL

I ended up going this route.

> or I can add another parser that only parses the body of FOR_EACH_TAIL.

Ok, here’s the patch. Eli, would you give it a try? This is a sizable patch so I’m not sure if you want it on emacs-29 or master.

Yuan


[-- Attachment #2: for-each-tail-fix.patch --]
[-- Type: application/octet-stream, Size: 8003 bytes --]

From 6d9a9a4ce88f996d87fd121aaf6bfa21581fac30 Mon Sep 17 00:00:00 2001
From: Yuan Fu <casouri@gmail.com>
Date: Wed, 26 Apr 2023 20:09:42 -0700
Subject: [PATCH] Fix FOR_EACH_TAIL in c-ts-mode (bug#62951)

* lisp/progmodes/c-ts-mode.el
(c-ts-mode--indent-styles): New indent rule.
* lisp/progmodes/c-ts-mode.el (c-ts-mode--for-each-tail-body-matcher):
(c-ts-mode--emacs-c-range-query)
(c-ts-mode--for-each-tail-ranges)
(c-ts-mode--reverse-ranges)
(c-ts-mode--emacs-set-ranges): New functions and variables.
(c-ts-mode): Create an emacs-c parser.  More setup for Emacs source
support.

* lisp/treesit.el (treesit-query-range): Ignore underscore-prefixed
capture names.
---
 lisp/progmodes/c-ts-mode.el | 92 ++++++++++++++++++++++++++++++++++++-
 lisp/treesit.el             | 17 +++++--
 2 files changed, 103 insertions(+), 6 deletions(-)

diff --git a/lisp/progmodes/c-ts-mode.el b/lisp/progmodes/c-ts-mode.el
index 6100f00e3ba..c4e1ba7a23a 100644
--- a/lisp/progmodes/c-ts-mode.el
+++ b/lisp/progmodes/c-ts-mode.el
@@ -357,7 +357,9 @@ c-ts-mode--indent-styles
   "Indent rules supported by `c-ts-mode'.
 MODE is either `c' or `cpp'."
   (let ((common
-         `(((parent-is "translation_unit") column-0 0)
+         `((c-ts-mode--for-each-tail-body-matcher prev-line c-ts-mode-indent-offset)
+
+           ((parent-is "translation_unit") column-0 0)
            ((query "(ERROR (ERROR)) @indent") column-0 0)
            ((node-is ")") parent 1)
            ((node-is "]") parent-bol 0)
@@ -969,6 +971,75 @@ c-ts-mode--emacs-current-defun-name
   (or (treesit-add-log-current-defun)
       (c-ts-mode--defun-name (c-ts-mode--emacs-defun-at-point))))
 
+;;; FOR_EACH_TAIL fix
+;;
+;; FOR_EACH_TAIL (and FOR_EACH_TAIL_SAFE) followed by an unbracketed
+;; body will mess up the parser, which parses the thing as a function
+;; declaration.  We "fix" it by adding a shadow parser, emacs-c (which
+;; is just c but under a different name).  We use emacs-c to find each
+;; FOR_EACH_TAIL with an unbracketed body, and set the ranges of the C
+;; parser so that it skips those FOR_EACH_TAIL's.  Note that we only
+;; ignore FOR_EACH_TAIL's with an unbracketed body.  Those with a
+;; bracketed body parse more or less fine.
+
+(defun c-ts-mode--for-each-tail-body-matcher (_n _p bol &rest _)
+  "Match the first line after a FOR_EACH_TAIL or similar macro line.
+For BOL see `treesit-simple-indent-rules'."
+  (when c-ts-mode-emacs-sources-support
+    (save-excursion
+      (goto-char bol)
+      (forward-line -1)
+      (skip-chars-forward " \t")
+      (looking-at (rx "FOR_EACH_" (or "TAIL_SAFE" "TAIL" "ALIST_VALUE"))))))
+
+(defvar c-ts-mode--emacs-c-range-query
+  (treesit-query-compile
+   'emacs-c `(((declaration
+                type: (macro_type_specifier
+                       name: (identifier) @_name)
+                @for-each-tail)
+               (:match ,(rx "FOR_EACH_"
+                            (or "TAIL_SAFE" "TAIL" "ALIST_VALUE"))
+                       @_name))))
+  "Query that finds FOR_EACH_TAIL-like macros with an unbracketed body.")
+
+(defvar-local c-ts-mode--for-each-tail-ranges nil
+  "Ranges, as (BEG . END) pairs, of the FOR_EACH_TAIL's in the buffer.")
+
+(defun c-ts-mode--reverse-ranges (ranges beg end)
+  "Reverse RANGES and return the new ranges between BEG and END.
+Positions that were included in RANGES are not in the returned
+ranges, and vice versa.  Empty ranges are never returned.
+
+Return nil if RANGES is nil.  This way, passing the returned
+ranges to `treesit-parser-set-included-ranges' will make the
+parser parse the whole buffer."
+  (when ranges
+    (let ((new-ranges nil)
+          (prev-end beg))
+      (dolist (range (append ranges (list (cons end end))))
+        ;; Skip zero-length gaps: they are not useful parser ranges.
+        (when (< prev-end (car range))
+          (push (cons prev-end (car range)) new-ranges))
+        (setq prev-end (cdr range)))
+      (nreverse new-ranges))))
+
+(defun c-ts-mode--emacs-set-ranges (beg end)
+  "Make the C parser skip FOR_EACH_TAIL's with an unbracketed body.
+BEG and END are described in `treesit-range-rules'."
+  (let* ((c-parser (treesit-parser-create 'c))
+         (old-ranges c-ts-mode--for-each-tail-ranges)
+         (new-ranges (treesit-query-range
+                      'emacs-c c-ts-mode--emacs-c-range-query beg end))
+         (set-ranges (treesit--clip-ranges
+                      (treesit--merge-ranges
+                       old-ranges new-ranges beg end)
+                      (point-min) (point-max)))
+         (reversed-ranges (c-ts-mode--reverse-ranges
+                           set-ranges (point-min) (point-max))))
+    (setq-local c-ts-mode--for-each-tail-ranges set-ranges)
+    (treesit-parser-set-included-ranges c-parser reversed-ranges)))
+
 ;;; Modes
 
 (defvar-keymap c-ts-base-mode-map
@@ -1072,6 +1143,13 @@ c-ts-mode
   :after-hook (c-ts-mode-set-modeline)
 
   (when (treesit-ready-p 'c)
+    ;; If Emacs source support is enabled, make sure emacs-c parser is
+    ;; after c parser in the parser list. This way various tree-sitter
+    ;; functions will automatically use the c parser rather than the
+    ;; emacs-c parser.
+    (when c-ts-mode-emacs-sources-support
+      (treesit-parser-create 'emacs-c))
+
     (treesit-parser-create 'c)
     ;; Comments.
     (setq-local comment-start "/* ")
@@ -1087,7 +1165,17 @@ c-ts-mode
 
     (when c-ts-mode-emacs-sources-support
       (setq-local add-log-current-defun-function
-                  #'c-ts-mode--emacs-current-defun-name))))
+                  #'c-ts-mode--emacs-current-defun-name)
+
+      (setq-local treesit-range-settings
+                  (treesit-range-rules 'c-ts-mode--emacs-set-ranges))
+
+      (setq-local treesit-language-at-point-function (lambda (_pos) 'c))
+
+      ;; Add a fake "emacs-c" language which is just C.  Used for
+      ;; skipping FOR_EACH_TAIL, see `c-ts-mode--emacs-set-ranges'.
+      (setf (alist-get 'emacs-c treesit-load-name-override-list)
+            '("libtree-sitter-c" "tree_sitter_c")))))
 
 ;;;###autoload
 (define-derived-mode c++-ts-mode c-ts-base-mode "C++"
diff --git a/lisp/treesit.el b/lisp/treesit.el
index e718ea1a23a..1d4749c8cd2 100644
--- a/lisp/treesit.el
+++ b/lisp/treesit.el
@@ -378,13 +378,16 @@ treesit-query-string
 (defun treesit-query-range (node query &optional beg end)
   "Query the current buffer and return ranges of captured nodes.
 
-QUERY, NODE, BEG, END are the same as in
-`treesit-query-capture'.  This function returns a list
-of (START . END), where START and END specifics the range of each
-captured node.  Capture names don't matter."
+QUERY, NODE, BEG, END are the same as in `treesit-query-capture'.
+This function returns a list of (START . END), where START and
+END specifies the range of each captured node.  Capture names
+generally don't matter, but names that start with an underscore
+are ignored."
   (cl-loop for capture
            in (treesit-query-capture node query beg end)
+           for name = (car capture)
            for node = (cdr capture)
+           if (not (string-prefix-p "_" (symbol-name name)))
            collect (cons (treesit-node-start node)
                          (treesit-node-end node))))
 
@@ -399,6 +402,9 @@ treesit-range-settings
 range to the range spanned by captured nodes.  QUERY must be a
 compiled query.
 
+Capture names generally don't matter, but names that start with
+an underscore are ignored.
+
 QUERY can also be a function, in which case it is called with 2
 arguments, START and END.  It should ensure parsers' ranges are
 correct in the region between START and END.
@@ -418,6 +424,9 @@ treesit-range-rules
 Each QUERY is a tree-sitter query in either the string,
 s-expression or compiled form.
 
+Capture names generally don't matter, but names that start with
+an underscore are ignored.
+
 For each QUERY, :KEYWORD and VALUE pairs add meta information to
 it.  For example,
 
-- 
2.33.1


  reply	other threads:[~2023-04-27  3:14 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-04-19 16:40 bug#62951: 29.0.90; c-ts-mode: Incorrect fontification due to FOR_EACH_TAIL_SAFE Eli Zaretskii
2023-04-21 20:37 ` Yuan Fu
2023-04-22  7:17   ` Eli Zaretskii
2023-04-23  0:28     ` Yuan Fu
2023-04-23  6:25       ` Eli Zaretskii
2023-04-24  7:02         ` Yuan Fu
2023-04-23 21:04       ` Dmitry Gutov
2023-04-26 22:19         ` Yuan Fu
2023-04-27  3:14           ` Yuan Fu [this message]
2023-04-27 15:03             ` Eli Zaretskii
2023-04-27 19:56               ` Yuan Fu
2023-04-28  5:41                 ` Eli Zaretskii
2023-04-29 22:55                   ` Yuan Fu
2023-04-30  5:24                     ` Eli Zaretskii
2023-04-27  8:57           ` Dmitry Gutov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=36F36D54-E56C-42C8-B686-448C8BF938EA@gmail.com \
    --to=casouri@gmail.com \
    --cc=62951@debbugs.gnu.org \
    --cc=dmitry@gutov.dev \
    --cc=eliz@gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this external index

	https://git.savannah.gnu.org/cgit/emacs.git
	https://git.savannah.gnu.org/cgit/emacs/org-mode.git

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.