unofficial mirror of emacs-devel@gnu.org 
 help / color / mirror / code / Atom feed
* Initial fontification in sh-mode with tree-sittter
@ 2022-10-27 22:01 João Paulo Labegalini de Carvalho
  2022-10-27 23:09 ` João Paulo Labegalini de Carvalho
                   ` (3 more replies)
  0 siblings, 4 replies; 53+ messages in thread
From: João Paulo Labegalini de Carvalho @ 2022-10-27 22:01 UTC (permalink / raw)
  To: emacs-devel


[-- Attachment #1.1: Type: text/plain, Size: 1201 bytes --]

Hi everyone,

Please find the patch for enabling fontification in sh-mode (currently
only for bash) using tree-sitter.

I welcome all comments and suggestions to improve the patch.

I noticed a weird behavior with heredocs. Take the code below:

echo <<EOF
This is a here document.
EOF
echo "Done."

My patch correctly fontifies the code above, but if I kill the whole line
with the "This is a here document." text, then the sh-heredoc face bleeds
out and all the subsequent comments get fontified as part of the heredoc.

A similar behavior happens if tree-sitter is not enabled, if the heredoc is
empty then all subsequent commands are fontified as heredoc. However, as
soon as anything is added to the heredoc, then everything goes back to the
correct fontification.

Such "refreshing" does not happen with tree-sitter enabled, but if I
execute M-x sh-mode then the buffer gets refreshed and everything looks
good.

What am I doing wrong?

-- 
João Paulo L. de Carvalho
Ph.D Computer Science |  IC-UNICAMP | Campinas , SP - Brazil
Postdoctoral Research Fellow | University of Alberta | Edmonton, AB - Canada
joao.carvalho@ic.unicamp.br
joao.carvalho@ualberta.ca

[-- Attachment #1.2: Type: text/html, Size: 1834 bytes --]

[-- Attachment #2: 0001-Initial-fontification-in-sh-mode-with-tree-sitter.patch --]
[-- Type: text/x-patch, Size: 6639 bytes --]

From 48fb6f8e949a8caf73b0714d647947c069260797 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jo=C3=A3o=20P=2E=20L=2E=20de=20Carvalho?=
 <jaopaulolc@gmail.com>
Date: Thu, 27 Oct 2022 15:45:56 -0600
Subject: [PATCH] Initial fontification in sh-mode with tree-sitter

---
 lisp/progmodes/sh-script.el | 144 +++++++++++++++++++++++++++++++++---
 1 file changed, 133 insertions(+), 11 deletions(-)

diff --git a/lisp/progmodes/sh-script.el b/lisp/progmodes/sh-script.el
index 558b62b20a..c3645eb9e9 100644
--- a/lisp/progmodes/sh-script.el
+++ b/lisp/progmodes/sh-script.el
@@ -148,6 +148,7 @@
   (require 'let-alist)
   (require 'subr-x))
 (require 'executable)
+(require 'treesit)
 
 (autoload 'comint-completion-at-point "comint")
 (autoload 'comint-filename-completion "comint")
@@ -1534,13 +1535,6 @@ sh-mode
   ;; we can't look if previous line ended with `\'
   (setq-local comint-prompt-regexp "^[ \t]*")
   (setq-local imenu-case-fold-search nil)
-  (setq font-lock-defaults
-	`((sh-font-lock-keywords
-	   sh-font-lock-keywords-1 sh-font-lock-keywords-2)
-	  nil nil
-	  ((?/ . "w") (?~ . "w") (?. . "w") (?- . "w") (?_ . "w")) nil
-	  (font-lock-syntactic-face-function
-	   . ,#'sh-font-lock-syntactic-face-function)))
   (setq-local syntax-propertize-function #'sh-syntax-propertize-function)
   (add-hook 'syntax-propertize-extend-region-functions
             #'syntax-propertize-multiline 'append 'local)
@@ -1587,7 +1581,26 @@ sh-mode
    nil nil)
   (add-hook 'flymake-diagnostic-functions #'sh-shellcheck-flymake nil t)
   (add-hook 'hack-local-variables-hook
-    #'sh-after-hack-local-variables nil t))
+    #'sh-after-hack-local-variables nil t)
+
+  (cond
+   ;; Tree-sitter
+   ((treesit-ready-p 'sh-mode sh-shell)
+    (setq-local font-lock-keywords-only t)
+    (setq-local treesit-font-lock-feature-list
+                '((basic) (moderate) (elaborate)))
+    (setq-local treesit-font-lock-settings
+                sh-mode--treesit-settings)
+    (treesit-major-mode-setup))
+   ;; Elisp.
+   (t
+    (setq font-lock-defaults
+          `((sh-font-lock-keywords
+             sh-font-lock-keywords-1 sh-font-lock-keywords-2)
+            nil nil
+            ((?/ . "w") (?~ . "w") (?. . "w") (?- . "w") (?_ . "w")) nil
+            (font-lock-syntactic-face-function
+             . ,#'sh-font-lock-syntactic-face-function))))))
 
 ;;;###autoload
 (defalias 'shell-script-mode 'sh-mode)
@@ -3191,6 +3204,115 @@ sh-shellcheck-flymake
       (process-send-region sh--shellcheck-process (point-min) (point-max))
       (process-send-eof sh--shellcheck-process))))
 
-(provide 'sh-script)
-
-;;; sh-script.el ends here
+;;; Tree-sitter font-lock
+
+(defface sh-mode--treesit-special-var-name-face
+  '((t (:inherit font-lock-builtin-face)))
+  "Face name to use for special `sh-mode' variables (e.g. PATH)")
+
+(defface sh-mode--treesit-operator-face
+  '((t (:inherit font-lock-builtin-face)))
+  "Face name to use for `sh-mode' operators (e.g. <<)")
+
+(defface sh-mode--treesit-call-face
+  '((t (:inherit font-lock-function-name-face)))
+  "Face name to use for `sh-mode' non-builtin command calls")
+
+(defvar sh-mode--treesit-operators
+  '("|" "|&" "||" "&&" ">" ">>" "<" "<<" "<<-" "<<<" "==" "!=" ";"
+    ";;" ";&" ";;&")
+  "List of `sh-mode' operator to fontify")
+
+(defvar sh-mode--treesit-keywords
+  '("case" "do" "done" "elif" "else" "esac" "export" "fi" "for"
+    "function" "if" "in" "unset" "while" "then")
+  "Minimal list of keywords that belong to tree-sitter-bash's grammar.
+
+Some reserved words are not recognize to keep the grammar
+simpler. Those are identified with regex-based filtered queries.
+
+See `sh-mode--treesit-other-keywords' and
+`sh-mode--treesit-settings').")
+
+(defun sh-mode--treesit-other-keywords ()
+  "Returns a list `others' of key/reserved words to be fontified with
+regex-based queries as they are not part of tree-sitter-bash's
+grammar.
+
+See `sh-mode--treesit-other-keywords' and
+`sh-mode--treesit-settings')."
+  (let ((minimal sh-mode--treesit-keywords)
+        (all (append (sh-feature sh-leading-keywords)
+                     (sh-feature sh-other-keywords)))
+        (others))
+    (dolist (keyword all others)
+      (if (not (member keyword minimal))
+          (setq others (cons keyword others))))))
+
+(defvar sh-mode--treesit-settings
+  (treesit-font-lock-rules
+   :language sh-shell
+   :feature 'basic
+   :override t
+   '(;; function
+     (function_definition name: (word) @font-lock-function-name-face)
+     ;; comments
+     (comment) @font-lock-comment-face
+     ;; strings and heredoc
+     [ (string) (raw_string) ] @font-lock-string-face
+     ;; heredocs
+     [ (heredoc_start) (heredoc_body) ] @sh-heredoc
+     ;; variables
+     (variable_name) @font-lock-variable-name-face)
+   :language sh-shell
+   :feature 'moderate
+   :override t
+   `(;; keywords
+     [ ,@sh-mode--treesit-keywords ] @font-lock-keyword-face
+     ;; reserved words
+     (command_name
+      ((word) @font-lock-keyword-face
+       (:match
+        ,(rx-to-string
+            `(seq bol
+                  (or ,@(sh-mode--treesit-other-keywords))
+                  eol))
+        @font-lock-keyword-face)))
+     ;; function/non-builtin command calls
+     (command_name (word) @sh-mode--treesit-call-face)
+     ;; builtin commands
+     (command_name
+      ((word) @font-lock-builtin-face
+       (:match ,(let ((builtins
+                       (sh-feature sh-builtins)))
+                  (rx-to-string
+                   `(seq bol
+                         (or ,@builtins)
+                         eol)))
+               @font-lock-builtin-face)))
+     ;; declaration commands
+     (declaration_command) @font-lock-builtin-face
+     ;; variables
+     (variable_name) @font-lock-variable-name-face)
+   :language sh-shell
+   :feature 'elaborate
+   :override t
+   `(;; everything inside command substitution
+     (command_substitution _ _ @sh-quoted-exec _)
+     ;; constants
+     (case_item value: (word) @font-lock-constant-face)
+     (file_descriptor) @font-lock-constant-face
+     ;; operators
+     [ ,@sh-mode--treesit-operators ] @sh-mode--treesit-operator-face
+     ;; special variables
+     ((variable_name) @sh-mode--treesit-special-var-name-face
+      (:match ,(let ((builtin-vars (sh-feature sh-variables)))
+                 (rx-to-string
+                  `(seq bol
+                        (or ,@builtin-vars)
+                        eol)))
+              @sh-mode--treesit-special-var-name-face))))
+  "Tree-sitter font-lock settings for `sh-mode'.")
+
+(provide 'sh-mode)
+;;; sh-mode.el ends here
-- 
2.31.1


^ permalink raw reply related	[flat|nested] 53+ messages in thread

* Re: Initial fontification in sh-mode with tree-sittter
  2022-10-27 22:01 Initial fontification in sh-mode with tree-sittter João Paulo Labegalini de Carvalho
@ 2022-10-27 23:09 ` João Paulo Labegalini de Carvalho
  2022-10-27 23:40   ` João Paulo Labegalini de Carvalho
  2022-10-28  0:18 ` Stefan Kangas
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 53+ messages in thread
From: João Paulo Labegalini de Carvalho @ 2022-10-27 23:09 UTC (permalink / raw)
  To: emacs-devel

[-- Attachment #1: Type: text/plain, Size: 593 bytes --]

>
> A similar behavior happens if tree-sitter is not enabled, if the heredoc
> is empty then all subsequent commands are fontified as heredoc. However, as
> soon as anything is added to the heredoc, then everything goes back to the
> correct fontification.
>

Please ignore the above comment. The "no refresh" bug only happens with
tree-sitter enabled.

-- 
João Paulo L. de Carvalho
Ph.D Computer Science |  IC-UNICAMP | Campinas , SP - Brazil
Postdoctoral Research Fellow | University of Alberta | Edmonton, AB - Canada
joao.carvalho@ic.unicamp.br
joao.carvalho@ualberta.ca

[-- Attachment #2: Type: text/html, Size: 1116 bytes --]

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: Initial fontification in sh-mode with tree-sittter
  2022-10-27 23:09 ` João Paulo Labegalini de Carvalho
@ 2022-10-27 23:40   ` João Paulo Labegalini de Carvalho
  2022-10-28  8:12     ` Yuan Fu
  0 siblings, 1 reply; 53+ messages in thread
From: João Paulo Labegalini de Carvalho @ 2022-10-27 23:40 UTC (permalink / raw)
  To: emacs-devel

[-- Attachment #1: Type: text/plain, Size: 1487 bytes --]

A similar thing happens on python-mode with tree-sitter enabled.

Given a buffer with the following code:

defun foo():
    return 42

If a multi-line comment is inserted as in:

"""
defun foo():
    return 42
"""

The function definition does not get fontified as part of a comment. But if
I execute C-x x f, then everything looks good. Now If one of the markers is
deleted, nothing changes until I execute C-x x f, which then makes
everything look correct again.

It seems that an auto "refreshing" is missing.

On Thu, Oct 27, 2022 at 5:09 PM João Paulo Labegalini de Carvalho <
jaopaulolc@gmail.com> wrote:

> A similar behavior happens if tree-sitter is not enabled, if the heredoc
>> is empty then all subsequent commands are fontified as heredoc. However, as
>> soon as anything is added to the heredoc, then everything goes back to the
>> correct fontification.
>>
>
> Please ignore the above comment. The "no refresh" bug only happens with
> tree-sitter enabled.
>
> --
> João Paulo L. de Carvalho
> Ph.D Computer Science |  IC-UNICAMP | Campinas , SP - Brazil
> Postdoctoral Research Fellow | University of Alberta | Edmonton, AB -
> Canada
> joao.carvalho@ic.unicamp.br
> joao.carvalho@ualberta.ca
>


-- 
João Paulo L. de Carvalho
Ph.D Computer Science |  IC-UNICAMP | Campinas , SP - Brazil
Postdoctoral Research Fellow | University of Alberta | Edmonton, AB - Canada
joao.carvalho@ic.unicamp.br
joao.carvalho@ualberta.ca

[-- Attachment #2: Type: text/html, Size: 3012 bytes --]

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: Initial fontification in sh-mode with tree-sittter
  2022-10-27 22:01 Initial fontification in sh-mode with tree-sittter João Paulo Labegalini de Carvalho
  2022-10-27 23:09 ` João Paulo Labegalini de Carvalho
@ 2022-10-28  0:18 ` Stefan Kangas
  2022-10-28  0:48   ` João Paulo Labegalini de Carvalho
  2022-10-28 15:27 ` João Paulo Labegalini de Carvalho
  2022-11-02 18:22 ` João Paulo Labegalini de Carvalho
  3 siblings, 1 reply; 53+ messages in thread
From: Stefan Kangas @ 2022-10-28  0:18 UTC (permalink / raw)
  To: João Paulo Labegalini de Carvalho, emacs-devel

João Paulo Labegalini de Carvalho <jaopaulolc@gmail.com> writes:

> +(defface sh-mode--treesit-special-var-name-face
> +  '((t (:inherit font-lock-builtin-face)))
> +  "Face name to use for special `sh-mode' variables (e.g. PATH)")
> +
> +(defface sh-mode--treesit-operator-face
> +  '((t (:inherit font-lock-builtin-face)))
> +  "Face name to use for `sh-mode' operators (e.g. <<)")
> +
> +(defface sh-mode--treesit-call-face
> +  '((t (:inherit font-lock-function-name-face)))
> +  "Face name to use for `sh-mode' non-builtin command calls")

Are those faces really needed?  Is it not enough with just the
font-lock-* faces we already have?

If the answer is that we do need them, should they really be marked
private?  I thought the point of a face was allowing users and themes to
customize them.



^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: Initial fontification in sh-mode with tree-sittter
  2022-10-28  0:18 ` Stefan Kangas
@ 2022-10-28  0:48   ` João Paulo Labegalini de Carvalho
  0 siblings, 0 replies; 53+ messages in thread
From: João Paulo Labegalini de Carvalho @ 2022-10-28  0:48 UTC (permalink / raw)
  To: Stefan Kangas; +Cc: emacs-devel

[-- Attachment #1: Type: text/plain, Size: 851 bytes --]

On Thu, Oct 27, 2022 at 6:18 PM Stefan Kangas <stefankangas@gmail.com>
wrote:

> Are those faces really needed?  Is it not enough with just the
> font-lock-* faces we already have?
>

They are not needed but might be desirable for users that want to highlight
those parts of the code that do not have a font-lock* face to them.
Would it be better to define them as font-lock faces instead?


> If the answer is that we do need them, should they really be marked
> private?  I thought the point of a face was allowing users and themes to
> customize them.
>

You are right, those should not be private.

-- 
João Paulo L. de Carvalho
Ph.D Computer Science |  IC-UNICAMP | Campinas , SP - Brazil
Postdoctoral Research Fellow | University of Alberta | Edmonton, AB - Canada
joao.carvalho@ic.unicamp.br
joao.carvalho@ualberta.ca

[-- Attachment #2: Type: text/html, Size: 1626 bytes --]

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: Initial fontification in sh-mode with tree-sittter
  2022-10-27 23:40   ` João Paulo Labegalini de Carvalho
@ 2022-10-28  8:12     ` Yuan Fu
  2022-10-28 15:09       ` Daniel Martín
  0 siblings, 1 reply; 53+ messages in thread
From: Yuan Fu @ 2022-10-28  8:12 UTC (permalink / raw)
  To: João Paulo Labegalini de Carvalho; +Cc: emacs-devel



> On Oct 27, 2022, at 4:40 PM, João Paulo Labegalini de Carvalho <jaopaulolc@gmail.com> wrote:
> 
> A similar thing happens on python-mode with tree-sitter enabled.
> 
> Given a buffer with the following code:
> 
> defun foo():
>     return 42
> 
> If a multi-line comment is inserted as in:
> 
> """
> defun foo():
>     return 42
> """
> 
> The function definition does not get fontified as part of a comment. But if I execute C-x x f, then everything looks good. Now If one of the markers is deleted, nothing changes until I execute C-x x f, which then makes everything look correct again.
> 
> It seems that an auto "refreshing" is missing. 

Hmmm, I couldn’t reproduce this in python-mode, also defun is not recognized in python so I used this snippet:

def foo():
    Return 42

When I insert “”” before the defun, everything after becomes string face, when I insert the following “””, everything is updated again.

I didn’t make any significant change to the font-lock code recently, either.

Yuan


^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: Initial fontification in sh-mode with tree-sittter
  2022-10-28  8:12     ` Yuan Fu
@ 2022-10-28 15:09       ` Daniel Martín
  2022-10-31  2:13         ` Yuan Fu
  0 siblings, 1 reply; 53+ messages in thread
From: Daniel Martín @ 2022-10-28 15:09 UTC (permalink / raw)
  To: Yuan Fu; +Cc: João Paulo Labegalini de Carvalho, emacs-devel

Yuan Fu <casouri@gmail.com> writes:

>
> Hmmm, I couldn’t reproduce this in python-mode, also defun is not recognized in python so I used this snippet:
>
> def foo():
>     Return 42
>
> When I insert “”” before the defun, everything after becomes string face, when I insert the following “””, everything is updated again.
>
> I didn’t make any significant change to the font-lock code recently, either.
>

I can reproduce the problem by following these steps:

emacs -Q from top of the feature/tree-sitter branch
M-: (require 'treesit)
M-x customize-variable treesit-settings RET
Set "Activate" to "Yes" and apply the change.
C-x b sample.py RET
M-x python-mode
Write the following program:

def main():
    return 0

M-< C-o """ (the code is not fontified as string)
M-> """ (the code is not fontified as string)
M-x python-mode RET (the code _is_ fontified as string)

A git bisect tells that the first bad commit is
5159789e55d64c7482dff3dc1a621d01f530f83c

Hope this helps.



^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: Initial fontification in sh-mode with tree-sittter
  2022-10-27 22:01 Initial fontification in sh-mode with tree-sittter João Paulo Labegalini de Carvalho
  2022-10-27 23:09 ` João Paulo Labegalini de Carvalho
  2022-10-28  0:18 ` Stefan Kangas
@ 2022-10-28 15:27 ` João Paulo Labegalini de Carvalho
  2022-10-28 15:57   ` Stefan Kangas
  2022-11-02 18:22 ` João Paulo Labegalini de Carvalho
  3 siblings, 1 reply; 53+ messages in thread
From: João Paulo Labegalini de Carvalho @ 2022-10-28 15:27 UTC (permalink / raw)
  To: emacs-devel


[-- Attachment #1.1: Type: text/plain, Size: 1802 bytes --]

Hi,

In face of the comments about the risks of adding new faces, I edited my
patch to only use existing faces from font-lock.

Please let me know what you think of this new patch.

Thanks.

On Thu, Oct 27, 2022 at 4:01 PM João Paulo Labegalini de Carvalho <
jaopaulolc@gmail.com> wrote:

> Hi everyone,
>
> Please find the patch for enabling fontification in sh-mode (currently
> only for bash) using tree-sitter.
>
> I welcome all comments and suggestions to improve the patch.
>
> I noticed a weird behavior with heredocs. Take the code below:
>
> echo <<EOF
> This is a here document.
> EOF
> echo "Done."
>
> My patch correctly fontifies the code above, but if I kill the whole line
> with the "This is a here document." text, then the sh-heredoc face bleeds
> out and all the subsequent comments get fontified as part of the heredoc.
>
> A similar behavior happens if tree-sitter is not enabled, if the heredoc
> is empty then all subsequent commands are fontified as heredoc. However, as
> soon as anything is added to the heredoc, then everything goes back to the
> correct fontification.
>
> Such "refreshing" does not happen with tree-sitter enabled, but if I
> execute M-x sh-mode then the buffer gets refreshed and everything looks
> good.
>
> What am I doing wrong?
>
> --
> João Paulo L. de Carvalho
> Ph.D Computer Science |  IC-UNICAMP | Campinas , SP - Brazil
> Postdoctoral Research Fellow | University of Alberta | Edmonton, AB -
> Canada
> joao.carvalho@ic.unicamp.br
> joao.carvalho@ualberta.ca
>


-- 
João Paulo L. de Carvalho
Ph.D Computer Science |  IC-UNICAMP | Campinas , SP - Brazil
Postdoctoral Research Fellow | University of Alberta | Edmonton, AB - Canada
joao.carvalho@ic.unicamp.br
joao.carvalho@ualberta.ca

[-- Attachment #1.2: Type: text/html, Size: 2906 bytes --]

[-- Attachment #2: 0001-Initial-fontification-in-sh-mode-with-tree-sitter.patch --]
[-- Type: text/x-patch, Size: 6143 bytes --]

From 1d81266f11ee61aaa33abef37385cbcab75022ea Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jo=C3=A3o=20P=2E=20L=2E=20de=20Carvalho?=
 <jaopaulolc@gmail.com>
Date: Thu, 27 Oct 2022 15:45:56 -0600
Subject: [PATCH] Initial fontification in sh-mode with tree-sitter

---
 lisp/progmodes/sh-script.el | 132 +++++++++++++++++++++++++++++++++---
 1 file changed, 121 insertions(+), 11 deletions(-)

diff --git a/lisp/progmodes/sh-script.el b/lisp/progmodes/sh-script.el
index 558b62b20a..2ab225b77d 100644
--- a/lisp/progmodes/sh-script.el
+++ b/lisp/progmodes/sh-script.el
@@ -148,6 +148,7 @@
   (require 'let-alist)
   (require 'subr-x))
 (require 'executable)
+(require 'treesit)
 
 (autoload 'comint-completion-at-point "comint")
 (autoload 'comint-filename-completion "comint")
@@ -1534,13 +1535,6 @@ sh-mode
   ;; we can't look if previous line ended with `\'
   (setq-local comint-prompt-regexp "^[ \t]*")
   (setq-local imenu-case-fold-search nil)
-  (setq font-lock-defaults
-	`((sh-font-lock-keywords
-	   sh-font-lock-keywords-1 sh-font-lock-keywords-2)
-	  nil nil
-	  ((?/ . "w") (?~ . "w") (?. . "w") (?- . "w") (?_ . "w")) nil
-	  (font-lock-syntactic-face-function
-	   . ,#'sh-font-lock-syntactic-face-function)))
   (setq-local syntax-propertize-function #'sh-syntax-propertize-function)
   (add-hook 'syntax-propertize-extend-region-functions
             #'syntax-propertize-multiline 'append 'local)
@@ -1587,7 +1581,26 @@ sh-mode
    nil nil)
   (add-hook 'flymake-diagnostic-functions #'sh-shellcheck-flymake nil t)
   (add-hook 'hack-local-variables-hook
-    #'sh-after-hack-local-variables nil t))
+    #'sh-after-hack-local-variables nil t)
+
+  (cond
+   ;; Tree-sitter
+   ((treesit-ready-p 'sh-mode sh-shell)
+    (setq-local font-lock-keywords-only t)
+    (setq-local treesit-font-lock-feature-list
+                '((basic) (moderate) (elaborate)))
+    (setq-local treesit-font-lock-settings
+                sh-mode--treesit-settings)
+    (treesit-major-mode-setup))
+   ;; Elisp.
+   (t
+    (setq font-lock-defaults
+          `((sh-font-lock-keywords
+             sh-font-lock-keywords-1 sh-font-lock-keywords-2)
+            nil nil
+            ((?/ . "w") (?~ . "w") (?. . "w") (?- . "w") (?_ . "w")) nil
+            (font-lock-syntactic-face-function
+             . ,#'sh-font-lock-syntactic-face-function))))))
 
 ;;;###autoload
 (defalias 'shell-script-mode 'sh-mode)
@@ -3191,6 +3204,103 @@ sh-shellcheck-flymake
       (process-send-region sh--shellcheck-process (point-min) (point-max))
       (process-send-eof sh--shellcheck-process))))
 
-(provide 'sh-script)
-
-;;; sh-script.el ends here
+;;; Tree-sitter font-lock
+
+(defvar sh-mode--treesit-operators
+  '("|" "|&" "||" "&&" ">" ">>" "<" "<<" "<<-" "<<<" "==" "!=" ";"
+    ";;" ";&" ";;&")
+  "List of `sh-mode' operator to fontify")
+
+(defvar sh-mode--treesit-keywords
+  '("case" "do" "done" "elif" "else" "esac" "export" "fi" "for"
+    "function" "if" "in" "unset" "while" "then")
+  "Minimal list of keywords that belong to tree-sitter-bash's grammar.
+
+Some reserved words are not recognize to keep the grammar
+simpler. Those are identified with regex-based filtered queries.
+
+See `sh-mode--treesit-other-keywords' and
+`sh-mode--treesit-settings').")
+
+(defun sh-mode--treesit-other-keywords ()
+  "Returns a list `others' of key/reserved words to be fontified with
+regex-based queries as they are not part of tree-sitter-bash's
+grammar.
+
+See `sh-mode--treesit-other-keywords' and
+`sh-mode--treesit-settings')."
+  (let ((minimal sh-mode--treesit-keywords)
+        (all (append (sh-feature sh-leading-keywords)
+                     (sh-feature sh-other-keywords)))
+        (others))
+    (dolist (keyword all others)
+      (if (not (member keyword minimal))
+          (setq others (cons keyword others))))))
+
+(defvar sh-mode--treesit-settings
+  (treesit-font-lock-rules
+   :language sh-shell
+   :feature 'basic
+   :override t
+   '(;; function
+     (function_definition name: (word) @font-lock-function-name-face)
+     ;; comments
+     (comment) @font-lock-comment-face
+     ;; strings and heredoc
+     [ (string) (raw_string) ] @font-lock-string-face
+     ;; heredocs
+     [ (heredoc_start) (heredoc_body) ] @sh-heredoc
+     ;; variables
+     (variable_name) @font-lock-variable-name-face)
+   :language sh-shell
+   :feature 'moderate
+   :override t
+   `(;; keywords
+     [ ,@sh-mode--treesit-keywords ] @font-lock-keyword-face
+     ;; reserved words
+     (command_name
+      ((word) @font-lock-keyword-face
+       (:match
+        ,(rx-to-string
+            `(seq bol
+                  (or ,@(sh-mode--treesit-other-keywords))
+                  eol))
+        @font-lock-keyword-face)))
+     ;; function/non-builtin command calls
+     (command_name (word) @font-lock-function-name-face)
+     ;; builtin commands
+     (command_name
+      ((word) @font-lock-builtin-face
+       (:match ,(let ((builtins
+                       (sh-feature sh-builtins)))
+                  (rx-to-string
+                   `(seq bol
+                         (or ,@builtins)
+                         eol)))
+               @font-lock-builtin-face)))
+     ;; declaration commands
+     (declaration_command) @font-lock-builtin-face
+     ;; variables
+     (variable_name) @font-lock-variable-name-face)
+   :language sh-shell
+   :feature 'elaborate
+   :override t
+   `(;; everything inside command substitution
+     (command_substitution _ _ @sh-quoted-exec _)
+     ;; constants
+     (case_item value: (word) @font-lock-constant-face)
+     (file_descriptor) @font-lock-constant-face
+     ;; operators
+     [ ,@sh-mode--treesit-operators ] @font-lock-builtin-face
+     ;; special variables
+     ((variable_name) @font-lock-builtin-face
+      (:match ,(let ((builtin-vars (sh-feature sh-variables)))
+                 (rx-to-string
+                  `(seq bol
+                        (or ,@builtin-vars)
+                        eol)))
+              @font-lock-builtin-face))))
+  "Tree-sitter font-lock settings for `sh-mode'.")
+
+(provide 'sh-mode)
+;;; sh-mode.el ends here
-- 
2.31.1


^ permalink raw reply related	[flat|nested] 53+ messages in thread

* Re: Initial fontification in sh-mode with tree-sittter
  2022-10-28 15:27 ` João Paulo Labegalini de Carvalho
@ 2022-10-28 15:57   ` Stefan Kangas
  2022-10-28 16:15     ` Stefan Monnier
  0 siblings, 1 reply; 53+ messages in thread
From: Stefan Kangas @ 2022-10-28 15:57 UTC (permalink / raw)
  To: João Paulo Labegalini de Carvalho, emacs-devel

João Paulo Labegalini de Carvalho <jaopaulolc@gmail.com> writes:

> +    (setq-local treesit-font-lock-feature-list
> +                '((basic) (moderate) (elaborate)))

I see that most modes on the branch seem to be using those three symbols
for now.

Are we sure about `basic', `moderate', and `elaborate', or is that just
a place-holder for something more fine-grained to be added later?  The
documentation seems to imply the latter.



^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: Initial fontification in sh-mode with tree-sittter
  2022-10-28 15:57   ` Stefan Kangas
@ 2022-10-28 16:15     ` Stefan Monnier
  2022-10-28 16:23       ` Theodor Thornhill
                         ` (2 more replies)
  0 siblings, 3 replies; 53+ messages in thread
From: Stefan Monnier @ 2022-10-28 16:15 UTC (permalink / raw)
  To: Stefan Kangas; +Cc: João Paulo Labegalini de Carvalho, emacs-devel

Stefan Kangas [2022-10-28 08:57:07] wrote:
> João Paulo Labegalini de Carvalho <jaopaulolc@gmail.com> writes:
>> +    (setq-local treesit-font-lock-feature-list
>> +                '((basic) (moderate) (elaborate)))
> I see that most modes on the branch seem to be using those three symbols
> for now.

Indeed, it's sad.  It's goes against the idea being
`treesit-font-lock-feature-list` which is to map features to levels.

It doesn't make much sense to call a highlight feature "moderate".
Highlight feature names should be things like "function names", "nested
functions", "multiline docstrings", "punctuation".


        Stefan




^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: Initial fontification in sh-mode with tree-sittter
  2022-10-28 16:15     ` Stefan Monnier
@ 2022-10-28 16:23       ` Theodor Thornhill
  2022-10-28 16:34       ` João Paulo Labegalini de Carvalho
  2022-10-28 17:44       ` Yuan Fu
  2 siblings, 0 replies; 53+ messages in thread
From: Theodor Thornhill @ 2022-10-28 16:23 UTC (permalink / raw)
  To: emacs-devel, Stefan Monnier, Stefan Kangas
  Cc: João Paulo Labegalini de Carvalho



On 28 October 2022 18:15:01 CEST, Stefan Monnier <monnier@iro.umontreal.ca> wrote:
>Stefan Kangas [2022-10-28 08:57:07] wrote:
>> João Paulo Labegalini de Carvalho <jaopaulolc@gmail.com> writes:
>>> +    (setq-local treesit-font-lock-feature-list
>>> +                '((basic) (moderate) (elaborate)))
>> I see that most modes on the branch seem to be using those three symbols
>> for now.
>
>Indeed, it's sad.  It's goes against the idea being
>`treesit-font-lock-feature-list` which is to map features to levels.
>
>It doesn't make much sense to call a highlight feature "moderate".
>Highlight feature names should be things like "function names", "nested
>functions", "multiline docstrings", "punctuation".

Agreed. Sloppy work on my end. I'll make a new commit this evening :-) 

Theo 



^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: Initial fontification in sh-mode with tree-sittter
  2022-10-28 16:15     ` Stefan Monnier
  2022-10-28 16:23       ` Theodor Thornhill
@ 2022-10-28 16:34       ` João Paulo Labegalini de Carvalho
  2022-10-28 17:37         ` Stefan Monnier
  2022-10-28 17:44       ` Yuan Fu
  2 siblings, 1 reply; 53+ messages in thread
From: João Paulo Labegalini de Carvalho @ 2022-10-28 16:34 UTC (permalink / raw)
  To: Stefan Monnier, emacs-devel

[-- Attachment #1: Type: text/plain, Size: 1408 bytes --]

On Fri, Oct 28, 2022 at 10:15 AM Stefan Monnier <monnier@iro.umontreal.ca>
wrote:

> Indeed, it's sad.  It's goes against the idea being
> `treesit-font-lock-feature-list` which is to map features to levels.
>
> It doesn't make much sense to call a highlight feature "moderate".
> Highlight feature names should be things like "function names", "nested
> functions", "multiline docstrings", "punctuation".
>

I agree with that. It seems that the initial effort on using tree-sitter
was following font-lock notions too closely.

For instance, if we are creating features such as "function names", "nested
functions", and "punctuation" does it still make sense to have something
like `font-lock-maximum-decoration'? Because, it is not clear to me how we
can say that fontifying "multiline docstrings" is a *higher* decoration
than fontifying "function names".

Maybe the items in the list should not have a priority of particular order,
their presence just serves to enable, while their absence disables, the
fontification of a particular feature.

I will isolate the queries to mirror the idea of each feature map to
program constructs as suggested.


-- 
João Paulo L. de Carvalho
Ph.D Computer Science |  IC-UNICAMP | Campinas , SP - Brazil
Postdoctoral Research Fellow | University of Alberta | Edmonton, AB - Canada
joao.carvalho@ic.unicamp.br
joao.carvalho@ualberta.ca

[-- Attachment #2: Type: text/html, Size: 2167 bytes --]

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: Initial fontification in sh-mode with tree-sittter
  2022-10-28 16:34       ` João Paulo Labegalini de Carvalho
@ 2022-10-28 17:37         ` Stefan Monnier
  2022-10-28 17:45           ` Yuan Fu
  0 siblings, 1 reply; 53+ messages in thread
From: Stefan Monnier @ 2022-10-28 17:37 UTC (permalink / raw)
  To: João Paulo Labegalini de Carvalho; +Cc: emacs-devel

> For instance, if we are creating features such as "function names", "nested
> functions", and "punctuation" does it still make sense to have something
> like `font-lock-maximum-decoration'? Because, it is not clear to me how we
> can say that fontifying "multiline docstrings" is a *higher* decoration
> than fontifying "function names".

The point of the design is that, while the details are murky, we can
make a "best effort" choice to map feature names to levels in
`treesit-font-lock-feature-list`.

This way, users can set a vague global "level" preference as a good
baseline, which they can more finely tune according to their own
preference by controlling individual features for specific modes.


        Stefan




^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: Initial fontification in sh-mode with tree-sittter
  2022-10-28 16:15     ` Stefan Monnier
  2022-10-28 16:23       ` Theodor Thornhill
  2022-10-28 16:34       ` João Paulo Labegalini de Carvalho
@ 2022-10-28 17:44       ` Yuan Fu
  2 siblings, 0 replies; 53+ messages in thread
From: Yuan Fu @ 2022-10-28 17:44 UTC (permalink / raw)
  To: Stefan Monnier
  Cc: Stefan Kangas, João Paulo Labegalini de Carvalho,
	emacs-devel



> On Oct 28, 2022, at 9:15 AM, Stefan Monnier <monnier@iro.umontreal.ca> wrote:
> 
> Stefan Kangas [2022-10-28 08:57:07] wrote:
>> João Paulo Labegalini de Carvalho <jaopaulolc@gmail.com> writes:
>>> +    (setq-local treesit-font-lock-feature-list
>>> +                '((basic) (moderate) (elaborate)))
>> I see that most modes on the branch seem to be using those three symbols
>> for now.
> 
> Indeed, it's sad.  It's goes against the idea being
> `treesit-font-lock-feature-list` which is to map features to levels.
> 
> It doesn't make much sense to call a highlight feature "moderate".
> Highlight feature names should be things like "function names", "nested
> functions", "multiline docstrings", "punctuation”.

Yeah it’s a kind of placeholder, plus a test for font-lock-maximum-decoration. I plan to refine it once I’m done with more pressing problems.

Yuan





^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: Initial fontification in sh-mode with tree-sittter
  2022-10-28 17:37         ` Stefan Monnier
@ 2022-10-28 17:45           ` Yuan Fu
  2022-10-28 18:12             ` Stefan Monnier
  2022-10-29  7:13             ` Augusto Stoffel
  0 siblings, 2 replies; 53+ messages in thread
From: Yuan Fu @ 2022-10-28 17:45 UTC (permalink / raw)
  To: Stefan Monnier; +Cc: João Paulo Labegalini de Carvalho, emacs-devel



> On Oct 28, 2022, at 10:37 AM, Stefan Monnier <monnier@iro.umontreal.ca> wrote:
> 
>> For instance, if we are creating features such as "function names", "nested
>> functions", and "punctuation" does it still make sense to have something
>> like `font-lock-maximum-decoration'? Because, it is not clear to me how we
>> can say that fontifying "multiline docstrings" is a *higher* decoration
>> than fontifying "function names".
> 
> The point of the design is that, while the details are murky, we can
> make a "best effort" choice to map feature names to levels in
> `treesit-font-lock-feature-list`.
> 
> This way, users can set a vague global "level" preference as a good
> baseline, which they can more finely tune according to their own
> preference by controlling individual features for specific modes.

I should probably add some convenient functions user can use to add/remove feature from that list, since its not a plain list, but rather a list of list.

Yuan


^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: Initial fontification in sh-mode with tree-sittter
  2022-10-28 17:45           ` Yuan Fu
@ 2022-10-28 18:12             ` Stefan Monnier
  2022-11-01  0:33               ` Yuan Fu
  2022-10-29  7:13             ` Augusto Stoffel
  1 sibling, 1 reply; 53+ messages in thread
From: Stefan Monnier @ 2022-10-28 18:12 UTC (permalink / raw)
  To: Yuan Fu; +Cc: João Paulo Labegalini de Carvalho, emacs-devel

>> This way, users can set a vague global "level" preference as a good
>> baseline, which they can more finely tune according to their own
>> preference by controlling individual features for specific modes.
> I should probably add some convenient functions user can use to add/remove
> feature from that list, since its not a plain list, but rather a list
> of list.

I think this var should be provided by the major mode and not touched
by users.

Instead we should provide another config var for users where they can
individually enable/disable specific features for specific major modes.


        Stefan




^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: Initial fontification in sh-mode with tree-sittter
  2022-10-28 17:45           ` Yuan Fu
  2022-10-28 18:12             ` Stefan Monnier
@ 2022-10-29  7:13             ` Augusto Stoffel
  1 sibling, 0 replies; 53+ messages in thread
From: Augusto Stoffel @ 2022-10-29  7:13 UTC (permalink / raw)
  To: Yuan Fu; +Cc: Stefan Monnier, João Paulo Labegalini de Carvalho,
	emacs-devel

On Fri, 28 Oct 2022 at 10:45, Yuan Fu wrote:

> I should probably add some convenient functions user can use to
> add/remove feature from that list, since its not a plain list, but
> rather a list of list.

Would it perhaps makes sense to make this like a hook variable, that is,
a flat list where each entry has a "depth" property stored somewhere
else?  Then it's easy to remove individual elements, and one can also
rather easily delete all elements above a certain depth.



^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: Initial fontification in sh-mode with tree-sittter
  2022-10-28 15:09       ` Daniel Martín
@ 2022-10-31  2:13         ` Yuan Fu
  2022-10-31 21:56           ` Yuan Fu
  0 siblings, 1 reply; 53+ messages in thread
From: Yuan Fu @ 2022-10-31  2:13 UTC (permalink / raw)
  To: Daniel Martín; +Cc: João Paulo Labegalini de Carvalho, emacs-devel



> On Oct 28, 2022, at 8:09 AM, Daniel Martín <mardani29@yahoo.es> wrote:
> 
> Yuan Fu <casouri@gmail.com> writes:
> 
>> 
>> Hmmm, I couldn’t reproduce this in python-mode, also defun is not recognized in python so I used this snippet:
>> 
>> def foo():
>>    Return 42
>> 
>> When I insert “”” before the defun, everything after becomes string face, when I insert the following “””, everything is updated again.
>> 
>> I didn’t make any significant change to the font-lock code recently, either.
>> 
> 
> I can reproduce the problem by following these steps:
> 
> emacs -Q from top of the feature/tree-sitter branch
> M-: (require 'treesit)
> M-x customize-variable treesit-settings RET
> Set "Activate" to "Yes" and apply the change.
> C-x b sample.py RET
> M-x python-mode
> Write the following program:
> 
> def main():
>    return 0
> 
> M-< C-o """ (the code is not fontified as string)
> M-> """ (the code is not fontified as string)
> M-x python-mode RET (the code _is_ fontified as string)

Thanks, I can reproduce this on emacs -Q, too (but strangely not on my “configured” Emacs).

> 
> A git bisect tells that the first bad commit is
> 5159789e55d64c7482dff3dc1a621d01f530f83c

Strange, this commit has nothing to do with font-lock, hmm...

> 
> Hope this helps.

Yes! Many thanks.

Yuan





^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: Initial fontification in sh-mode with tree-sittter
  2022-10-31  2:13         ` Yuan Fu
@ 2022-10-31 21:56           ` Yuan Fu
  2022-11-01  0:09             ` Daniel Martín
  2022-11-02 20:37             ` [SPAM UNSURE] " Stephen Leake
  0 siblings, 2 replies; 53+ messages in thread
From: Yuan Fu @ 2022-10-31 21:56 UTC (permalink / raw)
  To: Daniel Martín; +Cc: João Paulo Labegalini de Carvalho, emacs-devel



> On Oct 30, 2022, at 7:13 PM, Yuan Fu <casouri@gmail.com> wrote:
> 
> 
> 
>> On Oct 28, 2022, at 8:09 AM, Daniel Martín <mardani29@yahoo.es> wrote:
>> 
>> Yuan Fu <casouri@gmail.com> writes:
>> 
>>> 
>>> Hmmm, I couldn’t reproduce this in python-mode, also defun is not recognized in python so I used this snippet:
>>> 
>>> def foo():
>>>   Return 42
>>> 
>>> When I insert “”” before the defun, everything after becomes string face, when I insert the following “””, everything is updated again.
>>> 
>>> I didn’t make any significant change to the font-lock code recently, either.
>>> 
>> 
>> I can reproduce the problem by following these steps:
>> 
>> emacs -Q from top of the feature/tree-sitter branch
>> M-: (require 'treesit)
>> M-x customize-variable treesit-settings RET
>> Set "Activate" to "Yes" and apply the change.
>> C-x b sample.py RET
>> M-x python-mode
>> Write the following program:
>> 
>> def main():
>>   return 0
>> 
>> M-< C-o """ (the code is not fontified as string)
>> M-> """ (the code is not fontified as string)
>> M-x python-mode RET (the code _is_ fontified as string)
> 
> Thanks, I can reproduce this on emacs -Q, too (but strangely not on my “configured” Emacs).
> 
>> 
>> A git bisect tells that the first bad commit is
>> 5159789e55d64c7482dff3dc1a621d01f530f83c
> 
> Strange, this commit has nothing to do with font-lock, hmm...
> 
>> 
>> Hope this helps.
> 
> Yes! Many thanks.
> 
> Yuan

I think I’ve fixed this problem. It is mainly due to how the fortifications rule is written. When the starting quotes are inserted, normal syntactic font-lock will mark everything behind it in string-face, but for tree-sitter, since the source is incomplete, the quotes are in an error node in the parse tree and there is no string node, so tree-sitter rules can’t capture any string node, so no string face is applied. And when the ending quotes are inserted, the only region jit-lock wants tree-sitter to fontify is that three quote, so again the whole string is not captured. I changed the font-lock rule to match the ending quote rather than the whole string, and now it’s working fine.

Now it is actually better than before: If you insert an open quote, the rest of the buffer will not be marked in string face, instead, and when you insert the ending quote, the string is fontified correctly.

Yuan




^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: Initial fontification in sh-mode with tree-sittter
  2022-10-31 21:56           ` Yuan Fu
@ 2022-11-01  0:09             ` Daniel Martín
  2022-11-01  0:25               ` Yuan Fu
  2022-11-02 20:37             ` [SPAM UNSURE] " Stephen Leake
  1 sibling, 1 reply; 53+ messages in thread
From: Daniel Martín @ 2022-11-01  0:09 UTC (permalink / raw)
  To: Yuan Fu; +Cc: João Paulo Labegalini de Carvalho, emacs-devel

Yuan Fu <casouri@gmail.com> writes:

>
> I think I’ve fixed this problem. It is mainly due to how the
> fortifications rule is written. When the starting quotes are inserted,
> normal syntactic font-lock will mark everything behind it in
> string-face, but for tree-sitter, since the source is incomplete, the
> quotes are in an error node in the parse tree and there is no string
> node, so tree-sitter rules can’t capture any string node, so no string
> face is applied. And when the ending quotes are inserted, the only
> region jit-lock wants tree-sitter to fontify is that three quote, so
> again the whole string is not captured. I changed the font-lock rule
> to match the ending quote rather than the whole string, and now it’s
> working fine.
>
> Now it is actually better than before: If you insert an open quote,
> the rest of the buffer will not be marked in string face, instead, and
> when you insert the ending quote, the string is fontified correctly.
>

I can still reproduce the problem, with same reproduction steps that I
mentioned before.

Curiously, as soon as I press C-l, the text gets fontified with the
string face.



^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: Initial fontification in sh-mode with tree-sittter
  2022-11-01  0:09             ` Daniel Martín
@ 2022-11-01  0:25               ` Yuan Fu
  2022-11-01  7:13                 ` Eli Zaretskii
  0 siblings, 1 reply; 53+ messages in thread
From: Yuan Fu @ 2022-11-01  0:25 UTC (permalink / raw)
  To: Daniel Martín; +Cc: João Paulo Labegalini de Carvalho, emacs-devel



> On Oct 31, 2022, at 5:09 PM, Daniel Martín <mardani29@yahoo.es> wrote:
> 
> Yuan Fu <casouri@gmail.com> writes:
> 
>> 
>> I think I’ve fixed this problem. It is mainly due to how the
>> fortifications rule is written. When the starting quotes are inserted,
>> normal syntactic font-lock will mark everything behind it in
>> string-face, but for tree-sitter, since the source is incomplete, the
>> quotes are in an error node in the parse tree and there is no string
>> node, so tree-sitter rules can’t capture any string node, so no string
>> face is applied. And when the ending quotes are inserted, the only
>> region jit-lock wants tree-sitter to fontify is that three quote, so
>> again the whole string is not captured. I changed the font-lock rule
>> to match the ending quote rather than the whole string, and now it’s
>> working fine.
>> 
>> Now it is actually better than before: If you insert an open quote,
>> the rest of the buffer will not be marked in string face, instead, and
>> when you insert the ending quote, the string is fontified correctly.
>> 
> 
> I can still reproduce the problem, with same reproduction steps that I
> mentioned before.
> 
> Curiously, as soon as I press C-l, the text gets fontified with the
> string face.

I think that’s just jit-lock & redisplay doing funny stuff. If you set treesit--font-lock-verbose to t, it should log that appropriate faces are applied when you insert the quote.

Yuan


^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: Initial fontification in sh-mode with tree-sittter
  2022-10-28 18:12             ` Stefan Monnier
@ 2022-11-01  0:33               ` Yuan Fu
  2022-11-01  3:38                 ` Stefan Monnier
  0 siblings, 1 reply; 53+ messages in thread
From: Yuan Fu @ 2022-11-01  0:33 UTC (permalink / raw)
  To: Stefan Monnier; +Cc: João Paulo Labegalini de Carvalho, emacs-devel



> On Oct 28, 2022, at 11:12 AM, Stefan Monnier <monnier@iro.umontreal.ca> wrote:
> 
>>> This way, users can set a vague global "level" preference as a good
>>> baseline, which they can more finely tune according to their own
>>> preference by controlling individual features for specific modes.
>> I should probably add some convenient functions user can use to add/remove
>> feature from that list, since its not a plain list, but rather a list
>> of list.
> 
> I think this var should be provided by the major mode and not touched
> by users.
> 
> Instead we should provide another config var for users where they can
> individually enable/disable specific features for specific major modes.

How about:

(add-hook 'xxx-mode-hook
          (lambda ()
            (setq-local treesit-font-lock-disabled-features '(features...))
            (treesit-font-lock-recompute-features)))

Or

(add-hook 'xxx-mode-hook
          (lambda ()
            (treesit-font-lock-set-disabled-features 'features...)))

?

You probably don’t need to set enabled features, because you can set maximum-decoration to a high value and specify what to disable. And maximum-decoration is infinity by default.

BTW, I’m trying to think of a better prefix than font-lock for these functions and variables, maybe “fontify”? Like

treesit-fontify-region-function
treesit-fontify-rules
treesit-fontify-settings
treesit-fontify-feature-list

It being a verb is a bit strange, but “fontification” is too long. Actually, what does “lock” even mean in font lock?

Yuan


^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: Initial fontification in sh-mode with tree-sittter
  2022-11-01  0:33               ` Yuan Fu
@ 2022-11-01  3:38                 ` Stefan Monnier
  2022-11-01  8:37                   ` Yuan Fu
  0 siblings, 1 reply; 53+ messages in thread
From: Stefan Monnier @ 2022-11-01  3:38 UTC (permalink / raw)
  To: Yuan Fu; +Cc: João Paulo Labegalini de Carvalho, emacs-devel

> You probably don’t need to set enabled features, because you can set
> maximum-decoration to a high value and specify what to disable.
> And maximum-decoration is infinity by default.

I thought it would be nice to let people choose an approximate baseline
by setting their "decoration level" and then tweak things by *either*
adding or removing elements from it.

E.g. I generally like my fontification to be quite subdued so I'd like
choose a low baseline decoration level, but I might want to add one or
two things to it in some modes.


        Stefan




^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: Initial fontification in sh-mode with tree-sittter
  2022-11-01  0:25               ` Yuan Fu
@ 2022-11-01  7:13                 ` Eli Zaretskii
  2022-11-01  8:35                   ` Yuan Fu
  0 siblings, 1 reply; 53+ messages in thread
From: Eli Zaretskii @ 2022-11-01  7:13 UTC (permalink / raw)
  To: Yuan Fu; +Cc: mardani29, jaopaulolc, emacs-devel

> From: Yuan Fu <casouri@gmail.com>
> Date: Mon, 31 Oct 2022 17:25:01 -0700
> Cc: João Paulo Labegalini de Carvalho <jaopaulolc@gmail.com>, 
>  emacs-devel@gnu.org
> 
> > Curiously, as soon as I press C-l, the text gets fontified with the
> > string face.
> 
> I think that’s just jit-lock & redisplay doing funny stuff. If you set treesit--font-lock-verbose to t, it should log that appropriate faces are applied when you insert the quote.

Which means you don't play by redisplay's rules in this case.  When
Lisp code changes faces, it should remove the 'fontified' property
from the text where faces changed.  Does your code do that?



^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: Initial fontification in sh-mode with tree-sittter
  2022-11-01  7:13                 ` Eli Zaretskii
@ 2022-11-01  8:35                   ` Yuan Fu
  2022-11-01  9:23                     ` Eli Zaretskii
  0 siblings, 1 reply; 53+ messages in thread
From: Yuan Fu @ 2022-11-01  8:35 UTC (permalink / raw)
  To: Eli Zaretskii
  Cc: Daniel Martín, João Paulo Labegalini de Carvalho,
	emacs-devel



> On Nov 1, 2022, at 12:13 AM, Eli Zaretskii <eliz@gnu.org> wrote:
> 
>> From: Yuan Fu <casouri@gmail.com>
>> Date: Mon, 31 Oct 2022 17:25:01 -0700
>> Cc: João Paulo Labegalini de Carvalho <jaopaulolc@gmail.com>, 
>> emacs-devel@gnu.org
>> 
>>> Curiously, as soon as I press C-l, the text gets fontified with the
>>> string face.
>> 
>> I think that’s just jit-lock & redisplay doing funny stuff. If you set treesit--font-lock-verbose to t, it should log that appropriate faces are applied when you insert the quote.
> 
> Which means you don't play by redisplay's rules in this case.  When
> Lisp code changes faces, it should remove the 'fontified' property
> from the text where faces changed.  Does your code do that?

I thought 'fontified should be set to t, and it is done by jit-lock? I also thought changing the ‘face property should be enough to trigger redisplay, is that true?

Yuan


^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: Initial fontification in sh-mode with tree-sittter
  2022-11-01  3:38                 ` Stefan Monnier
@ 2022-11-01  8:37                   ` Yuan Fu
  0 siblings, 0 replies; 53+ messages in thread
From: Yuan Fu @ 2022-11-01  8:37 UTC (permalink / raw)
  To: Stefan Monnier; +Cc: João Paulo Labegalini de Carvalho, emacs-devel



> On Oct 31, 2022, at 8:38 PM, Stefan Monnier <monnier@iro.umontreal.ca> wrote:
> 
>> You probably don’t need to set enabled features, because you can set
>> maximum-decoration to a high value and specify what to disable.
>> And maximum-decoration is infinity by default.
> 
> I thought it would be nice to let people choose an approximate baseline
> by setting their "decoration level" and then tweak things by *either*
> adding or removing elements from it.
> 
> E.g. I generally like my fontification to be quite subdued so I'd like
> choose a low baseline decoration level, but I might want to add one or
> two things to it in some modes.

Done. I reused treesit-font-lock-recompute-features:

(add-hook 'xxx-mode-hook
          (lambda ()
            (treesit-font-lock-recompute-features
             ‘(features-to-add...) '(features-to-remove...))))

Yuan


^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: Initial fontification in sh-mode with tree-sittter
  2022-11-01  8:35                   ` Yuan Fu
@ 2022-11-01  9:23                     ` Eli Zaretskii
       [not found]                       ` <CAGjvy2_6BReOVjSqgTM57+h+Ycjdu1o1TKoQHf6q-ypnAX3=rA@mail.gmail.com>
  0 siblings, 1 reply; 53+ messages in thread
From: Eli Zaretskii @ 2022-11-01  9:23 UTC (permalink / raw)
  To: Yuan Fu; +Cc: mardani29, jaopaulolc, emacs-devel

> From: Yuan Fu <casouri@gmail.com>
> Date: Tue, 1 Nov 2022 01:35:25 -0700
> Cc: Daniel Martín <mardani29@yahoo.es>,
>  João Paulo Labegalini de Carvalho <jaopaulolc@gmail.com>,
>  emacs-devel@gnu.org
> 
> 
> 
> > On Nov 1, 2022, at 12:13 AM, Eli Zaretskii <eliz@gnu.org> wrote:
> > 
> >> From: Yuan Fu <casouri@gmail.com>
> >> Date: Mon, 31 Oct 2022 17:25:01 -0700
> >> Cc: João Paulo Labegalini de Carvalho <jaopaulolc@gmail.com>, 
> >> emacs-devel@gnu.org
> >> 
> >>> Curiously, as soon as I press C-l, the text gets fontified with the
> >>> string face.
> >> 
> >> I think that’s just jit-lock & redisplay doing funny stuff. If you set treesit--font-lock-verbose to t, it should log that appropriate faces are applied when you insert the quote.
> > 
> > Which means you don't play by redisplay's rules in this case.  When
> > Lisp code changes faces, it should remove the 'fontified' property
> > from the text where faces changed.  Does your code do that?
> 
> I thought 'fontified should be set to t, and it is done by jit-lock? I also thought changing the ‘face property should be enough to trigger redisplay, is that true?

It should, but it doesn't in your case: the display engine is for some
reason not aware that faces changed in some areas, so it doesn't
redisplay those areas.  You need to figure out why.  Or show the code
which does that, and someone else will help you understand that.



^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: Initial fontification in sh-mode with tree-sittter
  2022-10-27 22:01 Initial fontification in sh-mode with tree-sittter João Paulo Labegalini de Carvalho
                   ` (2 preceding siblings ...)
  2022-10-28 15:27 ` João Paulo Labegalini de Carvalho
@ 2022-11-02 18:22 ` João Paulo Labegalini de Carvalho
  2022-11-02 18:55   ` João Paulo Labegalini de Carvalho
  3 siblings, 1 reply; 53+ messages in thread
From: João Paulo Labegalini de Carvalho @ 2022-11-02 18:22 UTC (permalink / raw)
  To: emacs-devel


[-- Attachment #1.1: Type: text/plain, Size: 1790 bytes --]

Here is another version of the path. This has the queries separated into
language related terms and groups them into lists of features.

Looking forward to your feedback.

Thanks.

On Thu, Oct 27, 2022 at 4:01 PM João Paulo Labegalini de Carvalho <
jaopaulolc@gmail.com> wrote:

> Hi everyone,
>
> Please find the patch for enabling fontification in sh-mode (currently
> only for bash) using tree-sitter.
>
> I welcome all comments and suggestions to improve the patch.
>
> I noticed a weird behavior with heredocs. Take the code below:
>
> echo <<EOF
> This is a here document.
> EOF
> echo "Done."
>
> My patch correctly fontifies the code above, but if I kill the whole line
> with the "This is a here document." text, then the sh-heredoc face bleeds
> out and all the subsequent comments get fontified as part of the heredoc.
>
> A similar behavior happens if tree-sitter is not enabled, if the heredoc
> is empty then all subsequent commands are fontified as heredoc. However, as
> soon as anything is added to the heredoc, then everything goes back to the
> correct fontification.
>
> Such "refreshing" does not happen with tree-sitter enabled, but if I
> execute M-x sh-mode then the buffer gets refreshed and everything looks
> good.
>
> What am I doing wrong?
>
> --
> João Paulo L. de Carvalho
> Ph.D Computer Science |  IC-UNICAMP | Campinas , SP - Brazil
> Postdoctoral Research Fellow | University of Alberta | Edmonton, AB -
> Canada
> joao.carvalho@ic.unicamp.br
> joao.carvalho@ualberta.ca
>


-- 
João Paulo L. de Carvalho
Ph.D Computer Science |  IC-UNICAMP | Campinas , SP - Brazil
Postdoctoral Research Fellow | University of Alberta | Edmonton, AB - Canada
joao.carvalho@ic.unicamp.br
joao.carvalho@ualberta.ca

[-- Attachment #1.2: Type: text/html, Size: 2892 bytes --]

[-- Attachment #2: 0001-Initial-fontification-in-sh-mode-with-tree-sitter.patch --]
[-- Type: text/x-patch, Size: 7220 bytes --]

From a30903758c2c776fd7e1f03e8f71a5a12bc00862 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jo=C3=A3o=20P=2E=20L=2E=20de=20Carvalho?=
 <jaopaulolc@gmail.com>
Date: Thu, 27 Oct 2022 15:45:56 -0600
Subject: [PATCH] Initial fontification in sh-mode with tree-sitter

---
 lisp/progmodes/sh-script.el | 153 +++++++++++++++++++++++++++++++++---
 1 file changed, 142 insertions(+), 11 deletions(-)

diff --git a/lisp/progmodes/sh-script.el b/lisp/progmodes/sh-script.el
index 558b62b20a..f174ca7714 100644
--- a/lisp/progmodes/sh-script.el
+++ b/lisp/progmodes/sh-script.el
@@ -148,6 +148,7 @@
   (require 'let-alist)
   (require 'subr-x))
 (require 'executable)
+(require 'treesit)
 
 (autoload 'comint-completion-at-point "comint")
 (autoload 'comint-filename-completion "comint")
@@ -1534,13 +1535,6 @@ sh-mode
   ;; we can't look if previous line ended with `\'
   (setq-local comint-prompt-regexp "^[ \t]*")
   (setq-local imenu-case-fold-search nil)
-  (setq font-lock-defaults
-	`((sh-font-lock-keywords
-	   sh-font-lock-keywords-1 sh-font-lock-keywords-2)
-	  nil nil
-	  ((?/ . "w") (?~ . "w") (?. . "w") (?- . "w") (?_ . "w")) nil
-	  (font-lock-syntactic-face-function
-	   . ,#'sh-font-lock-syntactic-face-function)))
   (setq-local syntax-propertize-function #'sh-syntax-propertize-function)
   (add-hook 'syntax-propertize-extend-region-functions
             #'syntax-propertize-multiline 'append 'local)
@@ -1587,7 +1581,28 @@ sh-mode
    nil nil)
   (add-hook 'flymake-diagnostic-functions #'sh-shellcheck-flymake nil t)
   (add-hook 'hack-local-variables-hook
-    #'sh-after-hack-local-variables nil t))
+    #'sh-after-hack-local-variables nil t)
+
+  (cond
+   ;; Tree-sitter
+   ((treesit-ready-p 'sh-mode sh-shell)
+    (setq-local font-lock-keywords-only t)
+    (setq-local treesit-font-lock-feature-list
+                '((comments functions strings heredocs)
+                  (variables keywords commands decl-commands)
+                  (constants operators builtin-variables)))
+    (setq-local treesit-font-lock-settings
+                sh-mode--treesit-settings)
+    (treesit-major-mode-setup))
+   ;; Elisp.
+   (t
+    (setq font-lock-defaults
+          `((sh-font-lock-keywords
+             sh-font-lock-keywords-1 sh-font-lock-keywords-2)
+            nil nil
+            ((?/ . "w") (?~ . "w") (?. . "w") (?- . "w") (?_ . "w")) nil
+            (font-lock-syntactic-face-function
+             . ,#'sh-font-lock-syntactic-face-function))))))
 
 ;;;###autoload
 (defalias 'shell-script-mode 'sh-mode)
@@ -3191,6 +3206,122 @@ sh-shellcheck-flymake
       (process-send-region sh--shellcheck-process (point-min) (point-max))
       (process-send-eof sh--shellcheck-process))))
 
-(provide 'sh-script)
-
-;;; sh-script.el ends here
+;;; Tree-sitter font-lock
+
+(defvar sh-mode--treesit-operators
+  '("|" "|&" "||" "&&" ">" ">>" "<" "<<" "<<-" "<<<" "==" "!=" ";"
+    ";;" ";&" ";;&")
+  "List of `sh-mode' operator to fontify")
+
+(defvar sh-mode--treesit-keywords
+  '("case" "do" "done" "elif" "else" "esac" "export" "fi" "for"
+    "function" "if" "in" "unset" "while" "then")
+  "Minimal list of keywords that belong to tree-sitter-bash's grammar.
+
+Some reserved words are not recognize to keep the grammar
+simpler. Those are identified with regex-based filtered queries.
+
+See `sh-mode--treesit-other-keywords' and
+`sh-mode--treesit-settings').")
+
+(defun sh-mode--treesit-other-keywords ()
+  "Returns a list `others' of key/reserved words to be fontified with
+regex-based queries as they are not part of tree-sitter-bash's
+grammar.
+
+See `sh-mode--treesit-other-keywords' and
+`sh-mode--treesit-settings')."
+  (let ((minimal sh-mode--treesit-keywords)
+        (all (append (sh-feature sh-leading-keywords)
+                     (sh-feature sh-other-keywords)))
+        (others))
+    (dolist (keyword all others)
+      (if (not (member keyword minimal))
+          (setq others (cons keyword others))))))
+
+(defun sh-mode--treesit-fontify-decl-command-name (_beg _end node)
+  "Fontifies only the name of declaration_command nodes.
+
+This is used instead of `font-lock-builtion-face' directly because
+otherwise the whole command, including the variable assignment part,
+is fontified with with `font-lock-builtin-face'. An alternative to
+this would be to declaration_command nodes to have a `name:' field."
+  (let* ((maybe-decl-cmd (treesit-node-parent node))
+         (node-type (treesit-node-type maybe-decl-cmd)))
+    (when (string= node-type "declaration_command")
+      (let* ((name-node (car (treesit-node-children maybe-decl-cmd)))
+             (name-beg (treesit-node-start name-node))
+             (name-end (treesit-node-end name-node)))
+        (put-text-property name-beg
+                           name-end
+                           'face
+                           font-lock-builtin-face)))))
+
+(defvar sh-mode--treesit-settings
+  (treesit-font-lock-rules
+   :feature 'comments
+   :language sh-shell
+   '((comment) @font-lock-comment-face)
+   :feature 'functions
+   :language sh-shell
+   '((function_definition name: (word) @font-lock-function-name-face))
+   :feature 'strings
+   :language sh-shell
+   '([(string) (raw_string)] @font-lock-string-face)
+   :feature 'heredocs
+   :language sh-shell
+   '([(heredoc_start) (heredoc_body)] @sh-heredoc)
+   :feature 'variables
+   :language sh-shell
+   '((variable_name) @font-lock-variable-name-face)
+   :feature 'keywords
+   :language sh-shell
+   `(;; keywords
+     [ ,@sh-mode--treesit-keywords ] @font-lock-keyword-face
+     ;; reserved words
+     (command_name
+      ((word) @font-lock-keyword-face
+       (:match
+        ,(rx-to-string
+            `(seq bol
+                  (or ,@(sh-mode--treesit-other-keywords))
+                  eol))
+        @font-lock-keyword-face))))
+   :feature 'commands
+   :language sh-shell
+   `(;; function/non-builtin command calls
+     (command_name (word) @font-lock-function-name-face)
+     ;; builtin commands
+     (command_name
+      ((word) @font-lock-builtin-face
+       (:match ,(let ((builtins
+                       (sh-feature sh-builtins)))
+                  (rx-to-string
+                   `(seq bol
+                         (or ,@builtins)
+                         eol)))
+               @font-lock-builtin-face))))
+   :feature 'decl-commands
+   :language sh-shell
+   '(;; declaration commands
+     (declaration_command _ @sh-mode--treesit-fontify-command-name))
+   :feature 'constants
+   :language sh-shell
+   '((case_item value: (word) @font-lock-constant-face)
+     (file_descriptor) @font-lock-constant-face)
+   :feature 'operators
+   :language sh-shell
+   `([ ,@sh-mode--treesit-operators ] @font-lock-builtin-face)
+   :feature 'builtin-variables
+   :language sh-shell
+   `(((special_variable_name) @font-lock-builtin-face
+      (:match ,(let ((builtin-vars (sh-feature sh-variables)))
+                 (rx-to-string
+                  `(seq bol
+                        (or ,@builtin-vars)
+                        eol)))
+              @font-lock-builtin-face))))
+  "Tree-sitter font-lock settings for `sh-mode'.")
+
+(provide 'sh-mode)
+;;; sh-mode.el ends here
-- 
2.31.1


^ permalink raw reply related	[flat|nested] 53+ messages in thread

* Re: Initial fontification in sh-mode with tree-sittter
  2022-11-02 18:22 ` João Paulo Labegalini de Carvalho
@ 2022-11-02 18:55   ` João Paulo Labegalini de Carvalho
  2022-11-12 12:47     ` Eli Zaretskii
  0 siblings, 1 reply; 53+ messages in thread
From: João Paulo Labegalini de Carvalho @ 2022-11-02 18:55 UTC (permalink / raw)
  To: emacs-devel


[-- Attachment #1.1: Type: text/plain, Size: 2297 bytes --]

I missed the update that adds override as the last argument of the capture
function. Fixed now.

On Wed, Nov 2, 2022 at 12:22 PM João Paulo Labegalini de Carvalho <
jaopaulolc@gmail.com> wrote:

> Here is another version of the path. This has the queries separated into
> language related terms and groups them into lists of features.
>
> Looking forward to your feedback.
>
> Thanks.
>
> On Thu, Oct 27, 2022 at 4:01 PM João Paulo Labegalini de Carvalho <
> jaopaulolc@gmail.com> wrote:
>
>> Hi everyone,
>>
>> Please find the patch for enabling fontification in sh-mode (currently
>> only for bash) using tree-sitter.
>>
>> I welcome all comments and suggestions to improve the patch.
>>
>> I noticed a weird behavior with heredocs. Take the code below:
>>
>> echo <<EOF
>> This is a here document.
>> EOF
>> echo "Done."
>>
>> My patch correctly fontifies the code above, but if I kill the whole line
>> with the "This is a here document." text, then the sh-heredoc face bleeds
>> out and all the subsequent comments get fontified as part of the heredoc.
>>
>> A similar behavior happens if tree-sitter is not enabled, if the heredoc
>> is empty then all subsequent commands are fontified as heredoc. However, as
>> soon as anything is added to the heredoc, then everything goes back to the
>> correct fontification.
>>
>> Such "refreshing" does not happen with tree-sitter enabled, but if I
>> execute M-x sh-mode then the buffer gets refreshed and everything looks
>> good.
>>
>> What am I doing wrong?
>>
>> --
>> João Paulo L. de Carvalho
>> Ph.D Computer Science |  IC-UNICAMP | Campinas , SP - Brazil
>> Postdoctoral Research Fellow | University of Alberta | Edmonton, AB -
>> Canada
>> joao.carvalho@ic.unicamp.br
>> joao.carvalho@ualberta.ca
>>
>
>
> --
> João Paulo L. de Carvalho
> Ph.D Computer Science |  IC-UNICAMP | Campinas , SP - Brazil
> Postdoctoral Research Fellow | University of Alberta | Edmonton, AB -
> Canada
> joao.carvalho@ic.unicamp.br
> joao.carvalho@ualberta.ca
>


-- 
João Paulo L. de Carvalho
Ph.D Computer Science |  IC-UNICAMP | Campinas , SP - Brazil
Postdoctoral Research Fellow | University of Alberta | Edmonton, AB - Canada
joao.carvalho@ic.unicamp.br
joao.carvalho@ualberta.ca

[-- Attachment #1.2: Type: text/html, Size: 3901 bytes --]

[-- Attachment #2: 0001-Initial-fontification-in-sh-mode-with-tree-sitter.patch --]
[-- Type: text/x-patch, Size: 7220 bytes --]

From a30903758c2c776fd7e1f03e8f71a5a12bc00862 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jo=C3=A3o=20P=2E=20L=2E=20de=20Carvalho?=
 <jaopaulolc@gmail.com>
Date: Thu, 27 Oct 2022 15:45:56 -0600
Subject: [PATCH] Initial fontification in sh-mode with tree-sitter

---
 lisp/progmodes/sh-script.el | 153 +++++++++++++++++++++++++++++++++---
 1 file changed, 142 insertions(+), 11 deletions(-)

diff --git a/lisp/progmodes/sh-script.el b/lisp/progmodes/sh-script.el
index 558b62b20a..f174ca7714 100644
--- a/lisp/progmodes/sh-script.el
+++ b/lisp/progmodes/sh-script.el
@@ -148,6 +148,7 @@
   (require 'let-alist)
   (require 'subr-x))
 (require 'executable)
+(require 'treesit)
 
 (autoload 'comint-completion-at-point "comint")
 (autoload 'comint-filename-completion "comint")
@@ -1534,13 +1535,6 @@ sh-mode
   ;; we can't look if previous line ended with `\'
   (setq-local comint-prompt-regexp "^[ \t]*")
   (setq-local imenu-case-fold-search nil)
-  (setq font-lock-defaults
-	`((sh-font-lock-keywords
-	   sh-font-lock-keywords-1 sh-font-lock-keywords-2)
-	  nil nil
-	  ((?/ . "w") (?~ . "w") (?. . "w") (?- . "w") (?_ . "w")) nil
-	  (font-lock-syntactic-face-function
-	   . ,#'sh-font-lock-syntactic-face-function)))
   (setq-local syntax-propertize-function #'sh-syntax-propertize-function)
   (add-hook 'syntax-propertize-extend-region-functions
             #'syntax-propertize-multiline 'append 'local)
@@ -1587,7 +1581,28 @@ sh-mode
    nil nil)
   (add-hook 'flymake-diagnostic-functions #'sh-shellcheck-flymake nil t)
   (add-hook 'hack-local-variables-hook
-    #'sh-after-hack-local-variables nil t))
+    #'sh-after-hack-local-variables nil t)
+
+  (cond
+   ;; Tree-sitter
+   ((treesit-ready-p 'sh-mode sh-shell)
+    (setq-local font-lock-keywords-only t)
+    (setq-local treesit-font-lock-feature-list
+                '((comments functions strings heredocs)
+                  (variables keywords commands decl-commands)
+                  (constants operators builtin-variables)))
+    (setq-local treesit-font-lock-settings
+                sh-mode--treesit-settings)
+    (treesit-major-mode-setup))
+   ;; Elisp.
+   (t
+    (setq font-lock-defaults
+          `((sh-font-lock-keywords
+             sh-font-lock-keywords-1 sh-font-lock-keywords-2)
+            nil nil
+            ((?/ . "w") (?~ . "w") (?. . "w") (?- . "w") (?_ . "w")) nil
+            (font-lock-syntactic-face-function
+             . ,#'sh-font-lock-syntactic-face-function))))))
 
 ;;;###autoload
 (defalias 'shell-script-mode 'sh-mode)
@@ -3191,6 +3206,122 @@ sh-shellcheck-flymake
       (process-send-region sh--shellcheck-process (point-min) (point-max))
       (process-send-eof sh--shellcheck-process))))
 
-(provide 'sh-script)
-
-;;; sh-script.el ends here
+;;; Tree-sitter font-lock
+
+(defvar sh-mode--treesit-operators
+  '("|" "|&" "||" "&&" ">" ">>" "<" "<<" "<<-" "<<<" "==" "!=" ";"
+    ";;" ";&" ";;&")
+  "List of `sh-mode' operator to fontify")
+
+(defvar sh-mode--treesit-keywords
+  '("case" "do" "done" "elif" "else" "esac" "export" "fi" "for"
+    "function" "if" "in" "unset" "while" "then")
+  "Minimal list of keywords that belong to tree-sitter-bash's grammar.
+
+Some reserved words are not recognize to keep the grammar
+simpler. Those are identified with regex-based filtered queries.
+
+See `sh-mode--treesit-other-keywords' and
+`sh-mode--treesit-settings').")
+
+(defun sh-mode--treesit-other-keywords ()
+  "Returns a list `others' of key/reserved words to be fontified with
+regex-based queries as they are not part of tree-sitter-bash's
+grammar.
+
+See `sh-mode--treesit-other-keywords' and
+`sh-mode--treesit-settings')."
+  (let ((minimal sh-mode--treesit-keywords)
+        (all (append (sh-feature sh-leading-keywords)
+                     (sh-feature sh-other-keywords)))
+        (others))
+    (dolist (keyword all others)
+      (if (not (member keyword minimal))
+          (setq others (cons keyword others))))))
+
+(defun sh-mode--treesit-fontify-decl-command-name (_beg _end node)
+  "Fontifies only the name of declaration_command nodes.
+
+This is used instead of `font-lock-builtion-face' directly because
+otherwise the whole command, including the variable assignment part,
+is fontified with with `font-lock-builtin-face'. An alternative to
+this would be to declaration_command nodes to have a `name:' field."
+  (let* ((maybe-decl-cmd (treesit-node-parent node))
+         (node-type (treesit-node-type maybe-decl-cmd)))
+    (when (string= node-type "declaration_command")
+      (let* ((name-node (car (treesit-node-children maybe-decl-cmd)))
+             (name-beg (treesit-node-start name-node))
+             (name-end (treesit-node-end name-node)))
+        (put-text-property name-beg
+                           name-end
+                           'face
+                           font-lock-builtin-face)))))
+
+(defvar sh-mode--treesit-settings
+  (treesit-font-lock-rules
+   :feature 'comments
+   :language sh-shell
+   '((comment) @font-lock-comment-face)
+   :feature 'functions
+   :language sh-shell
+   '((function_definition name: (word) @font-lock-function-name-face))
+   :feature 'strings
+   :language sh-shell
+   '([(string) (raw_string)] @font-lock-string-face)
+   :feature 'heredocs
+   :language sh-shell
+   '([(heredoc_start) (heredoc_body)] @sh-heredoc)
+   :feature 'variables
+   :language sh-shell
+   '((variable_name) @font-lock-variable-name-face)
+   :feature 'keywords
+   :language sh-shell
+   `(;; keywords
+     [ ,@sh-mode--treesit-keywords ] @font-lock-keyword-face
+     ;; reserved words
+     (command_name
+      ((word) @font-lock-keyword-face
+       (:match
+        ,(rx-to-string
+            `(seq bol
+                  (or ,@(sh-mode--treesit-other-keywords))
+                  eol))
+        @font-lock-keyword-face))))
+   :feature 'commands
+   :language sh-shell
+   `(;; function/non-builtin command calls
+     (command_name (word) @font-lock-function-name-face)
+     ;; builtin commands
+     (command_name
+      ((word) @font-lock-builtin-face
+       (:match ,(let ((builtins
+                       (sh-feature sh-builtins)))
+                  (rx-to-string
+                   `(seq bol
+                         (or ,@builtins)
+                         eol)))
+               @font-lock-builtin-face))))
+   :feature 'decl-commands
+   :language sh-shell
+   '(;; declaration commands
+     (declaration_command _ @sh-mode--treesit-fontify-command-name))
+   :feature 'constants
+   :language sh-shell
+   '((case_item value: (word) @font-lock-constant-face)
+     (file_descriptor) @font-lock-constant-face)
+   :feature 'operators
+   :language sh-shell
+   `([ ,@sh-mode--treesit-operators ] @font-lock-builtin-face)
+   :feature 'builtin-variables
+   :language sh-shell
+   `(((special_variable_name) @font-lock-builtin-face
+      (:match ,(let ((builtin-vars (sh-feature sh-variables)))
+                 (rx-to-string
+                  `(seq bol
+                        (or ,@builtin-vars)
+                        eol)))
+              @font-lock-builtin-face))))
+  "Tree-sitter font-lock settings for `sh-mode'.")
+
+(provide 'sh-mode)
+;;; sh-mode.el ends here
-- 
2.31.1


^ permalink raw reply related	[flat|nested] 53+ messages in thread

* Re: Initial fontification in sh-mode with tree-sittter
       [not found]                       ` <CAGjvy2_6BReOVjSqgTM57+h+Ycjdu1o1TKoQHf6q-ypnAX3=rA@mail.gmail.com>
@ 2022-11-02 19:17                         ` Eli Zaretskii
  2022-11-03  1:25                           ` Yuan Fu
  0 siblings, 1 reply; 53+ messages in thread
From: Eli Zaretskii @ 2022-11-02 19:17 UTC (permalink / raw)
  To: João Paulo Labegalini de Carvalho, Yuan Fu; +Cc: emacs-devel

> From: João Paulo Labegalini de Carvalho <jaopaulolc@gmail.com>
> Date: Wed, 2 Nov 2022 12:34:56 -0600
> 
> Yes, the fontification in the buffer is still not updated correctly.
> 
> With the step below, the definition of function foo is not re-fontified and remains with string-face until C-x x f is
> executed.
> 
> emacs -Q from top of the feature/tree-sitter branch
> M-: (require 'treesit)
> M-x customize-variable treesit-settings RET
> Set "Activate" to "Yes" and apply the change.
> C-x b sample.py RET
> M-x python-mode
> Write the following program:
> 
> def main():
>     return 0
> 
> M-<
> """
> M->
> """ (at this point everything is in string-face, as expected)
> C-a
> C-k (everything is still fontified as string)
> C-x x f (leading """ is not fontified and definition of foo is correctly fontified)
> 
> I am interested in understanding what is causing this as a similar thing happens with heredocs in sh-mode &
> tree-sitter.

Yuan, can you look into this?  It sounds like some general problem
with integration of tree-sitter with jit-lock.  Or maybe something is
missing from the way tree-sitter nodes are mapped into face text
properties.  Are the faces actually being put on the relevant text,
for starters?



^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [SPAM UNSURE] Re: Initial fontification in sh-mode with tree-sittter
  2022-10-31 21:56           ` Yuan Fu
  2022-11-01  0:09             ` Daniel Martín
@ 2022-11-02 20:37             ` Stephen Leake
  1 sibling, 0 replies; 53+ messages in thread
From: Stephen Leake @ 2022-11-02 20:37 UTC (permalink / raw)
  To: Yuan Fu
  Cc: Daniel Martín, João Paulo Labegalini de Carvalho,
	emacs-devel

Yuan Fu <casouri@gmail.com> writes:

> I think I’ve fixed this problem. It is mainly due to how the
> fortifications rule is written. When the starting quotes are inserted,
> normal syntactic font-lock will mark everything behind it in
> string-face, but for tree-sitter, since the source is incomplete, the
> quotes are in an error node in the parse tree and there is no string
> node, so tree-sitter rules can’t capture any string node, so no string
> face is applied. And when the ending quotes are inserted, the only
> region jit-lock wants tree-sitter to fontify is that three quote, so
> again the whole string is not captured. 

I'd say this is a bug in tree-sitter itself (probably in edit-tree). I'm
still fixing similar bugs in the wisi incremental parser.

> I changed the font-lock rule to match the ending quote rather than the
> whole string, and now it’s working fine.

A reasonable workaround, but perhaps submit a bug report to tree-sitter
github?

> Now it is actually better than before: If you insert an open quote,
> the rest of the buffer will not be marked in string face, instead, and
> when you insert the ending quote, the string is fontified correctly.

People's notion of "better" will differ here. I'd say it is at best
acceptable.

Best would be to highlight that initial single quote in some
error face (bright red). The user could be intending that as the end
quote, so declaring everything after it to be a string is just wrong.

-- 
-- Stephe



^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: Initial fontification in sh-mode with tree-sittter
  2022-11-02 19:17                         ` Eli Zaretskii
@ 2022-11-03  1:25                           ` Yuan Fu
  2022-11-03  6:36                             ` Eli Zaretskii
  2022-11-03 16:08                             ` João Paulo Labegalini de Carvalho
  0 siblings, 2 replies; 53+ messages in thread
From: Yuan Fu @ 2022-11-03  1:25 UTC (permalink / raw)
  To: Eli Zaretskii; +Cc: João Paulo Labegalini de Carvalho, emacs-devel



> On Nov 2, 2022, at 12:17 PM, Eli Zaretskii <eliz@gnu.org> wrote:
> 
>> From: João Paulo Labegalini de Carvalho <jaopaulolc@gmail.com>
>> Date: Wed, 2 Nov 2022 12:34:56 -0600
>> 
>> Yes, the fontification in the buffer is still not updated correctly.
>> 
>> With the step below, the definition of function foo is not re-fontified and remains with string-face until C-x x f is
>> executed.
>> 
>> emacs -Q from top of the feature/tree-sitter branch
>> M-: (require 'treesit)
>> M-x customize-variable treesit-settings RET
>> Set "Activate" to "Yes" and apply the change.
>> C-x b sample.py RET
>> M-x python-mode
>> Write the following program:
>> 
>> def main():
>>    return 0
>> 
>> M-<
>> """
>> M->
>> """ (at this point everything is in string-face, as expected)
>> C-a
>> C-k (everything is still fontified as string)
>> C-x x f (leading """ is not fontified and definition of foo is correctly fontified)
>> 
>> I am interested in understanding what is causing this as a similar thing happens with heredocs in sh-mode &
>> tree-sitter.
> 
> Yuan, can you look into this?  It sounds like some general problem
> with integration of tree-sitter with jit-lock.  

Yeah, this is what I’ve been working in the past two days. I just pushed a change f331be1f074d68e7e5cdbac324419e07c186492a which should fix it. [ I just saw that I missed a function in the commit message, I guess I can’t fix it now :-( ]

If you are interested to know what’s going on:

The problem is as I described earlier, when the user inserts the starting quote (“””), the parse tree is incomplete and there is no string node. Only when the user inserts the ending quote is there now a string node to be captured by the fontification rules. 

For example, in this snippet there are two regions A and B

"""          ---+
def main():     | Region A
    return 0 ---+
             ---+ 
"""             | Region B
             ---+

when user inserts the “”” in B, and jit-lock fontifies region B, it captures the string node and the part of the string in region A needs to be updated. If we fontify the whole string in string face, redisplay does not immediately reflect the change in region A (maybe due to some optimization? jit-lock is definitely aware of this, see jit-lock-force-redisplay).

Redisplay not reflecting the change in face is just the surface problem, and can be fixed by setting fontified to t on region A, which seems to trigger redisplay to reflect changes immediately (this is what jit-lock-force-redisplay does). The deeper problem is, if there is some regex-based-font-lock face in region A (applied when Emacs fontified region A), eg, highlighted TODO keywords in a docstring, they will be overwritten by the string face, if we just apply string face to the whole string and trigger redisplay.

Maybe we can apply string face and re-run regex-based font-lock on the whole string, but that works against jit-lock. If the string is long regexp-font-lock might take a long time.

What I ended up doing is to set jit-lock-context-unfontify-pos to the beginning of the string node (aka beginning of region A). Then in a timer jit-lock-context will refontify everything after that position. And I have some measure to break possible infinite recursion (fontify region -> set jit-lock-context-unfontify-pos -> cause refontification -> fontify region -> …).

The alternative, where we put everything in string face when we see an opening “””, is not really feasible. It’s kind of feasible in python, where an opening “”” alone looks like (ERROR “””), aka the quote node inside an error node. But if you insert /* in javascript, you get (ERROR “/“) and (ERROR (regexp_pattern))—tree-sitter can’t tell that they are opening comment (which is fair). Plus this approach requires non-trivial involvement from each major mode: each major mode needs to somehow tell Emacs what is a “potential opening comment/string”.

> Or maybe something is
> missing from the way tree-sitter nodes are mapped into face text
> properties.  Are the faces actually being put on the relevant text,
> for starters?

They are. Fortunately this is not the problem.

Yuan




^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: Initial fontification in sh-mode with tree-sittter
  2022-11-03  1:25                           ` Yuan Fu
@ 2022-11-03  6:36                             ` Eli Zaretskii
  2022-11-03  7:16                               ` Yuan Fu
  2022-11-03 16:08                             ` João Paulo Labegalini de Carvalho
  1 sibling, 1 reply; 53+ messages in thread
From: Eli Zaretskii @ 2022-11-03  6:36 UTC (permalink / raw)
  To: Yuan Fu; +Cc: jaopaulolc, emacs-devel

> From: Yuan Fu <casouri@gmail.com>
> Date: Wed, 2 Nov 2022 18:25:13 -0700
> Cc: João Paulo Labegalini de Carvalho <jaopaulolc@gmail.com>,
>  emacs-devel@gnu.org
> 
> What I ended up doing is to set jit-lock-context-unfontify-pos to the beginning of the string node (aka beginning of region A). Then in a timer jit-lock-context will refontify everything after that position. And I have some measure to break possible infinite recursion (fontify region -> set jit-lock-context-unfontify-pos -> cause refontification -> fontify region -> …).

OK, but beware of the case where region A is very large.  Could
performance suffer in such cases?

Thanks.



^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: Initial fontification in sh-mode with tree-sittter
  2022-11-03  6:36                             ` Eli Zaretskii
@ 2022-11-03  7:16                               ` Yuan Fu
  0 siblings, 0 replies; 53+ messages in thread
From: Yuan Fu @ 2022-11-03  7:16 UTC (permalink / raw)
  To: Eli Zaretskii; +Cc: jaopaulolc, emacs-devel



> On Nov 2, 2022, at 11:36 PM, Eli Zaretskii <eliz@gnu.org> wrote:
> 
>> From: Yuan Fu <casouri@gmail.com>
>> Date: Wed, 2 Nov 2022 18:25:13 -0700
>> Cc: João Paulo Labegalini de Carvalho <jaopaulolc@gmail.com>,
>> emacs-devel@gnu.org
>> 
>> What I ended up doing is to set jit-lock-context-unfontify-pos to the beginning of the string node (aka beginning of region A). Then in a timer jit-lock-context will refontify everything after that position. And I have some measure to break possible infinite recursion (fontify region -> set jit-lock-context-unfontify-pos -> cause refontification -> fontify region -> …).
> 
> OK, but beware of the case where region A is very large.  Could
> performance suffer in such cases?

That’s no problem, region A is still jit-locked, it’s only refortified when displayed.

Yuan


^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: Initial fontification in sh-mode with tree-sittter
  2022-11-03  1:25                           ` Yuan Fu
  2022-11-03  6:36                             ` Eli Zaretskii
@ 2022-11-03 16:08                             ` João Paulo Labegalini de Carvalho
  2022-11-03 19:12                               ` Yuan Fu
  1 sibling, 1 reply; 53+ messages in thread
From: João Paulo Labegalini de Carvalho @ 2022-11-03 16:08 UTC (permalink / raw)
  To: Yuan Fu; +Cc: Eli Zaretskii, emacs-devel

[-- Attachment #1: Type: text/plain, Size: 1385 bytes --]

Thanks for working on this.

The deeper problem is, if there is some regex-based-font-lock face in
> region A (applied when Emacs fontified region A), eg, highlighted TODO
> keywords in a docstring, they will be overwritten by the string face, if we
> just apply string face to the whole string and trigger redisplay.
>

I still need to get familiar with the parts of the code involved. But can
we avoid this (potentially) expensive re-fontification by keeping a list of
text properties/fonts applied to a region? Because if the region was not
changed, aside from adding/removing comments, then it would not need to be
re-parsed.


> What I ended up doing is to set jit-lock-context-unfontify-pos to the
> beginning of the string node (aka beginning of region A). Then in a timer
> jit-lock-context will refontify everything after that position. And I have
> some measure to break possible infinite recursion (fontify region -> set
> jit-lock-context-unfontify-pos -> cause refontification -> fontify region
> -> …).
>

I still need to look at your fix more carefully as a similar issue exists
for sh-mode and heredoc strings.

-- 
João Paulo L. de Carvalho
Ph.D Computer Science |  IC-UNICAMP | Campinas , SP - Brazil
Postdoctoral Research Fellow | University of Alberta | Edmonton, AB - Canada
joao.carvalho@ic.unicamp.br
joao.carvalho@ualberta.ca

[-- Attachment #2: Type: text/html, Size: 2055 bytes --]

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: Initial fontification in sh-mode with tree-sittter
  2022-11-03 16:08                             ` João Paulo Labegalini de Carvalho
@ 2022-11-03 19:12                               ` Yuan Fu
  2022-11-04 20:44                                 ` João Paulo Labegalini de Carvalho
  0 siblings, 1 reply; 53+ messages in thread
From: Yuan Fu @ 2022-11-03 19:12 UTC (permalink / raw)
  To: João Paulo Labegalini de Carvalho; +Cc: Eli Zaretskii, emacs-devel



> On Nov 3, 2022, at 9:08 AM, João Paulo Labegalini de Carvalho <jaopaulolc@gmail.com> wrote:
> 
> Thanks for working on this.
> 
>> The deeper problem is, if there is some regex-based-font-lock face in region A (applied when Emacs fontified region A), eg, highlighted TODO keywords in a docstring, they will be overwritten by the string face, if we just apply string face to the whole string and trigger redisplay.
> 
> I still need to get familiar with the parts of the code involved. But can we avoid this (potentially) expensive re-fontification by keeping a list of text properties/fonts applied to a region? Because if the region was not changed, aside from adding/removing comments, then it would not need to be re-parsed.

This shouldn’t be expensive, and most work is delayed until the region is actually displayed. I wouldn’t worry about it. 

>  
>> What I ended up doing is to set jit-lock-context-unfontify-pos to the beginning of the string node (aka beginning of region A). Then in a timer jit-lock-context will refontify everything after that position. And I have some measure to break possible infinite recursion (fontify region -> set jit-lock-context-unfontify-pos -> cause refontification -> fontify region -> …).
> 
> I still need to look at your fix more carefully as a similar issue exists for sh-mode and heredoc strings.

All you need to do is capture these contextual nodes in a special name “contextual”.

(heredoc_body) @contextual
(string) @contextual

This should take care of updating faces. But I don’t know about the bleeding you described in the very beginning. Do you still see it? Is there a recipe to reproduce it?

Yuan




^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: Initial fontification in sh-mode with tree-sittter
  2022-11-03 19:12                               ` Yuan Fu
@ 2022-11-04 20:44                                 ` João Paulo Labegalini de Carvalho
  2022-11-04 22:50                                   ` Yuan Fu
  0 siblings, 1 reply; 53+ messages in thread
From: João Paulo Labegalini de Carvalho @ 2022-11-04 20:44 UTC (permalink / raw)
  To: Yuan Fu; +Cc: Eli Zaretskii, emacs-devel


[-- Attachment #1.1: Type: text/plain, Size: 1645 bytes --]

>
> All you need to do is capture these contextual nodes in a special name
> “contextual”.
>
> (heredoc_body) @contextual
> (string) @contextual
>

I see. I'll give that a try.


> But I don’t know about the bleeding you described in the very beginning.
> Do you still see it? Is there a recipe to reproduce it?
>

It might just be because of how the tree-sitter-bash grammar defines
heredoc strings.

This are the steps to reproduce the issue on sh-mode:

;; build emacs from head of feature/tree-sitter branch
;; apply the attached patch.
;; launch emacs with: emacs -nw -Q
;;Write the forms bellow on *scratch* buffer)

(require 'treesit)
(add-to-list 'treesit-settings '(sh-mode t t))
(find-file "/tmp/heredoc-issue.sh")

;; contents of /tmp/heredoc-issue.sh file (also attached)

#!/usr/bin/env bash

cat <<EOF
heredoc string
EOF
echo "<<HELLO>>"

;; Then execute the commands below

ESC <                   ;; beginning-of-buffer
C-s                     ;; isearch-forward
heredoc                 ;; self-insert-command * 7
RET                     ;; newline
C-a                     ;; move-beginning-of-line
C-k                     ;; kill-line
C-n                     ;; next-line (At this point the echo command after
EOF is fontified as heredoc string)
C-_                     ;; undo (After a short delay, the whole buffer is
correctly fontified)

-- 
João Paulo L. de Carvalho
Ph.D Computer Science |  IC-UNICAMP | Campinas , SP - Brazil
Postdoctoral Research Fellow | University of Alberta | Edmonton, AB - Canada
joao.carvalho@ic.unicamp.br
joao.carvalho@ualberta.ca

[-- Attachment #1.2: Type: text/html, Size: 2513 bytes --]

[-- Attachment #2: 0001-Initial-fontification-in-sh-mode-with-tree-sitter.patch --]
[-- Type: text/x-patch, Size: 7226 bytes --]

From d52f449bfa5cc7265170b1ebebe4581aa31ac539 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jo=C3=A3o=20P=2E=20L=2E=20de=20Carvalho?=
 <jaopaulolc@gmail.com>
Date: Thu, 27 Oct 2022 15:45:56 -0600
Subject: [PATCH] Initial fontification in sh-mode with tree-sitter

---
 lisp/progmodes/sh-script.el | 153 +++++++++++++++++++++++++++++++++---
 1 file changed, 142 insertions(+), 11 deletions(-)

diff --git a/lisp/progmodes/sh-script.el b/lisp/progmodes/sh-script.el
index 558b62b20a..17f938e95a 100644
--- a/lisp/progmodes/sh-script.el
+++ b/lisp/progmodes/sh-script.el
@@ -148,6 +148,7 @@
   (require 'let-alist)
   (require 'subr-x))
 (require 'executable)
+(require 'treesit)
 
 (autoload 'comint-completion-at-point "comint")
 (autoload 'comint-filename-completion "comint")
@@ -1534,13 +1535,6 @@ sh-mode
   ;; we can't look if previous line ended with `\'
   (setq-local comint-prompt-regexp "^[ \t]*")
   (setq-local imenu-case-fold-search nil)
-  (setq font-lock-defaults
-	`((sh-font-lock-keywords
-	   sh-font-lock-keywords-1 sh-font-lock-keywords-2)
-	  nil nil
-	  ((?/ . "w") (?~ . "w") (?. . "w") (?- . "w") (?_ . "w")) nil
-	  (font-lock-syntactic-face-function
-	   . ,#'sh-font-lock-syntactic-face-function)))
   (setq-local syntax-propertize-function #'sh-syntax-propertize-function)
   (add-hook 'syntax-propertize-extend-region-functions
             #'syntax-propertize-multiline 'append 'local)
@@ -1587,7 +1581,28 @@ sh-mode
    nil nil)
   (add-hook 'flymake-diagnostic-functions #'sh-shellcheck-flymake nil t)
   (add-hook 'hack-local-variables-hook
-    #'sh-after-hack-local-variables nil t))
+    #'sh-after-hack-local-variables nil t)
+
+  (cond
+   ;; Tree-sitter
+   ((treesit-ready-p 'sh-mode sh-shell)
+    (setq-local font-lock-keywords-only t)
+    (setq-local treesit-font-lock-feature-list
+                '((comments functions strings heredocs)
+                  (variables keywords commands decl-commands)
+                  (constants operators builtin-variables)))
+    (setq-local treesit-font-lock-settings
+                sh-mode--treesit-settings)
+    (treesit-major-mode-setup))
+   ;; Elisp.
+   (t
+    (setq font-lock-defaults
+          `((sh-font-lock-keywords
+             sh-font-lock-keywords-1 sh-font-lock-keywords-2)
+            nil nil
+            ((?/ . "w") (?~ . "w") (?. . "w") (?- . "w") (?_ . "w")) nil
+            (font-lock-syntactic-face-function
+             . ,#'sh-font-lock-syntactic-face-function))))))
 
 ;;;###autoload
 (defalias 'shell-script-mode 'sh-mode)
@@ -3191,6 +3206,122 @@ sh-shellcheck-flymake
       (process-send-region sh--shellcheck-process (point-min) (point-max))
       (process-send-eof sh--shellcheck-process))))
 
-(provide 'sh-script)
-
-;;; sh-script.el ends here
+;;; Tree-sitter font-lock
+
+(defvar sh-mode--treesit-operators
+  '("|" "|&" "||" "&&" ">" ">>" "<" "<<" "<<-" "<<<" "==" "!=" ";"
+    ";;" ";&" ";;&")
+  "List of `sh-mode' operator to fontify")
+
+(defvar sh-mode--treesit-keywords
+  '("case" "do" "done" "elif" "else" "esac" "export" "fi" "for"
+    "function" "if" "in" "unset" "while" "then")
+  "Minimal list of keywords that belong to tree-sitter-bash's grammar.
+
+Some reserved words are not recognize to keep the grammar
+simpler. Those are identified with regex-based filtered queries.
+
+See `sh-mode--treesit-other-keywords' and
+`sh-mode--treesit-settings').")
+
+(defun sh-mode--treesit-other-keywords ()
+  "Returns a list `others' of key/reserved words to be fontified with
+regex-based queries as they are not part of tree-sitter-bash's
+grammar.
+
+See `sh-mode--treesit-other-keywords' and
+`sh-mode--treesit-settings')."
+  (let ((minimal sh-mode--treesit-keywords)
+        (all (append (sh-feature sh-leading-keywords)
+                     (sh-feature sh-other-keywords)))
+        (others))
+    (dolist (keyword all others)
+      (if (not (member keyword minimal))
+          (setq others (cons keyword others))))))
+
+(defun sh-mode--treesit-fontify-decl-command (node override _start _end)
+  "Fontifies only the name of declaration_command nodes.
+
+This is used instead of `font-lock-builtion-face' directly because
+otherwise the whole command, including the variable assignment part,
+is fontified with with `font-lock-builtin-face'. An alternative to
+this would be to declaration_command nodes to have a `name:' field."
+  (let* ((maybe-decl-cmd (treesit-node-parent node))
+         (node-type (treesit-node-type maybe-decl-cmd)))
+    (when (string= node-type "declaration_command")
+      (let* ((name-node (car (treesit-node-children maybe-decl-cmd)))
+             (name-beg (treesit-node-start name-node))
+             (name-end (treesit-node-end name-node)))
+        (put-text-property name-beg
+                           name-end
+                           'face
+                           font-lock-builtin-face)))))
+
+(defvar sh-mode--treesit-settings
+  (treesit-font-lock-rules
+   :feature 'comments
+   :language sh-shell
+   '((comment) @font-lock-comment-face)
+   :feature 'functions
+   :language sh-shell
+   '((function_definition name: (word) @font-lock-function-name-face))
+   :feature 'strings
+   :language sh-shell
+   '([(string) (raw_string)] @font-lock-string-face)
+   :feature 'heredocs
+   :language sh-shell
+   '([(heredoc_start) (heredoc_body)] @sh-heredoc)
+   :feature 'variables
+   :language sh-shell
+   '((variable_name) @font-lock-variable-name-face)
+   :feature 'keywords
+   :language sh-shell
+   `(;; keywords
+     [ ,@sh-mode--treesit-keywords ] @font-lock-keyword-face
+     ;; reserved words
+     (command_name
+      ((word) @font-lock-keyword-face
+       (:match
+        ,(rx-to-string
+            `(seq bol
+                  (or ,@(sh-mode--treesit-other-keywords))
+                  eol))
+        @font-lock-keyword-face))))
+   :feature 'commands
+   :language sh-shell
+   `(;; function/non-builtin command calls
+     (command_name (word) @font-lock-function-name-face)
+     ;; builtin commands
+     (command_name
+      ((word) @font-lock-builtin-face
+       (:match ,(let ((builtins
+                       (sh-feature sh-builtins)))
+                  (rx-to-string
+                   `(seq bol
+                         (or ,@builtins)
+                         eol)))
+               @font-lock-builtin-face))))
+   :feature 'decl-commands
+   :language sh-shell
+   '(;; declaration commands
+     (declaration_command _ @sh-mode--treesit-fontify-decl-command))
+   :feature 'constants
+   :language sh-shell
+   '((case_item value: (word) @font-lock-constant-face)
+     (file_descriptor) @font-lock-constant-face)
+   :feature 'operators
+   :language sh-shell
+   `([ ,@sh-mode--treesit-operators ] @font-lock-builtin-face)
+   :feature 'builtin-variables
+   :language sh-shell
+   `(((special_variable_name) @font-lock-builtin-face
+      (:match ,(let ((builtin-vars (sh-feature sh-variables)))
+                 (rx-to-string
+                  `(seq bol
+                        (or ,@builtin-vars)
+                        eol)))
+              @font-lock-builtin-face))))
+  "Tree-sitter font-lock settings for `sh-mode'.")
+
+(provide 'sh-mode)
+;;; sh-mode.el ends here
-- 
2.31.1


[-- Attachment #3: heredoc-issue.sh --]
[-- Type: application/x-shellscript, Size: 67 bytes --]

^ permalink raw reply related	[flat|nested] 53+ messages in thread

* Re: Initial fontification in sh-mode with tree-sittter
  2022-11-04 20:44                                 ` João Paulo Labegalini de Carvalho
@ 2022-11-04 22:50                                   ` Yuan Fu
  2022-11-12 22:04                                     ` João Paulo Labegalini de Carvalho
  0 siblings, 1 reply; 53+ messages in thread
From: Yuan Fu @ 2022-11-04 22:50 UTC (permalink / raw)
  To: João Paulo Labegalini de Carvalho; +Cc: Eli Zaretskii, emacs-devel



> On Nov 4, 2022, at 1:44 PM, João Paulo Labegalini de Carvalho <jaopaulolc@gmail.com> wrote:
> 
> All you need to do is capture these contextual nodes in a special name “contextual”.
> 
> (heredoc_body) @contextual
> (string) @contextual
> 
> I see. I'll give that a try.
>  
> But I don’t know about the bleeding you described in the very beginning. Do you still see it? Is there a recipe to reproduce it?
> 
> It might just be because of how the tree-sitter-bash grammar defines heredoc strings.
> 
> This are the steps to reproduce the issue on sh-mode:
> 
> ;; build emacs from head of feature/tree-sitter branch
> ;; apply the attached patch.
> ;; launch emacs with: emacs -nw -Q
> ;;Write the forms bellow on *scratch* buffer)
> 
> (require 'treesit)
> (add-to-list 'treesit-settings '(sh-mode t t))
> (find-file "/tmp/heredoc-issue.sh")
> 
> ;; contents of /tmp/heredoc-issue.sh file (also attached)
> 
> #!/usr/bin/env bash
> 
> cat <<EOF
> heredoc string
> EOF
> echo "<<HELLO>>"
> 
> ;; Then execute the commands below
> 
> ESC <                   ;; beginning-of-buffer
> C-s                     ;; isearch-forward
> heredoc                 ;; self-insert-command * 7
> RET                     ;; newline
> C-a                     ;; move-beginning-of-line
> C-k                     ;; kill-line
> C-n                     ;; next-line (At this point the echo command after EOF is fontified as heredoc string)
> C-_                     ;; undo (After a short delay, the whole buffer is correctly fontified)

I see. This is tree-sitter-bash’s problem. When there are only newlines between two EOF’s, the parser erroneously marks everything that follows as heredoc_body. I tried tree-sitter’s online demo and it gives the same result[1]. We should report this to tree-sitter-bash’s author.

Also, when defining sh-mode--treesit-settings, instead of using the value sh-shell as the language, it’s better to just use ‘bash. Here is what happened to me: my default value for sh-shell is fish, so sh-mode--treesit-settings was defined with language = fish. When I open heredoc-issue.sh, sh-mode parses the shebang and sets sh-shell to bash. Since bash does have a parser, (treesit-ready-p ’sh-mode sh-shell) returns t, and tree-sitter is activated. However when font-lock tries to use the query, it errors because query tries to load a parser for fish.

[1] https://tree-sitter.github.io/tree-sitter/playground

Yuan


^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: Initial fontification in sh-mode with tree-sittter
  2022-11-02 18:55   ` João Paulo Labegalini de Carvalho
@ 2022-11-12 12:47     ` Eli Zaretskii
  2022-11-12 19:45       ` Yuan Fu
  0 siblings, 1 reply; 53+ messages in thread
From: Eli Zaretskii @ 2022-11-12 12:47 UTC (permalink / raw)
  To: João Paulo Labegalini de Carvalho, Yuan Fu; +Cc: emacs-devel

> From: João Paulo Labegalini de Carvalho <jaopaulolc@gmail.com>
> Date: Wed, 2 Nov 2022 12:55:31 -0600
> 
> I missed the update that adds override as the last argument of the capture function. Fixed now.
> 
> On Wed, Nov 2, 2022 at 12:22 PM João Paulo Labegalini de Carvalho <jaopaulolc@gmail.com> wrote:
> 
>  Here is another version of the path. This has the queries separated into language related terms and
>  groups them into lists of features.
> 
>  Looking forward to your feedback.
> 
>  Thanks.
> 
>  On Thu, Oct 27, 2022 at 4:01 PM João Paulo Labegalini de Carvalho <jaopaulolc@gmail.com> wrote:
> 
>  Hi everyone,
> 
>  Please find the patch for enabling fontification in sh-mode (currently only for bash) using
>  tree-sitter.
> 
>  I welcome all comments and suggestions to improve the patch.
> 
>  I noticed a weird behavior with heredocs. Take the code below:
> 
>  echo <<EOF
>  This is a here document.
>  EOF
>  echo "Done."
> 
>  My patch correctly fontifies the code above, but if I kill the whole line with the "This is a here
>  document." text, then the sh-heredoc face bleeds out and all the subsequent comments get
>  fontified as part of the heredoc.
> 
>  A similar behavior happens if tree-sitter is not enabled, if the heredoc is empty then all
>  subsequent commands are fontified as heredoc. However, as soon as anything is added to the
>  heredoc, then everything goes back to the correct fontification. 
> 
>  Such "refreshing" does not happen with tree-sitter enabled, but if I execute M-x sh-mode then the
>  buffer gets refreshed and everything looks good.
> 
>  What am I doing wrong?

Yuan, any comments?  If you think this is OK for the tree-sitter
branch, please install there.



^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: Initial fontification in sh-mode with tree-sittter
  2022-11-12 12:47     ` Eli Zaretskii
@ 2022-11-12 19:45       ` Yuan Fu
  2022-11-12 19:53         ` Eli Zaretskii
  0 siblings, 1 reply; 53+ messages in thread
From: Yuan Fu @ 2022-11-12 19:45 UTC (permalink / raw)
  To: Eli Zaretskii; +Cc: João Paulo Labegalini de Carvalho, emacs-devel



> On Nov 12, 2022, at 4:47 AM, Eli Zaretskii <eliz@gnu.org> wrote:
> 
>> From: João Paulo Labegalini de Carvalho <jaopaulolc@gmail.com>
>> Date: Wed, 2 Nov 2022 12:55:31 -0600
>> 
>> I missed the update that adds override as the last argument of the capture function. Fixed now.
>> 
>> On Wed, Nov 2, 2022 at 12:22 PM João Paulo Labegalini de Carvalho <jaopaulolc@gmail.com> wrote:
>> 
>> Here is another version of the path. This has the queries separated into language related terms and
>> groups them into lists of features.
>> 
>> Looking forward to your feedback.
>> 
>> Thanks.
>> 
>> On Thu, Oct 27, 2022 at 4:01 PM João Paulo Labegalini de Carvalho <jaopaulolc@gmail.com> wrote:
>> 
>> Hi everyone,
>> 
>> Please find the patch for enabling fontification in sh-mode (currently only for bash) using
>> tree-sitter.
>> 
>> I welcome all comments and suggestions to improve the patch.
>> 
>> I noticed a weird behavior with heredocs. Take the code below:
>> 
>> echo <<EOF
>> This is a here document.
>> EOF
>> echo "Done."
>> 
>> My patch correctly fontifies the code above, but if I kill the whole line with the "This is a here
>> document." text, then the sh-heredoc face bleeds out and all the subsequent comments get
>> fontified as part of the heredoc.
>> 
>> A similar behavior happens if tree-sitter is not enabled, if the heredoc is empty then all
>> subsequent commands are fontified as heredoc. However, as soon as anything is added to the
>> heredoc, then everything goes back to the correct fontification. 
>> 
>> Such "refreshing" does not happen with tree-sitter enabled, but if I execute M-x sh-mode then the
>> buffer gets refreshed and everything looks good.
>> 
>> What am I doing wrong?
> 
> Yuan, any comments?  If you think this is OK for the tree-sitter
> branch, please install there.

I think there are still some details need to be addressed, eg, 

> Also, when defining sh-mode--treesit-settings, instead of using the value sh-shell as the language, it’s better to just use ‘bash. Here is what happened to me: my default value for sh-shell is fish, so sh-mode--treesit-settings was defined with language = fish. When I open heredoc-issue.sh, sh-mode parses the shebang and sets sh-shell to bash. Since bash does have a parser, (treesit-ready-p ’sh-mode sh-shell) returns t, and tree-sitter is activated. However when font-lock tries to use the query, it errors because query tries to load a parser for fish.

(I can make the change myself though)

Yuan


^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: Initial fontification in sh-mode with tree-sittter
  2022-11-12 19:45       ` Yuan Fu
@ 2022-11-12 19:53         ` Eli Zaretskii
  0 siblings, 0 replies; 53+ messages in thread
From: Eli Zaretskii @ 2022-11-12 19:53 UTC (permalink / raw)
  To: Yuan Fu; +Cc: jaopaulolc, emacs-devel

> From: Yuan Fu <casouri@gmail.com>
> Date: Sat, 12 Nov 2022 11:45:53 -0800
> Cc: João Paulo Labegalini de Carvalho <jaopaulolc@gmail.com>,
>  emacs-devel@gnu.org
> 
> > Yuan, any comments?  If you think this is OK for the tree-sitter
> > branch, please install there.
> 
> I think there are still some details need to be addressed, eg, 
> 
> > Also, when defining sh-mode--treesit-settings, instead of using the value sh-shell as the language, it’s better to just use ‘bash. Here is what happened to me: my default value for sh-shell is fish, so sh-mode--treesit-settings was defined with language = fish. When I open heredoc-issue.sh, sh-mode parses the shebang and sets sh-shell to bash. Since bash does have a parser, (treesit-ready-p ’sh-mode sh-shell) returns t, and tree-sitter is activated. However when font-lock tries to use the query, it errors because query tries to load a parser for fish.
> 
> (I can make the change myself though)

Fine with me, it's up to you two.

Thanks.



^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: Initial fontification in sh-mode with tree-sittter
  2022-11-04 22:50                                   ` Yuan Fu
@ 2022-11-12 22:04                                     ` João Paulo Labegalini de Carvalho
  2022-11-12 22:28                                       ` Yuan Fu
  2022-11-13  6:23                                       ` Eli Zaretskii
  0 siblings, 2 replies; 53+ messages in thread
From: João Paulo Labegalini de Carvalho @ 2022-11-12 22:04 UTC (permalink / raw)
  To: Yuan Fu; +Cc: Eli Zaretskii, emacs-devel


[-- Attachment #1.1: Type: text/plain, Size: 1857 bytes --]

>
> I see. This is tree-sitter-bash’s problem. When there are only newlines
> between two EOF’s, the parser erroneously marks everything that follows as
> heredoc_body. I tried tree-sitter’s online demo and it gives the same
> result[1]. We should report this to tree-sitter-bash’s author.
>

Sorry for the delay. I confirmed the problem was in the tree-sitter-bash
side and submitted a PR to fix it:
https://github.com/tree-sitter/tree-sitter-bash/pull/137
Once my fixes are pulled in, there is no change required to my patch.


> Also, when defining sh-mode--treesit-settings, instead of using the value
> sh-shell as the language, it’s better to just use ‘bash. Here is what
> happened to me: my default value for sh-shell is fish, so
> sh-mode--treesit-settings was defined with language = fish. When I open
> heredoc-issue.sh, sh-mode parses the shebang and sets sh-shell to bash.
> Since bash does have a parser, (treesit-ready-p ’sh-mode sh-shell) returns
> t, and tree-sitter is activated. However when font-lock tries to use the
> query, it errors because query tries to load a parser for fish.
>

I see. I thought that because sh-mode--treesit-settings is executed after
the local variable sh-shell is defined, it would always be equal to the
detected/file shell type. I am still getting my head around scope in elisp.

I did the change and I think it is good to go, unless there is anything
else to improve for now.

I hope to soon get time to work on imenu, navigation, and indentation for
sh-mode & bash with tree-sitter.

Please find the corrected patch attached.

-- 
João Paulo L. de Carvalho
Ph.D Computer Science |  IC-UNICAMP | Campinas , SP - Brazil
Postdoctoral Research Fellow | University of Alberta | Edmonton, AB - Canada
joao.carvalho@ic.unicamp.br
joao.carvalho@ualberta.ca

[-- Attachment #1.2: Type: text/html, Size: 2564 bytes --]

[-- Attachment #2: 0001-Initial-fontification-in-sh-mode-with-tree-sitter.patch --]
[-- Type: text/x-patch, Size: 7226 bytes --]

From 99c767972948aea4c1c6eba27fd72049655f8c32 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jo=C3=A3o=20P=2E=20L=2E=20de=20Carvalho?=
 <jaopaulolc@gmail.com>
Date: Thu, 27 Oct 2022 15:45:56 -0600
Subject: [PATCH] Initial fontification in sh-mode with tree-sitter

---
 lisp/progmodes/sh-script.el | 153 +++++++++++++++++++++++++++++++++---
 1 file changed, 142 insertions(+), 11 deletions(-)

diff --git a/lisp/progmodes/sh-script.el b/lisp/progmodes/sh-script.el
index 558b62b20a..17f938e95a 100644
--- a/lisp/progmodes/sh-script.el
+++ b/lisp/progmodes/sh-script.el
@@ -148,6 +148,7 @@
   (require 'let-alist)
   (require 'subr-x))
 (require 'executable)
+(require 'treesit)
 
 (autoload 'comint-completion-at-point "comint")
 (autoload 'comint-filename-completion "comint")
@@ -1534,13 +1535,6 @@ sh-mode
   ;; we can't look if previous line ended with `\'
   (setq-local comint-prompt-regexp "^[ \t]*")
   (setq-local imenu-case-fold-search nil)
-  (setq font-lock-defaults
-	`((sh-font-lock-keywords
-	   sh-font-lock-keywords-1 sh-font-lock-keywords-2)
-	  nil nil
-	  ((?/ . "w") (?~ . "w") (?. . "w") (?- . "w") (?_ . "w")) nil
-	  (font-lock-syntactic-face-function
-	   . ,#'sh-font-lock-syntactic-face-function)))
   (setq-local syntax-propertize-function #'sh-syntax-propertize-function)
   (add-hook 'syntax-propertize-extend-region-functions
             #'syntax-propertize-multiline 'append 'local)
@@ -1587,7 +1581,28 @@ sh-mode
    nil nil)
   (add-hook 'flymake-diagnostic-functions #'sh-shellcheck-flymake nil t)
   (add-hook 'hack-local-variables-hook
-    #'sh-after-hack-local-variables nil t))
+    #'sh-after-hack-local-variables nil t)
+
+  (cond
+   ;; Tree-sitter
+   ((treesit-ready-p 'sh-mode sh-shell)
+    (setq-local font-lock-keywords-only t)
+    (setq-local treesit-font-lock-feature-list
+                '((comments functions strings heredocs)
+                  (variables keywords commands decl-commands)
+                  (constants operators builtin-variables)))
+    (setq-local treesit-font-lock-settings
+                sh-mode--treesit-settings)
+    (treesit-major-mode-setup))
+   ;; Elisp.
+   (t
+    (setq font-lock-defaults
+          `((sh-font-lock-keywords
+             sh-font-lock-keywords-1 sh-font-lock-keywords-2)
+            nil nil
+            ((?/ . "w") (?~ . "w") (?. . "w") (?- . "w") (?_ . "w")) nil
+            (font-lock-syntactic-face-function
+             . ,#'sh-font-lock-syntactic-face-function))))))
 
 ;;;###autoload
 (defalias 'shell-script-mode 'sh-mode)
@@ -3191,6 +3206,122 @@ sh-shellcheck-flymake
       (process-send-region sh--shellcheck-process (point-min) (point-max))
       (process-send-eof sh--shellcheck-process))))
 
-(provide 'sh-script)
-
-;;; sh-script.el ends here
+;;; Tree-sitter font-lock
+
+(defvar sh-mode--treesit-operators
+  '("|" "|&" "||" "&&" ">" ">>" "<" "<<" "<<-" "<<<" "==" "!=" ";"
+    ";;" ";&" ";;&")
+  "List of `sh-mode' operator to fontify")
+
+(defvar sh-mode--treesit-keywords
+  '("case" "do" "done" "elif" "else" "esac" "export" "fi" "for"
+    "function" "if" "in" "unset" "while" "then")
+  "Minimal list of keywords that belong to tree-sitter-bash's grammar.
+
+Some reserved words are not recognize to keep the grammar
+simpler. Those are identified with regex-based filtered queries.
+
+See `sh-mode--treesit-other-keywords' and
+`sh-mode--treesit-settings').")
+
+(defun sh-mode--treesit-other-keywords ()
+  "Returns a list `others' of key/reserved words to be fontified with
+regex-based queries as they are not part of tree-sitter-bash's
+grammar.
+
+See `sh-mode--treesit-other-keywords' and
+`sh-mode--treesit-settings')."
+  (let ((minimal sh-mode--treesit-keywords)
+        (all (append (sh-feature sh-leading-keywords)
+                     (sh-feature sh-other-keywords)))
+        (others))
+    (dolist (keyword all others)
+      (if (not (member keyword minimal))
+          (setq others (cons keyword others))))))
+
+(defun sh-mode--treesit-fontify-decl-command (node override _start _end)
+  "Fontifies only the name of declaration_command nodes.
+
+This is used instead of `font-lock-builtion-face' directly because
+otherwise the whole command, including the variable assignment part,
+is fontified with with `font-lock-builtin-face'. An alternative to
+this would be to declaration_command nodes to have a `name:' field."
+  (let* ((maybe-decl-cmd (treesit-node-parent node))
+         (node-type (treesit-node-type maybe-decl-cmd)))
+    (when (string= node-type "declaration_command")
+      (let* ((name-node (car (treesit-node-children maybe-decl-cmd)))
+             (name-beg (treesit-node-start name-node))
+             (name-end (treesit-node-end name-node)))
+        (put-text-property name-beg
+                           name-end
+                           'face
+                           font-lock-builtin-face)))))
+
+(defvar sh-mode--treesit-settings
+  (treesit-font-lock-rules
+   :feature 'comments
+   :language sh-shell
+   '((comment) @font-lock-comment-face)
+   :feature 'functions
+   :language sh-shell
+   '((function_definition name: (word) @font-lock-function-name-face))
+   :feature 'strings
+   :language sh-shell
+   '([(string) (raw_string)] @font-lock-string-face)
+   :feature 'heredocs
+   :language sh-shell
+   '([(heredoc_start) (heredoc_body)] @sh-heredoc)
+   :feature 'variables
+   :language sh-shell
+   '((variable_name) @font-lock-variable-name-face)
+   :feature 'keywords
+   :language sh-shell
+   `(;; keywords
+     [ ,@sh-mode--treesit-keywords ] @font-lock-keyword-face
+     ;; reserved words
+     (command_name
+      ((word) @font-lock-keyword-face
+       (:match
+        ,(rx-to-string
+            `(seq bol
+                  (or ,@(sh-mode--treesit-other-keywords))
+                  eol))
+        @font-lock-keyword-face))))
+   :feature 'commands
+   :language sh-shell
+   `(;; function/non-builtin command calls
+     (command_name (word) @font-lock-function-name-face)
+     ;; builtin commands
+     (command_name
+      ((word) @font-lock-builtin-face
+       (:match ,(let ((builtins
+                       (sh-feature sh-builtins)))
+                  (rx-to-string
+                   `(seq bol
+                         (or ,@builtins)
+                         eol)))
+               @font-lock-builtin-face))))
+   :feature 'decl-commands
+   :language sh-shell
+   '(;; declaration commands
+     (declaration_command _ @sh-mode--treesit-fontify-decl-command))
+   :feature 'constants
+   :language sh-shell
+   '((case_item value: (word) @font-lock-constant-face)
+     (file_descriptor) @font-lock-constant-face)
+   :feature 'operators
+   :language sh-shell
+   `([ ,@sh-mode--treesit-operators ] @font-lock-builtin-face)
+   :feature 'builtin-variables
+   :language sh-shell
+   `(((special_variable_name) @font-lock-builtin-face
+      (:match ,(let ((builtin-vars (sh-feature sh-variables)))
+                 (rx-to-string
+                  `(seq bol
+                        (or ,@builtin-vars)
+                        eol)))
+              @font-lock-builtin-face))))
+  "Tree-sitter font-lock settings for `sh-mode'.")
+
+(provide 'sh-mode)
+;;; sh-mode.el ends here
-- 
2.31.1


^ permalink raw reply related	[flat|nested] 53+ messages in thread

* Re: Initial fontification in sh-mode with tree-sittter
  2022-11-12 22:04                                     ` João Paulo Labegalini de Carvalho
@ 2022-11-12 22:28                                       ` Yuan Fu
  2022-11-12 23:57                                         ` João Paulo Labegalini de Carvalho
  2022-11-13  6:23                                       ` Eli Zaretskii
  1 sibling, 1 reply; 53+ messages in thread
From: Yuan Fu @ 2022-11-12 22:28 UTC (permalink / raw)
  To: João Paulo Labegalini de Carvalho; +Cc: Eli Zaretskii, emacs-devel



> On Nov 12, 2022, at 2:04 PM, João Paulo Labegalini de Carvalho <jaopaulolc@gmail.com> wrote:
> 
> I see. This is tree-sitter-bash’s problem. When there are only newlines between two EOF’s, the parser erroneously marks everything that follows as heredoc_body. I tried tree-sitter’s online demo and it gives the same result[1]. We should report this to tree-sitter-bash’s author.
> 
> Sorry for the delay. I confirmed the problem was in the tree-sitter-bash side and submitted a PR to fix it: https://github.com/tree-sitter/tree-sitter-bash/pull/137
> Once my fixes are pulled in, there is no change required to my patch.
>  
> Also, when defining sh-mode--treesit-settings, instead of using the value sh-shell as the language, it’s better to just use ‘bash. Here is what happened to me: my default value for sh-shell is fish, so sh-mode--treesit-settings was defined with language = fish. When I open heredoc-issue.sh, sh-mode parses the shebang and sets sh-shell to bash. Since bash does have a parser, (treesit-ready-p ’sh-mode sh-shell) returns t, and tree-sitter is activated. However when font-lock tries to use the query, it errors because query tries to load a parser for fish.
> 
> I see. I thought that because sh-mode--treesit-settings is executed after the local variable sh-shell is defined, it would always be equal to the detected/file shell type. I am still getting my head around scope in elisp.

When the defvar evaluates at load time, the value of sh-shell is the value set by user’s configuration, not the detected/file shell type. When the major-mode initialization runs (when we open a file), sh-shell’s value becomes the detected/file shell type. 

Because the tree-sitter language definition only works with bash, it doesn’t make sense to define those queries with anything other than bash, in sh-mode--treesit-settings.


> I did the change and I think it is good to go, unless there is anything else to improve for now.
> 
> I hope to soon get time to work on imenu, navigation, and indentation for sh-mode & bash with tree-sitter.
> 
> Please find the corrected patch attached.

Thanks, some comments:

+(defun sh-mode--treesit-fontify-decl-command (node override _start _end)
+  "Fontifies only the name of declaration_command nodes.
+
+This is used instead of `font-lock-builtion-face' directly because
+otherwise the whole command, including the variable assignment part,
+is fontified with with `font-lock-builtin-face'. An alternative to
+this would be to declaration_command nodes to have a `name:' field.”

I guess you meant “...for declaration_command node to have…”? (Declaimer: not native speaker)

+  (let* ((maybe-decl-cmd (treesit-node-parent node))
+         (node-type (treesit-node-type maybe-decl-cmd)))
+    (when (string= node-type "declaration_command")
+      (let* ((name-node (car (treesit-node-children maybe-decl-cmd)))
+             (name-beg (treesit-node-start name-node))
+             (name-end (treesit-node-end name-node)))
+        (put-text-property name-beg
+                           name-end
+                           'face
+                           font-lock-builtin-face)))))

+
+  (cond
+   ;; Tree-sitter
+   ((treesit-ready-p 'sh-mode sh-shell)

+    (setq-local font-lock-keywords-only t)

This line is not necessary anymore due to recent changes.

+    (setq-local treesit-font-lock-feature-list
+                '((comments functions strings heredocs)
+                  (variables keywords commands decl-commands)
+                  (constants operators builtin-variables)))
+    (setq-local treesit-font-lock-settings
+                sh-mode--treesit-settings)
+    (treesit-major-mode-setup))
+   ;; Elisp.
+   (t
+    (setq font-lock-defaults
+          `((sh-font-lock-keywords
+             sh-font-lock-keywords-1 sh-font-lock-keywords-2)
+            nil nil
+            ((?/ . "w") (?~ . "w") (?. . "w") (?- . "w") (?_ . "w")) nil
+            (font-lock-syntactic-face-function
+             . ,#'sh-font-lock-syntactic-face-function))))))

+(defvar sh-mode--treesit-settings
+  (treesit-font-lock-rules
+   :feature 'comments
+   :language sh-shell
+   '((comment) @font-lock-comment-face)
+   :feature 'functions
+   :language sh-shell
+   '((function_definition name: (word) @font-lock-function-name-face))
+   :feature 'strings
+   :language sh-shell
+   '([(string) (raw_string)] @font-lock-string-face)
+   :feature 'heredocs
+   :language sh-shell
+   '([(heredoc_start) (heredoc_body)] @sh-heredoc)
+   :feature 'variables
+   :language sh-shell
+   '((variable_name) @font-lock-variable-name-face)

Because of reasons I mentioned earlier, we should use ‘bash instead of sh-shell here.

Once those are changed I think we can push to feature/tree-sitter, other features/fixes can come later.

Yuan




^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: Initial fontification in sh-mode with tree-sittter
  2022-11-12 22:28                                       ` Yuan Fu
@ 2022-11-12 23:57                                         ` João Paulo Labegalini de Carvalho
  2022-11-16  8:34                                           ` Yuan Fu
  0 siblings, 1 reply; 53+ messages in thread
From: João Paulo Labegalini de Carvalho @ 2022-11-12 23:57 UTC (permalink / raw)
  To: Yuan Fu; +Cc: Eli Zaretskii, emacs-devel


[-- Attachment #1.1: Type: text/plain, Size: 1076 bytes --]

Thanks, Yuan for the feedback and explanation.

I am attaching the patch with the corrections pointed out by you.

In here

+This is used instead of `font-lock-builtion-face' directly because
> +otherwise the whole command, including the variable assignment part,
> +is fontified with with `font-lock-builtin-face'. An alternative to
> +this would be to declaration_command nodes to have a `name:' field.”
>
> I guess you meant “...for declaration_command node to have…”? (Declaimer:
> not native speaker)
>

You are right about the plural in nodes, as I was referring to a "class" of
commands. But I think "to" is the correct preposition. Thus I changed the
sentence to: *An alternative to this would be to declaration_commands to
have a `name:' field.*

Let me know what you think and if something else looks off.

-- 
João Paulo L. de Carvalho
Ph.D Computer Science |  IC-UNICAMP | Campinas , SP - Brazil
Postdoctoral Research Fellow | University of Alberta | Edmonton, AB - Canada
joao.carvalho@ic.unicamp.br
joao.carvalho@ualberta.ca

[-- Attachment #1.2: Type: text/html, Size: 1657 bytes --]

[-- Attachment #2: 0001-Initial-fontification-in-sh-mode-with-tree-sitter.patch --]
[-- Type: text/x-patch, Size: 7140 bytes --]

From da05fd3bc87f0cc836eadf36cc5d78137c865408 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jo=C3=A3o=20P=2E=20L=2E=20de=20Carvalho?=
 <jaopaulolc@gmail.com>
Date: Thu, 27 Oct 2022 15:45:56 -0600
Subject: [PATCH] Initial fontification in sh-mode with tree-sitter

---
 lisp/progmodes/sh-script.el | 152 +++++++++++++++++++++++++++++++++---
 1 file changed, 141 insertions(+), 11 deletions(-)

diff --git a/lisp/progmodes/sh-script.el b/lisp/progmodes/sh-script.el
index 558b62b20a..8c3168114c 100644
--- a/lisp/progmodes/sh-script.el
+++ b/lisp/progmodes/sh-script.el
@@ -148,6 +148,7 @@
   (require 'let-alist)
   (require 'subr-x))
 (require 'executable)
+(require 'treesit)
 
 (autoload 'comint-completion-at-point "comint")
 (autoload 'comint-filename-completion "comint")
@@ -1534,13 +1535,6 @@ sh-mode
   ;; we can't look if previous line ended with `\'
   (setq-local comint-prompt-regexp "^[ \t]*")
   (setq-local imenu-case-fold-search nil)
-  (setq font-lock-defaults
-	`((sh-font-lock-keywords
-	   sh-font-lock-keywords-1 sh-font-lock-keywords-2)
-	  nil nil
-	  ((?/ . "w") (?~ . "w") (?. . "w") (?- . "w") (?_ . "w")) nil
-	  (font-lock-syntactic-face-function
-	   . ,#'sh-font-lock-syntactic-face-function)))
   (setq-local syntax-propertize-function #'sh-syntax-propertize-function)
   (add-hook 'syntax-propertize-extend-region-functions
             #'syntax-propertize-multiline 'append 'local)
@@ -1587,7 +1581,27 @@ sh-mode
    nil nil)
   (add-hook 'flymake-diagnostic-functions #'sh-shellcheck-flymake nil t)
   (add-hook 'hack-local-variables-hook
-    #'sh-after-hack-local-variables nil t))
+    #'sh-after-hack-local-variables nil t)
+
+  (cond
+   ;; Tree-sitter
+   ((treesit-ready-p 'sh-mode 'bash)
+    (setq-local treesit-font-lock-feature-list
+                '((comments functions strings heredocs)
+                  (variables keywords commands decl-commands)
+                  (constants operators builtin-variables)))
+    (setq-local treesit-font-lock-settings
+                sh-mode--treesit-settings)
+    (treesit-major-mode-setup))
+   ;; Elisp.
+   (t
+    (setq font-lock-defaults
+          `((sh-font-lock-keywords
+             sh-font-lock-keywords-1 sh-font-lock-keywords-2)
+            nil nil
+            ((?/ . "w") (?~ . "w") (?. . "w") (?- . "w") (?_ . "w")) nil
+            (font-lock-syntactic-face-function
+             . ,#'sh-font-lock-syntactic-face-function))))))
 
 ;;;###autoload
 (defalias 'shell-script-mode 'sh-mode)
@@ -3191,6 +3205,122 @@ sh-shellcheck-flymake
       (process-send-region sh--shellcheck-process (point-min) (point-max))
       (process-send-eof sh--shellcheck-process))))
 
-(provide 'sh-script)
-
-;;; sh-script.el ends here
+;;; Tree-sitter font-lock
+
+(defvar sh-mode--treesit-operators
+  '("|" "|&" "||" "&&" ">" ">>" "<" "<<" "<<-" "<<<" "==" "!=" ";"
+    ";;" ";&" ";;&")
+  "List of `sh-mode' operator to fontify")
+
+(defvar sh-mode--treesit-keywords
+  '("case" "do" "done" "elif" "else" "esac" "export" "fi" "for"
+    "function" "if" "in" "unset" "while" "then")
+  "Minimal list of keywords that belong to tree-sitter-bash's grammar.
+
+Some reserved words are not recognize to keep the grammar
+simpler. Those are identified with regex-based filtered queries.
+
+See `sh-mode--treesit-other-keywords' and
+`sh-mode--treesit-settings').")
+
+(defun sh-mode--treesit-other-keywords ()
+  "Returns a list `others' of key/reserved words to be fontified with
+regex-based queries as they are not part of tree-sitter-bash's
+grammar.
+
+See `sh-mode--treesit-other-keywords' and
+`sh-mode--treesit-settings')."
+  (let ((minimal sh-mode--treesit-keywords)
+        (all (append (sh-feature sh-leading-keywords)
+                     (sh-feature sh-other-keywords)))
+        (others))
+    (dolist (keyword all others)
+      (if (not (member keyword minimal))
+          (setq others (cons keyword others))))))
+
+(defun sh-mode--treesit-fontify-decl-command (node override _start _end)
+  "Fontifies only the name of declaration_command nodes.
+
+This is used instead of `font-lock-builtion-face' directly because
+otherwise the whole command, including the variable assignment part,
+is fontified with with `font-lock-builtin-face'. An alternative to
+this would be to declaration_command to have a `name:' field."
+  (let* ((maybe-decl-cmd (treesit-node-parent node))
+         (node-type (treesit-node-type maybe-decl-cmd)))
+    (when (string= node-type "declaration_command")
+      (let* ((name-node (car (treesit-node-children maybe-decl-cmd)))
+             (name-beg (treesit-node-start name-node))
+             (name-end (treesit-node-end name-node)))
+        (put-text-property name-beg
+                           name-end
+                           'face
+                           font-lock-builtin-face)))))
+
+(defvar sh-mode--treesit-settings
+  (treesit-font-lock-rules
+   :feature 'comments
+   :language 'bash
+   '((comment) @font-lock-comment-face)
+   :feature 'functions
+   :language 'bash
+   '((function_definition name: (word) @font-lock-function-name-face))
+   :feature 'strings
+   :language 'bash
+   '([(string) (raw_string)] @font-lock-string-face)
+   :feature 'heredocs
+   :language 'bash
+   '([(heredoc_start) (heredoc_body)] @sh-heredoc)
+   :feature 'variables
+   :language 'bash
+   '((variable_name) @font-lock-variable-name-face)
+   :feature 'keywords
+   :language 'bash
+   `(;; keywords
+     [ ,@sh-mode--treesit-keywords ] @font-lock-keyword-face
+     ;; reserved words
+     (command_name
+      ((word) @font-lock-keyword-face
+       (:match
+        ,(rx-to-string
+            `(seq bol
+                  (or ,@(sh-mode--treesit-other-keywords))
+                  eol))
+        @font-lock-keyword-face))))
+   :feature 'commands
+   :language 'bash
+   `(;; function/non-builtin command calls
+     (command_name (word) @font-lock-function-name-face)
+     ;; builtin commands
+     (command_name
+      ((word) @font-lock-builtin-face
+       (:match ,(let ((builtins
+                       (sh-feature sh-builtins)))
+                  (rx-to-string
+                   `(seq bol
+                         (or ,@builtins)
+                         eol)))
+               @font-lock-builtin-face))))
+   :feature 'decl-commands
+   :language 'bash
+   '(;; declaration commands
+     (declaration_command _ @sh-mode--treesit-fontify-decl-command))
+   :feature 'constants
+   :language 'bash
+   '((case_item value: (word) @font-lock-constant-face)
+     (file_descriptor) @font-lock-constant-face)
+   :feature 'operators
+   :language 'bash
+   `([ ,@sh-mode--treesit-operators ] @font-lock-builtin-face)
+   :feature 'builtin-variables
+   :language 'bash
+   `(((special_variable_name) @font-lock-builtin-face
+      (:match ,(let ((builtin-vars (sh-feature sh-variables)))
+                 (rx-to-string
+                  `(seq bol
+                        (or ,@builtin-vars)
+                        eol)))
+              @font-lock-builtin-face))))
+  "Tree-sitter font-lock settings for `sh-mode'.")
+
+(provide 'sh-mode)
+;;; sh-mode.el ends here
-- 
2.31.1


^ permalink raw reply related	[flat|nested] 53+ messages in thread

* Re: Initial fontification in sh-mode with tree-sittter
  2022-11-12 22:04                                     ` João Paulo Labegalini de Carvalho
  2022-11-12 22:28                                       ` Yuan Fu
@ 2022-11-13  6:23                                       ` Eli Zaretskii
  2022-11-13  7:01                                         ` Yuan Fu
  2022-11-29 21:52                                         ` João Paulo Labegalini de Carvalho
  1 sibling, 2 replies; 53+ messages in thread
From: Eli Zaretskii @ 2022-11-13  6:23 UTC (permalink / raw)
  To: João Paulo Labegalini de Carvalho; +Cc: casouri, emacs-devel

> From: João Paulo Labegalini de Carvalho <jaopaulolc@gmail.com>
> Date: Sat, 12 Nov 2022 15:04:26 -0700
> Cc: Eli Zaretskii <eliz@gnu.org>, emacs-devel@gnu.org
> 
>  I see. This is tree-sitter-bash’s problem. When there are only newlines between two EOF’s, the parser
>  erroneously marks everything that follows as heredoc_body. I tried tree-sitter’s online demo and it gives
>  the same result[1]. We should report this to tree-sitter-bash’s author.
> 
> Sorry for the delay. I confirmed the problem was in the tree-sitter-bash side and submitted a PR to fix it:
> https://github.com/tree-sitter/tree-sitter-bash/pull/137
> Once my fixes are pulled in, there is no change required to my patch.

Do we need to wait for their fix, or can we have code that will start
working correctly when they fix the parser?



^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: Initial fontification in sh-mode with tree-sittter
  2022-11-13  6:23                                       ` Eli Zaretskii
@ 2022-11-13  7:01                                         ` Yuan Fu
  2022-11-13  7:26                                           ` Eli Zaretskii
  2022-11-29 21:52                                         ` João Paulo Labegalini de Carvalho
  1 sibling, 1 reply; 53+ messages in thread
From: Yuan Fu @ 2022-11-13  7:01 UTC (permalink / raw)
  To: Eli Zaretskii; +Cc: João Paulo Labegalini de Carvalho, emacs-devel



> On Nov 12, 2022, at 10:23 PM, Eli Zaretskii <eliz@gnu.org> wrote:
> 
>> From: João Paulo Labegalini de Carvalho <jaopaulolc@gmail.com>
>> Date: Sat, 12 Nov 2022 15:04:26 -0700
>> Cc: Eli Zaretskii <eliz@gnu.org>, emacs-devel@gnu.org
>> 
>> I see. This is tree-sitter-bash’s problem. When there are only newlines between two EOF’s, the parser
>> erroneously marks everything that follows as heredoc_body. I tried tree-sitter’s online demo and it gives
>> the same result[1]. We should report this to tree-sitter-bash’s author.
>> 
>> Sorry for the delay. I confirmed the problem was in the tree-sitter-bash side and submitted a PR to fix it:
>> https://github.com/tree-sitter/tree-sitter-bash/pull/137
>> Once my fixes are pulled in, there is no change required to my patch.
> 
> Do we need to wait for their fix, or can we have code that will start
> working correctly when they fix the parser?

 We don’t need to wait for their fix. When they fix the parser and users install the new parser, everything will work correctly.

Yuan


^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: Initial fontification in sh-mode with tree-sittter
  2022-11-13  7:01                                         ` Yuan Fu
@ 2022-11-13  7:26                                           ` Eli Zaretskii
  0 siblings, 0 replies; 53+ messages in thread
From: Eli Zaretskii @ 2022-11-13  7:26 UTC (permalink / raw)
  To: Yuan Fu; +Cc: jaopaulolc, emacs-devel

> From: Yuan Fu <casouri@gmail.com>
> Date: Sat, 12 Nov 2022 23:01:30 -0800
> Cc: João Paulo Labegalini de Carvalho <jaopaulolc@gmail.com>,
>  emacs-devel@gnu.org
> 
> 
> 
> > On Nov 12, 2022, at 10:23 PM, Eli Zaretskii <eliz@gnu.org> wrote:
> > 
> >> From: João Paulo Labegalini de Carvalho <jaopaulolc@gmail.com>
> >> Date: Sat, 12 Nov 2022 15:04:26 -0700
> >> Cc: Eli Zaretskii <eliz@gnu.org>, emacs-devel@gnu.org
> >> 
> >> I see. This is tree-sitter-bash’s problem. When there are only newlines between two EOF’s, the parser
> >> erroneously marks everything that follows as heredoc_body. I tried tree-sitter’s online demo and it gives
> >> the same result[1]. We should report this to tree-sitter-bash’s author.
> >> 
> >> Sorry for the delay. I confirmed the problem was in the tree-sitter-bash side and submitted a PR to fix it:
> >> https://github.com/tree-sitter/tree-sitter-bash/pull/137
> >> Once my fixes are pulled in, there is no change required to my patch.
> > 
> > Do we need to wait for their fix, or can we have code that will start
> > working correctly when they fix the parser?
> 
>  We don’t need to wait for their fix. When they fix the parser and users install the new parser, everything will work correctly.

Thanks, then this aspect of the patch should not delay installing the
mode.



^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: Initial fontification in sh-mode with tree-sittter
  2022-11-12 23:57                                         ` João Paulo Labegalini de Carvalho
@ 2022-11-16  8:34                                           ` Yuan Fu
  2022-11-16 15:57                                             ` João Paulo Labegalini de Carvalho
  0 siblings, 1 reply; 53+ messages in thread
From: Yuan Fu @ 2022-11-16  8:34 UTC (permalink / raw)
  To: João Paulo Labegalini de Carvalho; +Cc: Eli Zaretskii, emacs-devel



> On Nov 12, 2022, at 3:57 PM, João Paulo Labegalini de Carvalho <jaopaulolc@gmail.com> wrote:
> 
> Thanks, Yuan for the feedback and explanation.
> 
> I am attaching the patch with the corrections pointed out by you.
> 
> In here
> 
> +This is used instead of `font-lock-builtion-face' directly because
> +otherwise the whole command, including the variable assignment part,
> +is fontified with with `font-lock-builtin-face'. An alternative to
> +this would be to declaration_command nodes to have a `name:' field.”
> 
> I guess you meant “...for declaration_command node to have…”? (Declaimer: not native speaker)
> 
> You are right about the plural in nodes, as I was referring to a "class" of commands. But I think "to" is the correct preposition. Thus I changed the sentence to: An alternative to this would be to declaration_commands to have a `name:' field.
> 
> Let me know what you think and if something else looks off.

I forgot about one thing in the last message (sorry!):

+(provide 'sh-mode)
+;;; sh-mode.el ends here

Is there any particular reason why you changed sh-script to sh-mode?

Yuan





^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: Initial fontification in sh-mode with tree-sittter
  2022-11-16  8:34                                           ` Yuan Fu
@ 2022-11-16 15:57                                             ` João Paulo Labegalini de Carvalho
  2022-11-17 18:25                                               ` Yuan Fu
  0 siblings, 1 reply; 53+ messages in thread
From: João Paulo Labegalini de Carvalho @ 2022-11-16 15:57 UTC (permalink / raw)
  To: Yuan Fu; +Cc: Eli Zaretskii, emacs-devel


[-- Attachment #1.1: Type: text/plain, Size: 595 bytes --]

> +(provide 'sh-mode)
> +;;; sh-mode.el ends here
>
> Is there any particular reason why you changed sh-script to sh-mode?
>

Good catch. That must have been the result of an unintended replace-string.

Here is the patch without that silly change. (It might not be that silly,
as other progmodes are named <lang>-mode? But that can be a separate patch).

-- 
João Paulo L. de Carvalho
Ph.D Computer Science |  IC-UNICAMP | Campinas , SP - Brazil
Postdoctoral Research Fellow | University of Alberta | Edmonton, AB - Canada
joao.carvalho@ic.unicamp.br
joao.carvalho@ualberta.ca

[-- Attachment #1.2: Type: text/html, Size: 1173 bytes --]

[-- Attachment #2: 0001-Initial-fontification-in-sh-mode-with-tree-sitter.patch --]
[-- Type: text/x-patch, Size: 7113 bytes --]

From 2dcbc07b9100bf760fba8a594752f34e750d954d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jo=C3=A3o=20P=2E=20L=2E=20de=20Carvalho?=
 <jaopaulolc@gmail.com>
Date: Thu, 27 Oct 2022 15:45:56 -0600
Subject: [PATCH] Initial fontification in sh-mode with tree-sitter

---
 lisp/progmodes/sh-script.el | 148 +++++++++++++++++++++++++++++++++---
 1 file changed, 139 insertions(+), 9 deletions(-)

diff --git a/lisp/progmodes/sh-script.el b/lisp/progmodes/sh-script.el
index 558b62b20a..c6a828548d 100644
--- a/lisp/progmodes/sh-script.el
+++ b/lisp/progmodes/sh-script.el
@@ -148,6 +148,7 @@
   (require 'let-alist)
   (require 'subr-x))
 (require 'executable)
+(require 'treesit)
 
 (autoload 'comint-completion-at-point "comint")
 (autoload 'comint-filename-completion "comint")
@@ -1534,13 +1535,6 @@ sh-mode
   ;; we can't look if previous line ended with `\'
   (setq-local comint-prompt-regexp "^[ \t]*")
   (setq-local imenu-case-fold-search nil)
-  (setq font-lock-defaults
-	`((sh-font-lock-keywords
-	   sh-font-lock-keywords-1 sh-font-lock-keywords-2)
-	  nil nil
-	  ((?/ . "w") (?~ . "w") (?. . "w") (?- . "w") (?_ . "w")) nil
-	  (font-lock-syntactic-face-function
-	   . ,#'sh-font-lock-syntactic-face-function)))
   (setq-local syntax-propertize-function #'sh-syntax-propertize-function)
   (add-hook 'syntax-propertize-extend-region-functions
             #'syntax-propertize-multiline 'append 'local)
@@ -1587,7 +1581,27 @@ sh-mode
    nil nil)
   (add-hook 'flymake-diagnostic-functions #'sh-shellcheck-flymake nil t)
   (add-hook 'hack-local-variables-hook
-    #'sh-after-hack-local-variables nil t))
+    #'sh-after-hack-local-variables nil t)
+
+  (cond
+   ;; Tree-sitter
+   ((treesit-ready-p 'sh-mode 'bash)
+    (setq-local treesit-font-lock-feature-list
+                '((comments functions strings heredocs)
+                  (variables keywords commands decl-commands)
+                  (constants operators builtin-variables)))
+    (setq-local treesit-font-lock-settings
+                sh-mode--treesit-settings)
+    (treesit-major-mode-setup))
+   ;; Elisp.
+   (t
+    (setq font-lock-defaults
+          `((sh-font-lock-keywords
+             sh-font-lock-keywords-1 sh-font-lock-keywords-2)
+            nil nil
+            ((?/ . "w") (?~ . "w") (?. . "w") (?- . "w") (?_ . "w")) nil
+            (font-lock-syntactic-face-function
+             . ,#'sh-font-lock-syntactic-face-function))))))
 
 ;;;###autoload
 (defalias 'shell-script-mode 'sh-mode)
@@ -3191,6 +3205,122 @@ sh-shellcheck-flymake
       (process-send-region sh--shellcheck-process (point-min) (point-max))
       (process-send-eof sh--shellcheck-process))))
 
-(provide 'sh-script)
+;;; Tree-sitter font-lock
+
+(defvar sh-mode--treesit-operators
+  '("|" "|&" "||" "&&" ">" ">>" "<" "<<" "<<-" "<<<" "==" "!=" ";"
+    ";;" ";&" ";;&")
+  "List of `sh-mode' operator to fontify")
+
+(defvar sh-mode--treesit-keywords
+  '("case" "do" "done" "elif" "else" "esac" "export" "fi" "for"
+    "function" "if" "in" "unset" "while" "then")
+  "Minimal list of keywords that belong to tree-sitter-bash's grammar.
+
+Some reserved words are not recognize to keep the grammar
+simpler. Those are identified with regex-based filtered queries.
+
+See `sh-mode--treesit-other-keywords' and
+`sh-mode--treesit-settings').")
+
+(defun sh-mode--treesit-other-keywords ()
+  "Returns a list `others' of key/reserved words to be fontified with
+regex-based queries as they are not part of tree-sitter-bash's
+grammar.
+
+See `sh-mode--treesit-other-keywords' and
+`sh-mode--treesit-settings')."
+  (let ((minimal sh-mode--treesit-keywords)
+        (all (append (sh-feature sh-leading-keywords)
+                     (sh-feature sh-other-keywords)))
+        (others))
+    (dolist (keyword all others)
+      (if (not (member keyword minimal))
+          (setq others (cons keyword others))))))
+
+(defun sh-mode--treesit-fontify-decl-command (node override _start _end)
+  "Fontifies only the name of declaration_command nodes.
+
+This is used instead of `font-lock-builtion-face' directly because
+otherwise the whole command, including the variable assignment part,
+is fontified with with `font-lock-builtin-face'. An alternative to
+this would be to declaration_command to have a `name:' field."
+  (let* ((maybe-decl-cmd (treesit-node-parent node))
+         (node-type (treesit-node-type maybe-decl-cmd)))
+    (when (string= node-type "declaration_command")
+      (let* ((name-node (car (treesit-node-children maybe-decl-cmd)))
+             (name-beg (treesit-node-start name-node))
+             (name-end (treesit-node-end name-node)))
+        (put-text-property name-beg
+                           name-end
+                           'face
+                           font-lock-builtin-face)))))
+
+(defvar sh-mode--treesit-settings
+  (treesit-font-lock-rules
+   :feature 'comments
+   :language 'bash
+   '((comment) @font-lock-comment-face)
+   :feature 'functions
+   :language 'bash
+   '((function_definition name: (word) @font-lock-function-name-face))
+   :feature 'strings
+   :language 'bash
+   '([(string) (raw_string)] @font-lock-string-face)
+   :feature 'heredocs
+   :language 'bash
+   '([(heredoc_start) (heredoc_body)] @sh-heredoc)
+   :feature 'variables
+   :language 'bash
+   '((variable_name) @font-lock-variable-name-face)
+   :feature 'keywords
+   :language 'bash
+   `(;; keywords
+     [ ,@sh-mode--treesit-keywords ] @font-lock-keyword-face
+     ;; reserved words
+     (command_name
+      ((word) @font-lock-keyword-face
+       (:match
+        ,(rx-to-string
+            `(seq bol
+                  (or ,@(sh-mode--treesit-other-keywords))
+                  eol))
+        @font-lock-keyword-face))))
+   :feature 'commands
+   :language 'bash
+   `(;; function/non-builtin command calls
+     (command_name (word) @font-lock-function-name-face)
+     ;; builtin commands
+     (command_name
+      ((word) @font-lock-builtin-face
+       (:match ,(let ((builtins
+                       (sh-feature sh-builtins)))
+                  (rx-to-string
+                   `(seq bol
+                         (or ,@builtins)
+                         eol)))
+               @font-lock-builtin-face))))
+   :feature 'decl-commands
+   :language 'bash
+   '(;; declaration commands
+     (declaration_command _ @sh-mode--treesit-fontify-decl-command))
+   :feature 'constants
+   :language 'bash
+   '((case_item value: (word) @font-lock-constant-face)
+     (file_descriptor) @font-lock-constant-face)
+   :feature 'operators
+   :language 'bash
+   `([ ,@sh-mode--treesit-operators ] @font-lock-builtin-face)
+   :feature 'builtin-variables
+   :language 'bash
+   `(((special_variable_name) @font-lock-builtin-face
+      (:match ,(let ((builtin-vars (sh-feature sh-variables)))
+                 (rx-to-string
+                  `(seq bol
+                        (or ,@builtin-vars)
+                        eol)))
+              @font-lock-builtin-face))))
+  "Tree-sitter font-lock settings for `sh-mode'.")
 
+(provide 'sh-script)
 ;;; sh-script.el ends here
-- 
2.31.1


^ permalink raw reply related	[flat|nested] 53+ messages in thread

* Re: Initial fontification in sh-mode with tree-sittter
  2022-11-16 15:57                                             ` João Paulo Labegalini de Carvalho
@ 2022-11-17 18:25                                               ` Yuan Fu
  2022-11-17 18:53                                                 ` João Paulo Labegalini de Carvalho
  0 siblings, 1 reply; 53+ messages in thread
From: Yuan Fu @ 2022-11-17 18:25 UTC (permalink / raw)
  To: João Paulo Labegalini de Carvalho; +Cc: Eli Zaretskii, emacs-devel



> On Nov 16, 2022, at 7:57 AM, João Paulo Labegalini de Carvalho <jaopaulolc@gmail.com> wrote:
> 
>  
> +(provide 'sh-mode)
> +;;; sh-mode.el ends here
> 
> Is there any particular reason why you changed sh-script to sh-mode?
> 
> Good catch. That must have been the result of an unintended replace-string.
> 
> Here is the patch without that silly change. (It might not be that silly, as other progmodes are named <lang>-mode? But that can be a separate patch).

Thanks! I applied your patch and made a few changes, the fist commit has stylistic changes, the second has some feature changes:

For the stylistic changes:

1. I added commit message for your patch as an example. For future patches it’s best if you can also have this ChangeLog style commit message, I think you can find more detailed explanation in in CONTRIBUTE file.

(I don’t know if you know the following already, bug FYI:)

2. Docstring sentences always end with a period
3. All comments and sentences should be complete sentences, with two spaces at the end.
4. The first line of a docstring shouldn’t exceeds 80 columns.

For the second commit, I changed all the feature names to singular, and decl-commands to declaration-command. I also simplified the rule for declaration-command, IIUC you want to highlight the command keywords?

Also, right now these command keywords are highlighted in builtin face, should they be fontified in keyword face? I’m no expert of bash so I can’t tell. But keyword face seems more natural to me.

Yuan


^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: Initial fontification in sh-mode with tree-sittter
  2022-11-17 18:25                                               ` Yuan Fu
@ 2022-11-17 18:53                                                 ` João Paulo Labegalini de Carvalho
  2022-11-17 19:11                                                   ` Yuan Fu
  0 siblings, 1 reply; 53+ messages in thread
From: João Paulo Labegalini de Carvalho @ 2022-11-17 18:53 UTC (permalink / raw)
  To: Yuan Fu; +Cc: Eli Zaretskii, emacs-devel

[-- Attachment #1: Type: text/plain, Size: 1980 bytes --]

> 1. I added commit message for your patch as an example. For future patches
> it’s best if you can also have this ChangeLog style commit message, I think
> you can find more detailed explanation in in CONTRIBUTE file.
>
> (I don’t know if you know the following already, bug FYI:)
>
> 2. Docstring sentences always end with a period
> 3. All comments and sentences should be complete sentences, with two
> spaces at the end.
> 4. The first line of a docstring shouldn’t exceeds 80 columns.
>

I did not. Thanks for the corrections and improvements. I will keep those
in mind for future patches.

>
> For the second commit, I changed all the feature names to singular, and
> decl-commands to declaration-command. I also simplified the rule for
> declaration-command, IIUC you want to highlight the command keywords?
>

The changes to singular make sense to me. I considered your simplified
version as well, but decided against alternation queries to avoid hardcoded
command names. Matching a node via its type rather than comparing the
spelling might be fast, but my code also had a string= there, so in the end
the simpler version works well.

Also, right now these command keywords are highlighted in builtin face,
> should they be fontified in keyword face? I’m no expert of bash so I can’t
> tell. But keyword face seems more natural to me.
>

You are correct in the sense that all declaration commands are
builtin commands. I decided to highlight them differently than other
builtin commands because they are used to define variables, and it might be
useful to be able to distinguish them visually. But this is not required
nor more correct than using the builtin face for both types of commands.

-- 
João Paulo L. de Carvalho
Ph.D Computer Science |  IC-UNICAMP | Campinas , SP - Brazil
Postdoctoral Research Fellow | University of Alberta | Edmonton, AB - Canada
joao.carvalho@ic.unicamp.br
joao.carvalho@ualberta.ca

[-- Attachment #2: Type: text/html, Size: 2812 bytes --]

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: Initial fontification in sh-mode with tree-sittter
  2022-11-17 18:53                                                 ` João Paulo Labegalini de Carvalho
@ 2022-11-17 19:11                                                   ` Yuan Fu
  0 siblings, 0 replies; 53+ messages in thread
From: Yuan Fu @ 2022-11-17 19:11 UTC (permalink / raw)
  To: João Paulo Labegalini de Carvalho; +Cc: Eli Zaretskii, emacs-devel



> On Nov 17, 2022, at 10:53 AM, João Paulo Labegalini de Carvalho <jaopaulolc@gmail.com> wrote:
> 
>  
> 1. I added commit message for your patch as an example. For future patches it’s best if you can also have this ChangeLog style commit message, I think you can find more detailed explanation in in CONTRIBUTE file.
> 
> (I don’t know if you know the following already, bug FYI:)
> 
> 2. Docstring sentences always end with a period
> 3. All comments and sentences should be complete sentences, with two spaces at the end.
> 4. The first line of a docstring shouldn’t exceeds 80 columns.
> 
> I did not. Thanks for the corrections and improvements. I will keep those in mind for future patches. 
> 
> For the second commit, I changed all the feature names to singular, and decl-commands to declaration-command. I also simplified the rule for declaration-command, IIUC you want to highlight the command keywords?
> 
> The changes to singular make sense to me. I considered your simplified version as well, but decided against alternation queries to avoid hardcoded command names. Matching a node via its type rather than comparing the spelling might be fast, but my code also had a string= there, so in the end the simpler version works well.

It should be safe to hardcode these command keywords, because they are hardcoded in the grammar anyway (and I don’t see them used elsewhere):

    declaration_command: $ => prec.left(seq(
      choice('declare', 'typeset', 'export', 'readonly', 'local'),
      repeat(choice(
        $._literal,
        $._simple_variable_name,
        $.variable_assignment
      ))
    )),

> 
> Also, right now these command keywords are highlighted in builtin face, should they be fontified in keyword face? I’m no expert of bash so I can’t tell. But keyword face seems more natural to me.
> 
> You are correct in the sense that all declaration commands are builtin commands. I decided to highlight them differently than other builtin commands because they are used to define variables, and it might be useful to be able to distinguish them visually. But this is not required nor more correct than using the builtin face for both types of commands. 

I meant to highlight them all in keyword face, but if you don’t have objections I’ll make that change :-) Thanks!

Yuan


^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: Initial fontification in sh-mode with tree-sittter
  2022-11-13  6:23                                       ` Eli Zaretskii
  2022-11-13  7:01                                         ` Yuan Fu
@ 2022-11-29 21:52                                         ` João Paulo Labegalini de Carvalho
  1 sibling, 0 replies; 53+ messages in thread
From: João Paulo Labegalini de Carvalho @ 2022-11-29 21:52 UTC (permalink / raw)
  To: Eli Zaretskii, emacs-devel, Yuan Fu

[-- Attachment #1: Type: text/plain, Size: 1554 bytes --]

On Sat, Nov 12, 2022 at 11:23 PM Eli Zaretskii <eliz@gnu.org> wrote:

> > From: João Paulo Labegalini de Carvalho <jaopaulolc@gmail.com>
> > Date: Sat, 12 Nov 2022 15:04:26 -0700
> > Cc: Eli Zaretskii <eliz@gnu.org>, emacs-devel@gnu.org
> >
> >  I see. This is tree-sitter-bash’s problem. When there are only newlines
> between two EOF’s, the parser
> >  erroneously marks everything that follows as heredoc_body. I tried
> tree-sitter’s online demo and it gives
> >  the same result[1]. We should report this to tree-sitter-bash’s author.
> >
> > Sorry for the delay. I confirmed the problem was in the tree-sitter-bash
> side and submitted a PR to fix it:
> > https://github.com/tree-sitter/tree-sitter-bash/pull/137
> > Once my fixes are pulled in, there is no change required to my patch.
>
> Do we need to wait for their fix, or can we have code that will start
> working correctly when they fix the parser?
>


My PR to tree-sitter-bash fixing the issue with heredocs just landed. Now
the changes that we merged with the great help of Yuan will fontify
heredocs correctly with no changes to sh-script.el.
https://github.com/tree-sitter/tree-sitter-bash/pull/137

I hope to soon find some time to work on adding indentation, imenu, and
more tree-sitter-based features to sh-mode.

-- 
João Paulo L. de Carvalho
Ph.D Computer Science |  IC-UNICAMP | Campinas , SP - Brazil
Postdoctoral Research Fellow | University of Alberta | Edmonton, AB - Canada
joao.carvalho@ic.unicamp.br
joao.carvalho@ualberta.ca

[-- Attachment #2: Type: text/html, Size: 2544 bytes --]

^ permalink raw reply	[flat|nested] 53+ messages in thread

end of thread, other threads:[~2022-11-29 21:52 UTC | newest]

Thread overview: 53+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-10-27 22:01 Initial fontification in sh-mode with tree-sittter João Paulo Labegalini de Carvalho
2022-10-27 23:09 ` João Paulo Labegalini de Carvalho
2022-10-27 23:40   ` João Paulo Labegalini de Carvalho
2022-10-28  8:12     ` Yuan Fu
2022-10-28 15:09       ` Daniel Martín
2022-10-31  2:13         ` Yuan Fu
2022-10-31 21:56           ` Yuan Fu
2022-11-01  0:09             ` Daniel Martín
2022-11-01  0:25               ` Yuan Fu
2022-11-01  7:13                 ` Eli Zaretskii
2022-11-01  8:35                   ` Yuan Fu
2022-11-01  9:23                     ` Eli Zaretskii
     [not found]                       ` <CAGjvy2_6BReOVjSqgTM57+h+Ycjdu1o1TKoQHf6q-ypnAX3=rA@mail.gmail.com>
2022-11-02 19:17                         ` Eli Zaretskii
2022-11-03  1:25                           ` Yuan Fu
2022-11-03  6:36                             ` Eli Zaretskii
2022-11-03  7:16                               ` Yuan Fu
2022-11-03 16:08                             ` João Paulo Labegalini de Carvalho
2022-11-03 19:12                               ` Yuan Fu
2022-11-04 20:44                                 ` João Paulo Labegalini de Carvalho
2022-11-04 22:50                                   ` Yuan Fu
2022-11-12 22:04                                     ` João Paulo Labegalini de Carvalho
2022-11-12 22:28                                       ` Yuan Fu
2022-11-12 23:57                                         ` João Paulo Labegalini de Carvalho
2022-11-16  8:34                                           ` Yuan Fu
2022-11-16 15:57                                             ` João Paulo Labegalini de Carvalho
2022-11-17 18:25                                               ` Yuan Fu
2022-11-17 18:53                                                 ` João Paulo Labegalini de Carvalho
2022-11-17 19:11                                                   ` Yuan Fu
2022-11-13  6:23                                       ` Eli Zaretskii
2022-11-13  7:01                                         ` Yuan Fu
2022-11-13  7:26                                           ` Eli Zaretskii
2022-11-29 21:52                                         ` João Paulo Labegalini de Carvalho
2022-11-02 20:37             ` [SPAM UNSURE] " Stephen Leake
2022-10-28  0:18 ` Stefan Kangas
2022-10-28  0:48   ` João Paulo Labegalini de Carvalho
2022-10-28 15:27 ` João Paulo Labegalini de Carvalho
2022-10-28 15:57   ` Stefan Kangas
2022-10-28 16:15     ` Stefan Monnier
2022-10-28 16:23       ` Theodor Thornhill
2022-10-28 16:34       ` João Paulo Labegalini de Carvalho
2022-10-28 17:37         ` Stefan Monnier
2022-10-28 17:45           ` Yuan Fu
2022-10-28 18:12             ` Stefan Monnier
2022-11-01  0:33               ` Yuan Fu
2022-11-01  3:38                 ` Stefan Monnier
2022-11-01  8:37                   ` Yuan Fu
2022-10-29  7:13             ` Augusto Stoffel
2022-10-28 17:44       ` Yuan Fu
2022-11-02 18:22 ` João Paulo Labegalini de Carvalho
2022-11-02 18:55   ` João Paulo Labegalini de Carvalho
2022-11-12 12:47     ` Eli Zaretskii
2022-11-12 19:45       ` Yuan Fu
2022-11-12 19:53         ` Eli Zaretskii

Code repositories for project(s) associated with this public inbox

	https://git.savannah.gnu.org/cgit/emacs.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).