unofficial mirror of emacs-devel@gnu.org 
 help / color / mirror / code / Atom feed
From: "João Paulo Labegalini de Carvalho" <jaopaulolc@gmail.com>
To: emacs-devel@gnu.org
Subject: Re: Initial fontification in sh-mode with tree-sittter
Date: Fri, 28 Oct 2022 09:27:47 -0600	[thread overview]
Message-ID: <CAGjvy2-FCLp4TdLoB8b5epmv=8AXY_v-P8+OF4Vv2QWB+PwGug@mail.gmail.com> (raw)
In-Reply-To: <CAGjvy2-wWch9iO_BUMJLzquHBpMWXu+bbhpvtWtqZXisN5HY3A@mail.gmail.com>


[-- Attachment #1.1: Type: text/plain, Size: 1802 bytes --]

Hi,

In face of the comments about the risks of adding new faces, I edited my
patch to only use existing faces from font-lock.

Please let me know what you think of this new patch.

Thanks.

On Thu, Oct 27, 2022 at 4:01 PM João Paulo Labegalini de Carvalho <
jaopaulolc@gmail.com> wrote:

> Hi everyone,
>
> Please find the patch for enabling fontification in sh-mode (currently
> only for bash) using tree-sitter.
>
> I welcome all comments and suggestions to improve the patch.
>
> I noticed a weird behavior with heredocs. Take the code below:
>
> echo <<EOF
> This is a here document.
> EOF
> echo "Done."
>
> My patch correctly fontifies the code above, but if I kill the whole line
> with the "This is a here document." text, then the sh-heredoc face bleeds
> out and all the subsequent comments get fontified as part of the heredoc.
>
> A similar behavior happens if tree-sitter is not enabled, if the heredoc
> is empty then all subsequent commands are fontified as heredoc. However, as
> soon as anything is added to the heredoc, then everything goes back to the
> correct fontification.
>
> Such "refreshing" does not happen with tree-sitter enabled, but if I
> execute M-x sh-mode then the buffer gets refreshed and everything looks
> good.
>
> What am I doing wrong?
>
> --
> João Paulo L. de Carvalho
> Ph.D Computer Science |  IC-UNICAMP | Campinas , SP - Brazil
> Postdoctoral Research Fellow | University of Alberta | Edmonton, AB -
> Canada
> joao.carvalho@ic.unicamp.br
> joao.carvalho@ualberta.ca
>


-- 
João Paulo L. de Carvalho
Ph.D Computer Science |  IC-UNICAMP | Campinas , SP - Brazil
Postdoctoral Research Fellow | University of Alberta | Edmonton, AB - Canada
joao.carvalho@ic.unicamp.br
joao.carvalho@ualberta.ca

[-- Attachment #1.2: Type: text/html, Size: 2906 bytes --]

[-- Attachment #2: 0001-Initial-fontification-in-sh-mode-with-tree-sitter.patch --]
[-- Type: text/x-patch, Size: 6143 bytes --]

From 1d81266f11ee61aaa33abef37385cbcab75022ea Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jo=C3=A3o=20P=2E=20L=2E=20de=20Carvalho?=
 <jaopaulolc@gmail.com>
Date: Thu, 27 Oct 2022 15:45:56 -0600
Subject: [PATCH] Initial fontification in sh-mode with tree-sitter

---
 lisp/progmodes/sh-script.el | 132 +++++++++++++++++++++++++++++++++---
 1 file changed, 121 insertions(+), 11 deletions(-)

diff --git a/lisp/progmodes/sh-script.el b/lisp/progmodes/sh-script.el
index 558b62b20a..2ab225b77d 100644
--- a/lisp/progmodes/sh-script.el
+++ b/lisp/progmodes/sh-script.el
@@ -148,6 +148,7 @@
   (require 'let-alist)
   (require 'subr-x))
 (require 'executable)
+(require 'treesit)
 
 (autoload 'comint-completion-at-point "comint")
 (autoload 'comint-filename-completion "comint")
@@ -1534,13 +1535,6 @@ sh-mode
   ;; we can't look if previous line ended with `\'
   (setq-local comint-prompt-regexp "^[ \t]*")
   (setq-local imenu-case-fold-search nil)
-  (setq font-lock-defaults
-	`((sh-font-lock-keywords
-	   sh-font-lock-keywords-1 sh-font-lock-keywords-2)
-	  nil nil
-	  ((?/ . "w") (?~ . "w") (?. . "w") (?- . "w") (?_ . "w")) nil
-	  (font-lock-syntactic-face-function
-	   . ,#'sh-font-lock-syntactic-face-function)))
   (setq-local syntax-propertize-function #'sh-syntax-propertize-function)
   (add-hook 'syntax-propertize-extend-region-functions
             #'syntax-propertize-multiline 'append 'local)
@@ -1587,7 +1581,26 @@ sh-mode
    nil nil)
   (add-hook 'flymake-diagnostic-functions #'sh-shellcheck-flymake nil t)
   (add-hook 'hack-local-variables-hook
-    #'sh-after-hack-local-variables nil t))
+    #'sh-after-hack-local-variables nil t)
+
+  (cond
+   ;; Tree-sitter
+   ((treesit-ready-p 'sh-mode sh-shell)
+    (setq-local font-lock-keywords-only t)
+    (setq-local treesit-font-lock-feature-list
+                '((basic) (moderate) (elaborate)))
+    (setq-local treesit-font-lock-settings
+                sh-mode--treesit-settings)
+    (treesit-major-mode-setup))
+   ;; Elisp.
+   (t
+    (setq font-lock-defaults
+          `((sh-font-lock-keywords
+             sh-font-lock-keywords-1 sh-font-lock-keywords-2)
+            nil nil
+            ((?/ . "w") (?~ . "w") (?. . "w") (?- . "w") (?_ . "w")) nil
+            (font-lock-syntactic-face-function
+             . ,#'sh-font-lock-syntactic-face-function))))))
 
 ;;;###autoload
 (defalias 'shell-script-mode 'sh-mode)
@@ -3191,6 +3204,103 @@ sh-shellcheck-flymake
       (process-send-region sh--shellcheck-process (point-min) (point-max))
       (process-send-eof sh--shellcheck-process))))
 
-(provide 'sh-script)
-
-;;; sh-script.el ends here
+;;; Tree-sitter font-lock
+
+(defvar sh-mode--treesit-operators
+  '("|" "|&" "||" "&&" ">" ">>" "<" "<<" "<<-" "<<<" "==" "!=" ";"
+    ";;" ";&" ";;&")
+  "List of `sh-mode' operator to fontify")
+
+(defvar sh-mode--treesit-keywords
+  '("case" "do" "done" "elif" "else" "esac" "export" "fi" "for"
+    "function" "if" "in" "unset" "while" "then")
+  "Minimal list of keywords that belong to tree-sitter-bash's grammar.
+
+Some reserved words are not recognize to keep the grammar
+simpler. Those are identified with regex-based filtered queries.
+
+See `sh-mode--treesit-other-keywords' and
+`sh-mode--treesit-settings').")
+
+(defun sh-mode--treesit-other-keywords ()
+  "Returns a list `others' of key/reserved words to be fontified with
+regex-based queries as they are not part of tree-sitter-bash's
+grammar.
+
+See `sh-mode--treesit-other-keywords' and
+`sh-mode--treesit-settings')."
+  (let ((minimal sh-mode--treesit-keywords)
+        (all (append (sh-feature sh-leading-keywords)
+                     (sh-feature sh-other-keywords)))
+        (others))
+    (dolist (keyword all others)
+      (if (not (member keyword minimal))
+          (setq others (cons keyword others))))))
+
+(defvar sh-mode--treesit-settings
+  (treesit-font-lock-rules
+   :language sh-shell
+   :feature 'basic
+   :override t
+   '(;; function
+     (function_definition name: (word) @font-lock-function-name-face)
+     ;; comments
+     (comment) @font-lock-comment-face
+     ;; strings and heredoc
+     [ (string) (raw_string) ] @font-lock-string-face
+     ;; heredocs
+     [ (heredoc_start) (heredoc_body) ] @sh-heredoc
+     ;; variables
+     (variable_name) @font-lock-variable-name-face)
+   :language sh-shell
+   :feature 'moderate
+   :override t
+   `(;; keywords
+     [ ,@sh-mode--treesit-keywords ] @font-lock-keyword-face
+     ;; reserved words
+     (command_name
+      ((word) @font-lock-keyword-face
+       (:match
+        ,(rx-to-string
+            `(seq bol
+                  (or ,@(sh-mode--treesit-other-keywords))
+                  eol))
+        @font-lock-keyword-face)))
+     ;; function/non-builtin command calls
+     (command_name (word) @font-lock-function-name-face)
+     ;; builtin commands
+     (command_name
+      ((word) @font-lock-builtin-face
+       (:match ,(let ((builtins
+                       (sh-feature sh-builtins)))
+                  (rx-to-string
+                   `(seq bol
+                         (or ,@builtins)
+                         eol)))
+               @font-lock-builtin-face)))
+     ;; declaration commands
+     (declaration_command) @font-lock-builtin-face
+     ;; variables
+     (variable_name) @font-lock-variable-name-face)
+   :language sh-shell
+   :feature 'elaborate
+   :override t
+   `(;; everything inside command substitution
+     (command_substitution _ _ @sh-quoted-exec _)
+     ;; constants
+     (case_item value: (word) @font-lock-constant-face)
+     (file_descriptor) @font-lock-constant-face
+     ;; operators
+     [ ,@sh-mode--treesit-operators ] @font-lock-builtin-face
+     ;; special variables
+     ((variable_name) @font-lock-builtin-face
+      (:match ,(let ((builtin-vars (sh-feature sh-variables)))
+                 (rx-to-string
+                  `(seq bol
+                        (or ,@builtin-vars)
+                        eol)))
+              @font-lock-builtin-face))))
+  "Tree-sitter font-lock settings for `sh-mode'.")
+
+(provide 'sh-mode)
+;;; sh-mode.el ends here
-- 
2.31.1


  parent reply	other threads:[~2022-10-28 15:27 UTC|newest]

Thread overview: 53+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-10-27 22:01 Initial fontification in sh-mode with tree-sittter João Paulo Labegalini de Carvalho
2022-10-27 23:09 ` João Paulo Labegalini de Carvalho
2022-10-27 23:40   ` João Paulo Labegalini de Carvalho
2022-10-28  8:12     ` Yuan Fu
2022-10-28 15:09       ` Daniel Martín
2022-10-31  2:13         ` Yuan Fu
2022-10-31 21:56           ` Yuan Fu
2022-11-01  0:09             ` Daniel Martín
2022-11-01  0:25               ` Yuan Fu
2022-11-01  7:13                 ` Eli Zaretskii
2022-11-01  8:35                   ` Yuan Fu
2022-11-01  9:23                     ` Eli Zaretskii
     [not found]                       ` <CAGjvy2_6BReOVjSqgTM57+h+Ycjdu1o1TKoQHf6q-ypnAX3=rA@mail.gmail.com>
2022-11-02 19:17                         ` Eli Zaretskii
2022-11-03  1:25                           ` Yuan Fu
2022-11-03  6:36                             ` Eli Zaretskii
2022-11-03  7:16                               ` Yuan Fu
2022-11-03 16:08                             ` João Paulo Labegalini de Carvalho
2022-11-03 19:12                               ` Yuan Fu
2022-11-04 20:44                                 ` João Paulo Labegalini de Carvalho
2022-11-04 22:50                                   ` Yuan Fu
2022-11-12 22:04                                     ` João Paulo Labegalini de Carvalho
2022-11-12 22:28                                       ` Yuan Fu
2022-11-12 23:57                                         ` João Paulo Labegalini de Carvalho
2022-11-16  8:34                                           ` Yuan Fu
2022-11-16 15:57                                             ` João Paulo Labegalini de Carvalho
2022-11-17 18:25                                               ` Yuan Fu
2022-11-17 18:53                                                 ` João Paulo Labegalini de Carvalho
2022-11-17 19:11                                                   ` Yuan Fu
2022-11-13  6:23                                       ` Eli Zaretskii
2022-11-13  7:01                                         ` Yuan Fu
2022-11-13  7:26                                           ` Eli Zaretskii
2022-11-29 21:52                                         ` João Paulo Labegalini de Carvalho
2022-11-02 20:37             ` [SPAM UNSURE] " Stephen Leake
2022-10-28  0:18 ` Stefan Kangas
2022-10-28  0:48   ` João Paulo Labegalini de Carvalho
2022-10-28 15:27 ` João Paulo Labegalini de Carvalho [this message]
2022-10-28 15:57   ` Stefan Kangas
2022-10-28 16:15     ` Stefan Monnier
2022-10-28 16:23       ` Theodor Thornhill
2022-10-28 16:34       ` João Paulo Labegalini de Carvalho
2022-10-28 17:37         ` Stefan Monnier
2022-10-28 17:45           ` Yuan Fu
2022-10-28 18:12             ` Stefan Monnier
2022-11-01  0:33               ` Yuan Fu
2022-11-01  3:38                 ` Stefan Monnier
2022-11-01  8:37                   ` Yuan Fu
2022-10-29  7:13             ` Augusto Stoffel
2022-10-28 17:44       ` Yuan Fu
2022-11-02 18:22 ` João Paulo Labegalini de Carvalho
2022-11-02 18:55   ` João Paulo Labegalini de Carvalho
2022-11-12 12:47     ` Eli Zaretskii
2022-11-12 19:45       ` Yuan Fu
2022-11-12 19:53         ` Eli Zaretskii

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://www.gnu.org/software/emacs/

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to='CAGjvy2-FCLp4TdLoB8b5epmv=8AXY_v-P8+OF4Vv2QWB+PwGug@mail.gmail.com' \
    --to=jaopaulolc@gmail.com \
    --cc=emacs-devel@gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://git.savannah.gnu.org/cgit/emacs.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).