unofficial mirror of emacs-devel@gnu.org 
 help / color / mirror / code / Atom feed
From: "João Paulo Labegalini de Carvalho" <jaopaulolc@gmail.com>
To: emacs-devel@gnu.org
Subject: Re: Initial fontification in sh-mode with tree-sittter
Date: Wed, 2 Nov 2022 12:22:39 -0600	[thread overview]
Message-ID: <CAGjvy288LXS3XV3xYHKUAHf-FfjnHxNzfasfbpeDNTmVGt_Bdg@mail.gmail.com> (raw)
In-Reply-To: <CAGjvy2-wWch9iO_BUMJLzquHBpMWXu+bbhpvtWtqZXisN5HY3A@mail.gmail.com>


[-- Attachment #1.1: Type: text/plain, Size: 1790 bytes --]

Here is another version of the patch. This one separates the queries by
language-related terms and groups them into lists of features.

Looking forward to your feedback.

Thanks.

On Thu, Oct 27, 2022 at 4:01 PM João Paulo Labegalini de Carvalho <
jaopaulolc@gmail.com> wrote:

> Hi everyone,
>
> Please find the patch for enabling fontification in sh-mode (currently
> only for bash) using tree-sitter.
>
> I welcome all comments and suggestions to improve the patch.
>
> I noticed a weird behavior with heredocs. Take the code below:
>
> echo <<EOF
> This is a here document.
> EOF
> echo "Done."
>
> My patch correctly fontifies the code above, but if I kill the whole line
> with the "This is a here document." text, then the sh-heredoc face bleeds
> out and all the subsequent comments get fontified as part of the heredoc.
>
> A similar behavior happens if tree-sitter is not enabled, if the heredoc
> is empty then all subsequent commands are fontified as heredoc. However, as
> soon as anything is added to the heredoc, then everything goes back to the
> correct fontification.
>
> Such "refreshing" does not happen with tree-sitter enabled, but if I
> execute M-x sh-mode then the buffer gets refreshed and everything looks
> good.
>
> What am I doing wrong?
>
> --
> João Paulo L. de Carvalho
> Ph.D Computer Science |  IC-UNICAMP | Campinas , SP - Brazil
> Postdoctoral Research Fellow | University of Alberta | Edmonton, AB -
> Canada
> joao.carvalho@ic.unicamp.br
> joao.carvalho@ualberta.ca
>


-- 
João Paulo L. de Carvalho
Ph.D Computer Science |  IC-UNICAMP | Campinas , SP - Brazil
Postdoctoral Research Fellow | University of Alberta | Edmonton, AB - Canada
joao.carvalho@ic.unicamp.br
joao.carvalho@ualberta.ca

[-- Attachment #1.2: Type: text/html, Size: 2892 bytes --]

[-- Attachment #2: 0001-Initial-fontification-in-sh-mode-with-tree-sitter.patch --]
[-- Type: text/x-patch, Size: 7220 bytes --]

From a30903758c2c776fd7e1f03e8f71a5a12bc00862 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jo=C3=A3o=20P=2E=20L=2E=20de=20Carvalho?=
 <jaopaulolc@gmail.com>
Date: Thu, 27 Oct 2022 15:45:56 -0600
Subject: [PATCH] Initial fontification in sh-mode with tree-sitter

---
 lisp/progmodes/sh-script.el | 153 +++++++++++++++++++++++++++++++++---
 1 file changed, 142 insertions(+), 11 deletions(-)

diff --git a/lisp/progmodes/sh-script.el b/lisp/progmodes/sh-script.el
index 558b62b20a..f174ca7714 100644
--- a/lisp/progmodes/sh-script.el
+++ b/lisp/progmodes/sh-script.el
@@ -148,6 +148,7 @@
   (require 'let-alist)
   (require 'subr-x))
 (require 'executable)
+(require 'treesit)
 
 (autoload 'comint-completion-at-point "comint")
 (autoload 'comint-filename-completion "comint")
@@ -1534,13 +1535,6 @@ sh-mode
   ;; we can't look if previous line ended with `\'
   (setq-local comint-prompt-regexp "^[ \t]*")
   (setq-local imenu-case-fold-search nil)
-  (setq font-lock-defaults
-	`((sh-font-lock-keywords
-	   sh-font-lock-keywords-1 sh-font-lock-keywords-2)
-	  nil nil
-	  ((?/ . "w") (?~ . "w") (?. . "w") (?- . "w") (?_ . "w")) nil
-	  (font-lock-syntactic-face-function
-	   . ,#'sh-font-lock-syntactic-face-function)))
   (setq-local syntax-propertize-function #'sh-syntax-propertize-function)
   (add-hook 'syntax-propertize-extend-region-functions
             #'syntax-propertize-multiline 'append 'local)
@@ -1587,7 +1581,28 @@ sh-mode
    nil nil)
   (add-hook 'flymake-diagnostic-functions #'sh-shellcheck-flymake nil t)
   (add-hook 'hack-local-variables-hook
-    #'sh-after-hack-local-variables nil t))
+    #'sh-after-hack-local-variables nil t)
+
+  (cond
+   ;; Tree-sitter
+   ((treesit-ready-p 'sh-mode sh-shell)
+    (setq-local font-lock-keywords-only t)
+    (setq-local treesit-font-lock-feature-list
+                '((comments functions strings heredocs)
+                  (variables keywords commands decl-commands)
+                  (constants operators builtin-variables)))
+    (setq-local treesit-font-lock-settings
+                sh-mode--treesit-settings)
+    (treesit-major-mode-setup))
+   ;; Elisp.
+   (t
+    (setq font-lock-defaults
+          `((sh-font-lock-keywords
+             sh-font-lock-keywords-1 sh-font-lock-keywords-2)
+            nil nil
+            ((?/ . "w") (?~ . "w") (?. . "w") (?- . "w") (?_ . "w")) nil
+            (font-lock-syntactic-face-function
+             . ,#'sh-font-lock-syntactic-face-function))))))
 
 ;;;###autoload
 (defalias 'shell-script-mode 'sh-mode)
@@ -3191,6 +3206,122 @@ sh-shellcheck-flymake
       (process-send-region sh--shellcheck-process (point-min) (point-max))
       (process-send-eof sh--shellcheck-process))))
 
-(provide 'sh-script)
-
-;;; sh-script.el ends here
+;;; Tree-sitter font-lock
+
+(defvar sh-mode--treesit-operators
+  '("|" "|&" "||" "&&" ">" ">>" "<" "<<" "<<-" "<<<" "==" "!=" ";"
+    ";;" ";&" ";;&")
+  "List of `sh-mode' operators to fontify.")
+
+(defvar sh-mode--treesit-keywords
+  '("case" "do" "done" "elif" "else" "esac" "export" "fi" "for"
+    "function" "if" "in" "unset" "while" "then")
+  "Minimal list of keywords that belong to tree-sitter-bash's grammar.
+
+Some reserved words are not recognized, to keep the grammar
+simpler.  Those are identified with regex-based filtered queries.
+
+See `sh-mode--treesit-other-keywords' and
+`sh-mode--treesit-settings'.")
+
+(defun sh-mode--treesit-other-keywords ()
+  "Return a list of keywords and reserved words not in the grammar.
+These are the entries of `sh-leading-keywords' and `sh-other-keywords',
+as looked up with `sh-feature', that are absent from
+`sh-mode--treesit-keywords'.  They are fontified with regex-based
+queries because tree-sitter-bash's grammar does not include them.
+See `sh-mode--treesit-keywords' and `sh-mode--treesit-settings'."
+  (let ((grammar-keywords sh-mode--treesit-keywords)
+        (candidates (append (sh-feature sh-leading-keywords)
+                            (sh-feature sh-other-keywords)))
+        (extras nil))
+    (dolist (word candidates extras)
+      (unless (member word grammar-keywords)
+        (push word extras)))))
+
+(defun sh-mode--treesit-fontify-decl-command-name (_beg _end node)
+  "Fontify only the name of a declaration_command node.
+
+This is used instead of `font-lock-builtin-face' directly because
+otherwise the whole command, including the variable assignment part,
+is fontified with `font-lock-builtin-face'.  An alternative to
+this would be for declaration_command nodes to have a `name:' field."
+  (let* ((maybe-decl-cmd (treesit-node-parent node))
+         (node-type (treesit-node-type maybe-decl-cmd)))
+    (when (string= node-type "declaration_command")
+      (let* ((name-node (car (treesit-node-children maybe-decl-cmd)))
+             (name-beg (treesit-node-start name-node))
+             (name-end (treesit-node-end name-node)))
+        (put-text-property name-beg
+                           name-end
+                           'face
+                           font-lock-builtin-face)))))
+
+(defvar sh-mode--treesit-settings
+  (treesit-font-lock-rules
+   :feature 'comments
+   :language sh-shell
+   '((comment) @font-lock-comment-face)
+   :feature 'functions
+   :language sh-shell
+   '((function_definition name: (word) @font-lock-function-name-face))
+   :feature 'strings
+   :language sh-shell
+   '([(string) (raw_string)] @font-lock-string-face)
+   :feature 'heredocs
+   :language sh-shell
+   '([(heredoc_start) (heredoc_body)] @sh-heredoc)
+   :feature 'variables
+   :language sh-shell
+   '((variable_name) @font-lock-variable-name-face)
+   :feature 'keywords
+   :language sh-shell
+   `(;; keywords
+     [ ,@sh-mode--treesit-keywords ] @font-lock-keyword-face
+     ;; reserved words
+     (command_name
+      ((word) @font-lock-keyword-face
+       (:match
+        ,(rx-to-string
+            `(seq bol
+                  (or ,@(sh-mode--treesit-other-keywords))
+                  eol))
+        @font-lock-keyword-face))))
+   :feature 'commands
+   :language sh-shell
+   `(;; function/non-builtin command calls
+     (command_name (word) @font-lock-function-name-face)
+     ;; builtin commands
+     (command_name
+      ((word) @font-lock-builtin-face
+       (:match ,(let ((builtins
+                       (sh-feature sh-builtins)))
+                  (rx-to-string
+                   `(seq bol
+                         (or ,@builtins)
+                         eol)))
+               @font-lock-builtin-face))))
+   :feature 'decl-commands
+   :language sh-shell
+   '(;; declaration commands
+     (declaration_command _ @sh-mode--treesit-fontify-decl-command-name))
+   :feature 'constants
+   :language sh-shell
+   '((case_item value: (word) @font-lock-constant-face)
+     (file_descriptor) @font-lock-constant-face)
+   :feature 'operators
+   :language sh-shell
+   `([ ,@sh-mode--treesit-operators ] @font-lock-builtin-face)
+   :feature 'builtin-variables
+   :language sh-shell
+   `(((special_variable_name) @font-lock-builtin-face
+      (:match ,(let ((builtin-vars (sh-feature sh-variables)))
+                 (rx-to-string
+                  `(seq bol
+                        (or ,@builtin-vars)
+                        eol)))
+              @font-lock-builtin-face))))
+  "Tree-sitter font-lock settings for `sh-mode'.")
+
+(provide 'sh-script)
+;;; sh-script.el ends here
-- 
2.31.1


  parent reply	other threads:[~2022-11-02 18:22 UTC|newest]

Thread overview: 53+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-10-27 22:01 Initial fontification in sh-mode with tree-sittter João Paulo Labegalini de Carvalho
2022-10-27 23:09 ` João Paulo Labegalini de Carvalho
2022-10-27 23:40   ` João Paulo Labegalini de Carvalho
2022-10-28  8:12     ` Yuan Fu
2022-10-28 15:09       ` Daniel Martín
2022-10-31  2:13         ` Yuan Fu
2022-10-31 21:56           ` Yuan Fu
2022-11-01  0:09             ` Daniel Martín
2022-11-01  0:25               ` Yuan Fu
2022-11-01  7:13                 ` Eli Zaretskii
2022-11-01  8:35                   ` Yuan Fu
2022-11-01  9:23                     ` Eli Zaretskii
     [not found]                       ` <CAGjvy2_6BReOVjSqgTM57+h+Ycjdu1o1TKoQHf6q-ypnAX3=rA@mail.gmail.com>
2022-11-02 19:17                         ` Eli Zaretskii
2022-11-03  1:25                           ` Yuan Fu
2022-11-03  6:36                             ` Eli Zaretskii
2022-11-03  7:16                               ` Yuan Fu
2022-11-03 16:08                             ` João Paulo Labegalini de Carvalho
2022-11-03 19:12                               ` Yuan Fu
2022-11-04 20:44                                 ` João Paulo Labegalini de Carvalho
2022-11-04 22:50                                   ` Yuan Fu
2022-11-12 22:04                                     ` João Paulo Labegalini de Carvalho
2022-11-12 22:28                                       ` Yuan Fu
2022-11-12 23:57                                         ` João Paulo Labegalini de Carvalho
2022-11-16  8:34                                           ` Yuan Fu
2022-11-16 15:57                                             ` João Paulo Labegalini de Carvalho
2022-11-17 18:25                                               ` Yuan Fu
2022-11-17 18:53                                                 ` João Paulo Labegalini de Carvalho
2022-11-17 19:11                                                   ` Yuan Fu
2022-11-13  6:23                                       ` Eli Zaretskii
2022-11-13  7:01                                         ` Yuan Fu
2022-11-13  7:26                                           ` Eli Zaretskii
2022-11-29 21:52                                         ` João Paulo Labegalini de Carvalho
2022-11-02 20:37             ` [SPAM UNSURE] " Stephen Leake
2022-10-28  0:18 ` Stefan Kangas
2022-10-28  0:48   ` João Paulo Labegalini de Carvalho
2022-10-28 15:27 ` João Paulo Labegalini de Carvalho
2022-10-28 15:57   ` Stefan Kangas
2022-10-28 16:15     ` Stefan Monnier
2022-10-28 16:23       ` Theodor Thornhill
2022-10-28 16:34       ` João Paulo Labegalini de Carvalho
2022-10-28 17:37         ` Stefan Monnier
2022-10-28 17:45           ` Yuan Fu
2022-10-28 18:12             ` Stefan Monnier
2022-11-01  0:33               ` Yuan Fu
2022-11-01  3:38                 ` Stefan Monnier
2022-11-01  8:37                   ` Yuan Fu
2022-10-29  7:13             ` Augusto Stoffel
2022-10-28 17:44       ` Yuan Fu
2022-11-02 18:22 ` João Paulo Labegalini de Carvalho [this message]
2022-11-02 18:55   ` João Paulo Labegalini de Carvalho
2022-11-12 12:47     ` Eli Zaretskii
2022-11-12 19:45       ` Yuan Fu
2022-11-12 19:53         ` Eli Zaretskii

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://www.gnu.org/software/emacs/

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=CAGjvy288LXS3XV3xYHKUAHf-FfjnHxNzfasfbpeDNTmVGt_Bdg@mail.gmail.com \
    --to=jaopaulolc@gmail.com \
    --cc=emacs-devel@gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://git.savannah.gnu.org/cgit/emacs.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).