unofficial mirror of emacs-devel@gnu.org 
 help / color / mirror / code / Atom feed
* Initial fontification in sh-mode with tree-sittter
@ 2022-10-27 22:01 João Paulo Labegalini de Carvalho
  2022-10-27 23:09 ` João Paulo Labegalini de Carvalho
                   ` (3 more replies)
  0 siblings, 4 replies; 53+ messages in thread
From: João Paulo Labegalini de Carvalho @ 2022-10-27 22:01 UTC (permalink / raw)
  To: emacs-devel


[-- Attachment #1.1: Type: text/plain, Size: 1201 bytes --]

Hi everyone,

Please find the patch for enabling fontification in sh-mode (currently
only for bash) using tree-sitter.

I welcome all comments and suggestions to improve the patch.

I noticed a weird behavior with heredocs. Take the code below:

echo <<EOF
This is a here document.
EOF
echo "Done."

My patch correctly fontifies the code above, but if I kill the whole line
containing the "This is a here document." text, then the sh-heredoc face
bleeds out and all the subsequent commands get fontified as part of the
heredoc.

A similar behavior happens when tree-sitter is not enabled: if the heredoc
is empty, then all subsequent commands are fontified as heredoc. However,
as soon as anything is added to the heredoc, everything goes back to the
correct fontification.

Such "refreshing" does not happen with tree-sitter enabled, but if I
execute M-x sh-mode then the buffer gets refreshed and everything looks
good.

What am I doing wrong?

-- 
João Paulo L. de Carvalho
Ph.D Computer Science |  IC-UNICAMP | Campinas , SP - Brazil
Postdoctoral Research Fellow | University of Alberta | Edmonton, AB - Canada
joao.carvalho@ic.unicamp.br
joao.carvalho@ualberta.ca

[-- Attachment #1.2: Type: text/html, Size: 1834 bytes --]

[-- Attachment #2: 0001-Initial-fontification-in-sh-mode-with-tree-sitter.patch --]
[-- Type: text/x-patch, Size: 6639 bytes --]

From 48fb6f8e949a8caf73b0714d647947c069260797 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jo=C3=A3o=20P=2E=20L=2E=20de=20Carvalho?=
 <jaopaulolc@gmail.com>
Date: Thu, 27 Oct 2022 15:45:56 -0600
Subject: [PATCH] Initial fontification in sh-mode with tree-sitter

---
 lisp/progmodes/sh-script.el | 144 +++++++++++++++++++++++++++++++++---
 1 file changed, 133 insertions(+), 11 deletions(-)

diff --git a/lisp/progmodes/sh-script.el b/lisp/progmodes/sh-script.el
index 558b62b20a..c3645eb9e9 100644
--- a/lisp/progmodes/sh-script.el
+++ b/lisp/progmodes/sh-script.el
@@ -148,6 +148,7 @@
   (require 'let-alist)
   (require 'subr-x))
 (require 'executable)
+(require 'treesit)
 
 (autoload 'comint-completion-at-point "comint")
 (autoload 'comint-filename-completion "comint")
@@ -1534,13 +1535,6 @@ sh-mode
   ;; we can't look if previous line ended with `\'
   (setq-local comint-prompt-regexp "^[ \t]*")
   (setq-local imenu-case-fold-search nil)
-  (setq font-lock-defaults
-	`((sh-font-lock-keywords
-	   sh-font-lock-keywords-1 sh-font-lock-keywords-2)
-	  nil nil
-	  ((?/ . "w") (?~ . "w") (?. . "w") (?- . "w") (?_ . "w")) nil
-	  (font-lock-syntactic-face-function
-	   . ,#'sh-font-lock-syntactic-face-function)))
   (setq-local syntax-propertize-function #'sh-syntax-propertize-function)
   (add-hook 'syntax-propertize-extend-region-functions
             #'syntax-propertize-multiline 'append 'local)
@@ -1587,7 +1581,26 @@ sh-mode
    nil nil)
   (add-hook 'flymake-diagnostic-functions #'sh-shellcheck-flymake nil t)
   (add-hook 'hack-local-variables-hook
-    #'sh-after-hack-local-variables nil t))
+    #'sh-after-hack-local-variables nil t)
+
+  (cond
+   ;; Tree-sitter
+   ((treesit-ready-p 'sh-mode sh-shell)
+    (setq-local font-lock-keywords-only t)
+    (setq-local treesit-font-lock-feature-list
+                '((basic) (moderate) (elaborate)))
+    (setq-local treesit-font-lock-settings
+                sh-mode--treesit-settings)
+    (treesit-major-mode-setup))
+   ;; Elisp.
+   (t
+    (setq font-lock-defaults
+          `((sh-font-lock-keywords
+             sh-font-lock-keywords-1 sh-font-lock-keywords-2)
+            nil nil
+            ((?/ . "w") (?~ . "w") (?. . "w") (?- . "w") (?_ . "w")) nil
+            (font-lock-syntactic-face-function
+             . ,#'sh-font-lock-syntactic-face-function))))))
 
 ;;;###autoload
 (defalias 'shell-script-mode 'sh-mode)
@@ -3191,6 +3204,115 @@ sh-shellcheck-flymake
       (process-send-region sh--shellcheck-process (point-min) (point-max))
       (process-send-eof sh--shellcheck-process))))
 
-(provide 'sh-script)
-
-;;; sh-script.el ends here
+;;; Tree-sitter font-lock
+
+(defface sh-mode--treesit-special-var-name-face
+  '((t (:inherit font-lock-builtin-face)))
+  "Face used for special `sh-mode' variables (e.g. PATH).")
+
+(defface sh-mode--treesit-operator-face
+  '((t (:inherit font-lock-builtin-face)))
+  "Face used for `sh-mode' operators (e.g. <<).")
+
+(defface sh-mode--treesit-call-face
+  '((t (:inherit font-lock-function-name-face)))
+  "Face used for `sh-mode' non-builtin command calls.")
+
+(defvar sh-mode--treesit-operators
+  '("|" "|&" "||" "&&" ">" ">>" "<" "<<" "<<-" "<<<" "==" "!=" ";"
+    ";;" ";&" ";;&")
+  "List of `sh-mode' operators to fontify.")
+
+(defvar sh-mode--treesit-keywords
+  '("case" "do" "done" "elif" "else" "esac" "export" "fi" "for"
+    "function" "if" "in" "unset" "while" "then")
+  "Minimal list of keywords that belong to tree-sitter-bash's grammar.
+
+Some reserved words are not recognized, to keep the grammar
+simpler.  Those are identified with regex-based filtered queries.
+
+See `sh-mode--treesit-other-keywords' and
+`sh-mode--treesit-settings'.")
+
+(defun sh-mode--treesit-other-keywords ()
+  "Return the key/reserved words missing from `sh-mode--treesit-keywords'.
+These words are not part of tree-sitter-bash's grammar, so they are
+fontified with regex-based queries instead.
+
+See `sh-mode--treesit-keywords' and
+`sh-mode--treesit-settings'."
+  (let ((minimal sh-mode--treesit-keywords)
+        (all (append (sh-feature sh-leading-keywords)
+                     (sh-feature sh-other-keywords)))
+        (others))
+    (dolist (keyword all others)
+      (if (not (member keyword minimal))
+          (setq others (cons keyword others))))))
+
+(defvar sh-mode--treesit-settings
+  (treesit-font-lock-rules
+   :language sh-shell  ; NOTE(review): read once, at defvar evaluation -- confirm
+   :feature 'basic
+   :override t
+   '(;; function definitions
+     (function_definition name: (word) @font-lock-function-name-face)
+     ;; comments
+     (comment) @font-lock-comment-face
+     ;; strings
+     [ (string) (raw_string) ] @font-lock-string-face
+     ;; heredocs
+     [ (heredoc_start) (heredoc_body) ] @sh-heredoc
+     ;; variables
+     (variable_name) @font-lock-variable-name-face)
+   :language sh-shell
+   :feature 'moderate
+   :override t
+   `(;; keywords
+     [ ,@sh-mode--treesit-keywords ] @font-lock-keyword-face
+     ;; reserved words
+     (command_name
+      ((word) @font-lock-keyword-face
+       (:match
+        ,(rx-to-string
+            `(seq bol
+                  (or ,@(sh-mode--treesit-other-keywords))
+                  eol))
+        @font-lock-keyword-face)))
+     ;; function/non-builtin command calls
+     (command_name (word) @sh-mode--treesit-call-face)
+     ;; builtin commands
+     (command_name
+      ((word) @font-lock-builtin-face
+       (:match ,(let ((builtins
+                       (sh-feature sh-builtins)))
+                  (rx-to-string
+                   `(seq bol
+                         (or ,@builtins)
+                         eol)))
+               @font-lock-builtin-face)))
+     ;; declaration commands
+     (declaration_command) @font-lock-builtin-face
+     ;; variables
+     (variable_name) @font-lock-variable-name-face)
+   :language sh-shell
+   :feature 'elaborate
+   :override t
+   `(;; everything inside command substitution
+     (command_substitution _ _ @sh-quoted-exec _)
+     ;; constants
+     (case_item value: (word) @font-lock-constant-face)
+     (file_descriptor) @font-lock-constant-face
+     ;; operators
+     [ ,@sh-mode--treesit-operators ] @sh-mode--treesit-operator-face
+     ;; special variables
+     ((variable_name) @sh-mode--treesit-special-var-name-face
+      (:match ,(let ((builtin-vars (sh-feature sh-variables)))
+                 (rx-to-string
+                  `(seq bol
+                        (or ,@builtin-vars)
+                        eol)))
+              @sh-mode--treesit-special-var-name-face))))
+  "Tree-sitter font-lock settings for `sh-mode'.")
+
+(provide 'sh-script)
+;;; sh-script.el ends here
-- 
2.31.1


^ permalink raw reply related	[flat|nested] 53+ messages in thread

end of thread, other threads:[~2022-11-29 21:52 UTC | newest]

Thread overview: 53+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-10-27 22:01 Initial fontification in sh-mode with tree-sittter João Paulo Labegalini de Carvalho
2022-10-27 23:09 ` João Paulo Labegalini de Carvalho
2022-10-27 23:40   ` João Paulo Labegalini de Carvalho
2022-10-28  8:12     ` Yuan Fu
2022-10-28 15:09       ` Daniel Martín
2022-10-31  2:13         ` Yuan Fu
2022-10-31 21:56           ` Yuan Fu
2022-11-01  0:09             ` Daniel Martín
2022-11-01  0:25               ` Yuan Fu
2022-11-01  7:13                 ` Eli Zaretskii
2022-11-01  8:35                   ` Yuan Fu
2022-11-01  9:23                     ` Eli Zaretskii
     [not found]                       ` <CAGjvy2_6BReOVjSqgTM57+h+Ycjdu1o1TKoQHf6q-ypnAX3=rA@mail.gmail.com>
2022-11-02 19:17                         ` Eli Zaretskii
2022-11-03  1:25                           ` Yuan Fu
2022-11-03  6:36                             ` Eli Zaretskii
2022-11-03  7:16                               ` Yuan Fu
2022-11-03 16:08                             ` João Paulo Labegalini de Carvalho
2022-11-03 19:12                               ` Yuan Fu
2022-11-04 20:44                                 ` João Paulo Labegalini de Carvalho
2022-11-04 22:50                                   ` Yuan Fu
2022-11-12 22:04                                     ` João Paulo Labegalini de Carvalho
2022-11-12 22:28                                       ` Yuan Fu
2022-11-12 23:57                                         ` João Paulo Labegalini de Carvalho
2022-11-16  8:34                                           ` Yuan Fu
2022-11-16 15:57                                             ` João Paulo Labegalini de Carvalho
2022-11-17 18:25                                               ` Yuan Fu
2022-11-17 18:53                                                 ` João Paulo Labegalini de Carvalho
2022-11-17 19:11                                                   ` Yuan Fu
2022-11-13  6:23                                       ` Eli Zaretskii
2022-11-13  7:01                                         ` Yuan Fu
2022-11-13  7:26                                           ` Eli Zaretskii
2022-11-29 21:52                                         ` João Paulo Labegalini de Carvalho
2022-11-02 20:37             ` [SPAM UNSURE] " Stephen Leake
2022-10-28  0:18 ` Stefan Kangas
2022-10-28  0:48   ` João Paulo Labegalini de Carvalho
2022-10-28 15:27 ` João Paulo Labegalini de Carvalho
2022-10-28 15:57   ` Stefan Kangas
2022-10-28 16:15     ` Stefan Monnier
2022-10-28 16:23       ` Theodor Thornhill
2022-10-28 16:34       ` João Paulo Labegalini de Carvalho
2022-10-28 17:37         ` Stefan Monnier
2022-10-28 17:45           ` Yuan Fu
2022-10-28 18:12             ` Stefan Monnier
2022-11-01  0:33               ` Yuan Fu
2022-11-01  3:38                 ` Stefan Monnier
2022-11-01  8:37                   ` Yuan Fu
2022-10-29  7:13             ` Augusto Stoffel
2022-10-28 17:44       ` Yuan Fu
2022-11-02 18:22 ` João Paulo Labegalini de Carvalho
2022-11-02 18:55   ` João Paulo Labegalini de Carvalho
2022-11-12 12:47     ` Eli Zaretskii
2022-11-12 19:45       ` Yuan Fu
2022-11-12 19:53         ` Eli Zaretskii

Code repositories for project(s) associated with this public inbox

	https://git.savannah.gnu.org/cgit/emacs.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).