unofficial mirror of emacs-devel@gnu.org 
 help / color / mirror / code / Atom feed
From: Philip Kaludercic <philipk@posteo.net>
To: "Augustin Chéneau (BTuin)" <btuin@mailo.com>
Cc: emacs-devel <emacs-devel@gnu.org>
Subject: Re: New tree-sitter mode: bison-ts-mode
Date: Fri, 22 Sep 2023 07:38:55 +0000	[thread overview]
Message-ID: <871qeqwtbk.fsf@posteo.net> (raw)
In-Reply-To: <a15a1100-799e-4e18-8a4b-c0d7b8cdacaf@mailo.com> ("Augustin Chéneau (BTuin)"'s message of "Thu, 21 Sep 2023 22:15:45 +0200")

"Augustin Chéneau (BTuin)" <btuin@mailo.com> writes:

A few comments on the proposed file:

> ;;; bison-ts-mode --- Tree-sitter mode for Bison -*- lexical-binding: t; -*-
>
Could you add the usual header information here?

;; Copyright (C) 2022-2023 Free Software Foundation, Inc.

--8<---------------cut here---------------start------------->8---
;; Author: Augustin Chéneau <btuin@mailo.com>

;; This file is part of GNU Emacs.

;; GNU Emacs is free software: you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.

;; GNU Emacs is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs.  If not, see <https://www.gnu.org/licenses/>.
--8<---------------cut here---------------end--------------->8---

> ;;; Commentary:
>
> ;; This is a mode based on tree-sitter for Bison and Yacc files, tools to generate parsers.

Shouldn't you mention what tree sitter grammar is being being used here?

> ;;; Code:
>
> (require 'treesit)
> (require 'c-ts-common)
>
> (declare-function treesit-parser-create "treesit.c")
> (declare-function treesit-induce-sparse-tree "treesit.c")
> (declare-function treesit-node-child-by-field-name "treesit.c")
> (declare-function treesit-search-subtree "treesit.c")
> (declare-function treesit-node-parent "treesit.c")
> (declare-function treesit-node-next-sibling "treesit.c")
> (declare-function treesit-node-type "treesit.c")
> (declare-function treesit-node-child "treesit.c")
> (declare-function treesit-node-end "treesit.c")
> (declare-function treesit-node-start "treesit.c")
> (declare-function treesit-node-string "treesit.c")
> (declare-function treesit-query-compile "treesit.c")
> (declare-function treesit-query-capture "treesit.c")
> (declare-function treesit-parser-add-notifier "treesit.c")
> (declare-function treesit-parser-buffer "treesit.c")
> (declare-function treesit-parser-list "treesit.c")
>
>
> (defgroup bison nil

bison or bison-ts?

>   "Support for Bison and Yacc."

Shouldn't tree-sitter be mentioned here?

>   :group 'languages)
>
> (defcustom bison-ts-mode-indent-offset 2
>   "Number of spaces for each indentation step in `bison-ts-mode'.
> It has no effect in the epilogue part of the file."
>   :version "30.1"
>   :type 'integer
>   :safe 'integerp
>   :group 'bison)
>
> (defcustom bison-ts-mode-autodetect-language t
>   "Search for a %language directive in the file at initialization.
> Changing the value of this directive in the file requires to reload the mode to
> be effective.  If `bison-ts-mode-buffer-language' is set by a file-local
>  variable, the auto-detection is not run."
>   :version "30.1"
>   :type 'boolean
>   :safe 'boolean
>   :group 'bison)
>
> (defvar-local bison-ts-mode-embedded-language nil
>   "Embedded language in Bison buffer.")
>
> (defun bison-ts-mode--merge-feature-lists (l1 l2)
>   "Merge the lists of lists L1 and L2.
> The first sublist of L1 is merged with the first sublist of L2 and so on.
> L1 and L2 don't need to have the same size."
>   (let ((res ()))
>     (while (or l1 l2)
>       (setq res (push (append (car l1) (car l2)) res))
>       (setq l1 (cdr l1) l2 (cdr l2)))
>     (nreverse res)))

Is this a generic function that should be extracted into some common file?

> (defun bison-ts-mode--find-language-in-buffer (&optional buffer)
>   "Find and return the language set by the Bison directive %language.
> If BUFFER is set, search in this buffer, otherwise search in the current
> buffer."
>   (save-excursion
>     (when buffer
>       (switch-to-buffer buffer))

Or rather?

  (with-current-buffer (or buffer (current-buffer))
    (save-excursion
      ...))

>     (goto-char (point-min))
>     (let ((pos-end
>            (re-search-forward
>             (rx
>              bol (0+ blank) "%language" (0+ blank) "\"" (1+ (in alpha "+")) "\"")
>             nil
>             t))
>           (pos-beg nil))
>       (when pos-end

Using when-let might be nice here.

>         (goto-char (1- pos-end))
>         (setq pos-beg (1+ (search-backward "\"" nil t)))
>         (buffer-substring-no-properties pos-beg (1- pos-end))))))

I'd use a single regular expression here with a group, then extract the
right information using `match-string'.

>
>
> (defun bison-ts-mode--detect-language (&optional buffer)
>   "Dectect the embedded language in a Bison buffer.
> Known languages are C, C++, D, and Java, but D is not supported as there is
> no support for tree-sitter D in Emacs yet.
> If BUFFER is set, search in this buffer, otherwise search in the current
> buffer."
>   (if-let ((str (bison-ts-mode--find-language-in-buffer buffer)))
>       (pcase (downcase str)
>         ("c" 'c)
>         ("c++" 'cpp)
>         ("d" (progn (message "D language not yet supported") nil))

Each pcase case has an implicit progn.

>         ("java" 'java))
>     (progn

Use when-let to avoid this progn.

>       (message
>        "bison-ts-mode: %%language specification not found or invalid, defaulting to C.")

Is it necessary to prefix the message with the major mode name?

>       'c)))
>
>
> (defun bison-ts-mode--language-at-point-function (position)
>   "Return the language at POSITION."
>   (let* ((node (treesit-node-at position 'bison)))
     ^
     let is enough

>     (if (equal (treesit-node-type node)
>                "embedded_code")

There is no need to break the line here.

>         bison-ts-mode-embedded-language
>       'bison)))
>
> (defun bison-ts-mode--font-lock-settings (language)
>   "Return the font-lock settings for Bison.
> LANGUAGE should be set to \\='bison."
>   (treesit-font-lock-rules
>    :language language
>    :feature 'bison-comment
>    '((comment) @font-lock-comment-face)
>
>    :language language
>    :feature 'bison-declaration
>    '((declaration_name) @font-lock-keyword-face)
>
>    :language language
>    :feature 'bison-type
>    '((type) @font-lock-type-face)
>
>    :language language
>    :feature 'bison-grammar-rule-usage
>    '((grammar_rule_identifier) @font-lock-variable-use-face)
>
>    :language language
>    :feature 'bison-grammar-rule-declaration
>    '((grammar_rule (grammar_rule_declaration)
>                    @font-lock-variable-use-face))
>
>    :language language
>    :feature 'bison-string
>    :override t
>    '((string) @font-lock-string-face)
>
>    :language language
>    :feature 'bison-literal
>    :override t
>    '((char_literal) @font-lock-keyword-face
>      (number_literal) @font-lock-number-face)
>
>    :language language
>    :feature 'bison-directive-grammar-rule
>    :override t
>    '((grammar_rule (directive) @font-lock-keyword-face))
>
>    :language language
>    :feature 'bison-operator
>    :override t
>    '(["|"] @font-lock-operator-face)
>
>    :language language
>    :feature 'bison-delimiter
>    :override t
>    '([";"] @font-lock-delimiter-face)))
>
>
> (defvar bison-ts-mode--font-lock-feature-list
>   '(( bison-comment bison-declaration bison-type
>       bison-grammar-rule-usage bison-grammar-rule-declaration
>       bison-string bison-literal bison-directive-grammar-rule
>       bison-operator bison-delimiter)))
>
>
> (defun bison-ts-mode--bison-matcher-action (root-name)
>   "Treesit matcher to check if NODE at BOL is not located in the epilogue.
> ROOT-NAME is the highest-level node of the embedded language."
>   (lambda (node _parent bol &rest _)
>     (if (equal (treesit-node-type (treesit-node-parent node)) root-name)
>         (let* ((bison-node (treesit-node-at bol 'bison)))
           ^
           here again, let is enough

           (if (equal
>                (treesit-node-type
>                 (treesit-node-parent(treesit-node-parent bison-node))) "action")

Though you could bind the (treesit-node-type ...) expression under the
above let.

>               t
>             nil)))))

Why (if foo t nil) when foo would do the same job (equal only returns
nil and t, so normalising the value isn't even necessary).

>
> (defun bison-ts-mode--bison-matcher-not-epilogue (root-name)
>   "Treesit matcher to check if NODE at BOL is not located in the epilogue.
> ROOT-NAME is the highest-level node of the embedded language."
>   (lambda (node _parent bol &rest _)
>     (if (equal (treesit-node-type (treesit-node-parent node)) root-name)
>         (let* ((bison-node (treesit-node-at bol 'bison)))
>           (if (equal (treesit-node-type (treesit-node-parent bison-node)) "epilogue")
>               nil
>             t)))))

Am I missing something, or couldn't these two functions be merged if you
give them a third argument NODE-TYPE and pass it "action" or "epilogue".

>
>
> (defun bison-ts-mode--bison-parent (_node _parent bol &rest _)
>   "Get the parent of the bison node at BOL."
>   (treesit-node-start (treesit-node-parent (treesit-node-at bol 'bison))))
>
>
> (defun bison-ts-mode--indent-rules ()
>   "Indent rules supported by `bison-ts-mode'."
>   (let*
>       ((common
>         `(((node-is "^declaration$")
>            column-0 0)
>           ((and (parent-is "^declaration$")
>                 (not (node-is "^code_block$")))
>            column-0 2)
>           ((and (parent-is "comment") c-ts-common-looking-at-star)
>            c-ts-common-comment-start-after-first-star -1)
>           (c-ts-common-comment-2nd-line-matcher
>            c-ts-common-comment-2nd-line-anchor
>            1)
>           ((parent-is "comment") prev-adaptive-prefix 0)
>
>           ;; Opening and closing brackets "{}" of declarations
>           ((and (parent-is "^declaration$")
>                 (node-is "^code_block$"))
>            column-0 0)
>           ((and (n-p-gp "}" "" "^declaration$"))
>            column-0 0)
>           ((parent-is "^declaration$") parent 2)
>           ((node-is "^grammar_rule$") column-0 0)
>           ((and
>             (parent-is "^grammar_rule$")
>             (node-is ";"))
>            column-0 bison-ts-mode-indent-offset)
>           ((and (parent-is "^grammar_rule$")
>                 (node-is "|"))
>            column-0 bison-ts-mode-indent-offset)
>           ((and (parent-is "^grammar_rule$")
>                 (not (node-is "^grammar_rule_declaration$"))
>                 (not (node-is "^action$")))
>            column-0 ,(+ bison-ts-mode-indent-offset 2))
>           ((or
>             (node-is "^action$")
>             (node-is "^}$"))
>            column-0 12)
>           ;; Set '%%' at the beginning of the line
>           ((or
>             (and (parent-is "^grammar_rules_section$")
>                  (node-is "%%"))
>             (node-is "^grammar_rules_section$"))
>            column-0 0)
>           (no-node parent-bol 0))))
>     `((bison . ,common)
>       ;; Import and override embedded languages rules to add an offset
>       ,(pcase bison-ts-mode-embedded-language
>          ('c `(c
>                ((bison-ts-mode--bison-matcher-action "translation_unit")
>                 bison-ts-mode--bison-parent ,bison-ts-mode-indent-offset)
>                ((bison-ts-mode--bison-matcher-not-epilogue "translation_unit")
>                 column-0 ,bison-ts-mode-indent-offset)
>                ,@(alist-get 'c (c-ts-mode--get-indent-style 'c))))
>          ('cpp `(cpp
>                  ((bison-ts-mode--bison-matcher-action "translation_unit")
>                   bison-ts-mode--bison-parent ,bison-ts-mode-indent-offset)
>                  ((bison-ts-mode--bison-matcher-not-epilogue "translation_unit")
>                   parent-0 ,bison-ts-mode-indent-offset)
>                  ,@(alist-get 'cpp (c-ts-mode--get-indent-style 'cpp))))
>          ('java `(java
>                   ((bison-ts-mode--bison-matcher-action "program")
>                    bison-ts-mode--bison-parent ,bison-ts-mode-indent-offset)
>                   ((bison-ts-mode--bison-matcher-not-epilogue "program")
>                    column-0 ,bison-ts-mode-indent-offset)
>                   ,@java-ts-mode--indent-rules))))))
>
>
> (define-derived-mode bison-ts-mode prog-mode "Bison"
>   "A mode for Bison."
       ^
       major-mode

Also, mentioning tree-sitter seems like something worth doing.

>   (when (treesit-ready-p 'bison)
>     (when (not bison-ts-mode-embedded-language)
>       (setq bison-ts-mode-embedded-language (bison-ts-mode--detect-language)))
>
>     ;; Require only if needed, to avoid warnings if a grammar is not
> 	;; installed but not used.
>     (pcase bison-ts-mode-embedded-language

Would a `pcase-exhaustive' be appropriate here?

>       ('c (require 'c-ts-mode))
>       ('cpp (require 'c-ts-mode))
>       ('java (require 'java-ts-mode)))
>
>     (setq-local treesit-font-lock-settings
>                 (append (bison-ts-mode--font-lock-settings 'bison)
>                         (pcase bison-ts-mode-embedded-language
>                           ('c (c-ts-mode--font-lock-settings 'c))
>                           ('cpp (c-ts-mode--font-lock-settings 'cpp))
>                           ('java java-ts-mode--font-lock-settings))))
>
>     (setq-local treesit-font-lock-feature-list
>                 (if bison-ts-mode-embedded-language
>                     (bison-ts-mode--merge-feature-lists
>                      bison-ts-mode--font-lock-feature-list
>                      (pcase bison-ts-mode-embedded-language
>                        ('c c-ts-mode--feature-list)
>                        ('cpp c-ts-mode--feature-list)
>                        ('java java-ts-mode--feature-list)))
>                   bison-ts-mode--font-lock-feature-list))
>
>     (setq-local treesit-simple-imenu-settings
>                 `(("Grammar"
>                    "\\`grammar_rule_declaration\\'"
>                    nil
>                    (lambda (node) (substring-no-properties (treesit-node-text node))))))
>
>     (c-ts-common-comment-setup)
>
>     (setq-local treesit-simple-indent-rules
>                 (bison-ts-mode--indent-rules))
>
>     (setq-local treesit-language-at-point-function 'bison-ts-mode--language-at-point-function)
>
>     (when bison-ts-mode-embedded-language
>       (setq-local treesit-range-settings
>                   (treesit-range-rules
>                    :embed bison-ts-mode-embedded-language
>                    :host 'bison
>                    :local t
>                    '((embedded_code) @capture))))
>
>     (treesit-major-mode-setup)))
>
> (provide 'bison-ts-mode)
> ;;; bison-ts-mode.el ends here

Sorry for the number of comments, but there has been a discussion on the
code-quality of tree-sitter major modes that has been less than optimal,
so I hope that your contribution could help raise the bar.



  parent reply	other threads:[~2023-09-22  7:38 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-09-21 20:15 New tree-sitter mode: bison-ts-mode Augustin Chéneau (BTuin)
2023-09-21 22:23 ` Stefan Kangas
2023-09-22  5:52 ` Eli Zaretskii
2023-09-22 23:44   ` Yuan Fu
2023-09-23  5:52     ` Eli Zaretskii
2023-09-26  3:42       ` Yuan Fu
2023-09-22  7:38 ` Philip Kaludercic [this message]
2023-09-22 14:53   ` Augustin Chéneau (BTuin)
2023-09-22 20:40     ` Philip Kaludercic
2023-09-22 23:21       ` Augustin Chéneau (BTuin)
2023-09-22  7:42 ` Stefan Kangas
2023-09-22  8:45 ` Yuan Fu
2023-09-24 21:10 ` Yuan Fu
2023-09-26 11:52   ` Augustin Chéneau (BTuin)
2023-09-28  7:03     ` Yuan Fu
     [not found]       ` <b999a251-1778-49ac-90dc-ef8d78d36d53@mailo.com>
2023-09-29  1:26         ` Yuan Fu
2023-09-29 14:13       ` Eli Zaretskii

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://www.gnu.org/software/emacs/

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=871qeqwtbk.fsf@posteo.net \
    --to=philipk@posteo.net \
    --cc=btuin@mailo.com \
    --cc=emacs-devel@gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://git.savannah.gnu.org/cgit/emacs.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).