all messages for Emacs-related lists mirrored at yhetil.org
 help / color / mirror / code / Atom feed
* Tree sitter for RelaxNG Compact and rnc-ts-mode
@ 2023-08-21 15:52 LdBeth
  0 siblings, 0 replies; only message in thread
From: LdBeth @ 2023-08-21 15:52 UTC (permalink / raw)
  To: Emacs Devel; +Cc: Stefan Monnier

[-- Attachment #1: Type: text/plain, Size: 443 bytes --]

Hello List!

I have been using https://elpa.gnu.org/packages/rnc-mode.html for a
while and was somewhat unsatisfied with its limited font-locking and
indentation capabilities. I tried to extend it but still could not
support the annotation syntax. So after Emacs 29 I made a tree-sitter
parser for RelaxNG Compact and also an rnc-ts-mode.

I'm looking to contribute the Emacs mode to ELPA, or even better, to
use it to update the existing rnc-mode.

LdBeth


[-- Attachment #2: grammar.js --]
[-- Type: application/javascript, Size: 6966 bytes --]

/* Tree Sitter Parser for RelaxNG Compact
 *
 * Copyright 2023 LdBeth
 *
 * This program is free software: you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */
// Lexical pattern shared by the name rules below (`_NCName`, `_CName` and
// `_QuotedName`): one character from [_0-9A-Za-z], followed by any number of
// those characters, `-` or `.`.
// NOTE(review): an XML NCName may not start with a digit and may contain
// non-ASCII letters; confirm whether this ASCII-only approximation is intended.
const NCNAME = /[_0-9A-Za-z][_0-9A-Za-z\-\.]*/;

module.exports = grammar({
  name: 'rnc',
  extras: $ => [
    /\s/,
    $.comment
  ],
  word: $ => $._NCName,
  rules: {
    source_file: $ => seq(
      repeat($.declare),
      choice($._innerPattern, repeat($._grammarContent))
    ),

    declare: $ => choice(
      seq('namespace',
          field('name', $.identifier),
          '=',
          field('uri', $._namespaceURILiteral)),
      seq('default', 'namespace',
          optional(field('name', $.identifier)),
          '=',
          field('uri', $._namespaceURILiteral)),
      seq('datatypes', field('name', $.identifier),
          '=', field('uri', $.literal))
    ),

    _grammarContent: $ => choice(
      $.annotation_element,
      choice(
        $.define,
        $.grammar_div,
        $.include)
    ),

    grammar_div: $ => seq(
      optional($.annotation),
      'div', $.grammar_block),

    grammar_block: $ => seq('{', repeat($._grammarContent), '}'),

    include: $ => seq(
      optional($.annotation),
      'include',
      field('uri', $.literal),
      optional($._inherit),
      $.include_block
    ),

    include_block: $ => seq(
      '{',
      choice($.define, $.include_div),
      '}'),

    include_div: $ => seq(optional($.annotation), 'div', $.include_block),

    define: $ => seq(
      optional($.annotation),
      field('name', $.identifier),
      choice('=', '|=', '&='),
      field('body', $._innerPattern)
    ),

    _innerPattern: $ => choice(
      $.pattern,
      $.choice_pattern,
      $.group_pattern,
      $.interleave_pattern,
      $.repeated_pattern,
    ),

    _primaryPattern: $ => seq(
      optional($.annotation),
      choice(
        $.primary,
        seq('(', $._innerPattern, ')')),
    ),

    pattern: $ => seq(
      $._primaryPattern, repeat($.follow_annotation)),

    primary: $ => choice(
      seq('element', $._nameClass, $.pattern_block),
      seq('attribute', $._nameClass, $.pattern_block),
      seq('list', $.pattern_block),
      seq('mixed', $.pattern_block),
      'empty', 'text', 'notAllowed',
      $.identifier,
      $.datatype,
      seq('parent', $.identifier),
      seq('grammar',  $.grammar_block),
      $.external
    ),

    external: $ => seq(
      'external',
      field('uri', $.literal),
      $._inherit
    ),
    datatype: $ => choice(
      seq(optional(field('name', $.datatype_name)),
          field('value', $.literal)),
      seq(field('name', $.datatype_name),
          optional(field('param', $.param_block)),
          optional(seq('-', field('except', alias($._primaryPattern, $.pattern)))))
    ),

    pattern_block: $ => seq('{', $._innerPattern, '}'),

    param_block: $ => seq('{', repeat($.param), '}'),
    param: $ => seq(
      optional($.annotation),
      field('name', $.identifier),
      '=',
      field('value', $.literal)),

    repeated_pattern: $ => seq(
      alias($._primaryPattern, $.pattern), choice('*', '+', '?')),

    _particle: $ => choice($.pattern, $.repeated_pattern),

    choice_pattern: $ => $._patternChoice,
    group_pattern: $ =>  $._patternGroup,
    interleave_pattern: $ => $._patternInterleave,

    _patternChoice: $ => choice(
      seq($._particle, '|', $._particle),
      seq($._particle, '|', $._patternChoice),
    ),

    _patternGroup: $ => choice(
      seq($._particle, ',', $._particle),
      seq($._particle, ',', $._patternGroup),
    ),

    _patternInterleave: $ => choice(
      seq($._particle, '&', $._particle),
      seq($._particle, '&', $._patternInterleave),
    ),

    _namespaceURILiteral: $ => choice(
      $.literal,
      'inherit'),
    _inherit: $ => seq('inherit', '=', field('ns', $.identifier)),

    literal: $ => $._literal,
    _literal: $ => choice(
      $.literal_segment,
      seq($.literal_segment, '~', $._literal)
    ),
    literal_segment: $ => token(choice(
      seq("'", /[^'\n]*/, "'"),
      seq('"', /[^"\n]*/, '"'),
      seq('"""', /("?"?[^"])*/, '"""'),
      seq("'''", /('?'?[^"])*/, "'''")
    )),

    _nameClass: $ => choice(
      $.simple_name_class,
      $.choice_name_class,
      $.except_name_class
    ),

    simple_name_class: $ => seq(
      $._simpleNameClass,
      repeat($.follow_annotation)),

    except_name_class: $ => seq(
      optional($.annotation),
      field('name', $.name), '-',
      field('except', alias($._simpleNameClass, $.simple_name_class)),
      repeat($.follow_annotation)),

    _simpleNameClass: $ => seq(
      optional($.annotation),
      choice(
        $.name,
        seq('(', $._nameClass, ')')),
    ),

    choice_name_class: $ => $._nameClassChoice,
    _nameClassChoice: $ => choice(
      seq($.simple_name_class, '|', $.simple_name_class),
      seq($.simple_name_class, '|', $._nameClassChoice),
    ),

    datatype_name: $ => choice($._CName, 'string', 'token'),


    identifier: $ => choice($._NCName, $._QuotedName),
    name: $ => choice(
      $._NCName,
      $._QuotedName,
      $._CName,
      seq(field('ns', $.prefix), token.immediate(':*')),
      '*'
    ),
    _CName: $ => seq(field('ns', $.prefix), token.immediate(seq(':', NCNAME))),
    prefix: $ => $._NCName,
    _NCName: _ => NCNAME,
    _QuotedName: _ => token(seq('\\', NCNAME)),

    annotation: $ => choice(
      repeat1($.documentation),
      seq(optional(repeat1($.documentation)), $.annotation_block)
    ),

    annotation_block: $ => seq(
      '[',
      repeat($.annotation_attribute),
      repeat($.annotation_element),
      ']'),
    annotation_attribute: $ => seq(
      field('name', alias($.element_name, $.name)),
      '=',
      field('value', $.literal)),
    annotation_element: $ => seq(
      alias($.element_name, $.name),
      $.annotation_element_block
    ),
    annotation_element_block: $ => seq(
      '[',
      repeat($.annotation_attribute),
      repeat(choice($.annotation_element, $.literal)),
      ']'),
    element_name: $ => choice(
      $._NCName,
      $._QuotedName,
      $._CName
    ),

    follow_annotation: $ => seq(
      '>>', $.annotation_element
    ),

    comment: $ => token(prec(-10, /#.*/)),
    documentation: $ => token(/##.*/)
  }
});
/*
  Local Variables:
  js-indent-level: 2
  End:
*/

[-- Attachment #3: rnc-ts-mode.el --]
[-- Type: application/octet-stream, Size: 5226 bytes --]

;;; rnc-ts-mode.el --- Emacs mode to edit Relax-NG Compact files  -*- lexical-binding:t -*-

;; Copyright (C) 2023 LdBeth

;; Author: LdBeth <ldbeth@sdf.org>
;; Keywords: xml relaxng
;; Version: 0.3

;; This file is not part of GNU Emacs.

;; rnc-ts-mode is free software: you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.

;; rnc-ts-mode is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.


;;; Commentary:

(require 'nxml-mode)
(require 'treesit)
(eval-when-compile
  (require 'rx))

;;; Code:

;; Open files whose names end in ".rnc" in `rnc-ts-mode'.
;;;###autoload
(add-to-list 'auto-mode-alist '("\\.rnc\\'" . rnc-ts-mode))

(defconst rnc-mode-syntax-table
  (let ((table (make-syntax-table)))
    ;; Each entry is (CHAR . SYNTAX-DESCRIPTOR).
    (dolist (entry '((?#  . "<")      ; `#' starts a comment ...
                     (?\n . ">")      ; ... which a newline ends.
                     (?\" . "\"")     ; Both quote characters delimit strings.
                     (?\' . "\"")
                     (?-  . "_")      ; Symbol constituents.
                     (?.  . "_")
                     (?:  . "_")
                     (?_  . "_")))
      (modify-syntax-entry (car entry) (cdr entry) table))
    table)
  "Syntax table used by `rnc-ts-mode'.")

(defconst rnc--keywords
  ;; Taken from the grammar in http://relaxng.org/compact-20021121.html,
  ;; by order of appearance.
  '("namespace" "default" "datatypes" "element" "attribute"
    "list" "mixed" "parent" "empty" "text" "notAllowed" "external"
    "grammar" "div" "include" ;; "start"
    "string" "token" "inherit")
  "Keyword tokens fontified with `font-lock-keyword-face'.
The list is spliced into a tree-sitter query in
`rnc--treesit-font-lock-settings'.")

(defconst rnc--operators
  '("=" "&=" "|=" "*" "?" "+" "-" "~")
  "Operator tokens fontified with `font-lock-operator-face'.
The list is spliced into a tree-sitter query in
`rnc--treesit-font-lock-settings'.")

(defconst rnc--delimiters '("&" "," "|")
  "Pattern separator tokens fontified with `font-lock-delimiter-face'.
The list is spliced into a tree-sitter query in
`rnc--treesit-font-lock-settings'.")

(defvar rnc-indent-level 2
  "Indentation offset used by several rules in `rnc--treesit-indent-rules'.")

(defvar rnc--treesit-font-lock-settings
  (treesit-font-lock-rules

   ;; `#' comments.
   :language 'rnc
   :feature 'comment
   '((comment) @font-lock-comment-face)

   ;; Every token listed in `rnc--keywords'.
   :language 'rnc
   :feature 'keyword
   `([,@rnc--keywords] @font-lock-keyword-face)

   ;; Each quoted segment of a literal.
   :language 'rnc
   :feature 'string
   '((literal_segment) @font-lock-string-face)

   ;; Names being introduced: definitions, datatype parameters and
   ;; annotation attributes.
   :language 'rnc
   :feature 'definition
   '((define
      name: (identifier) @font-lock-function-name-face)

     (param
      name: (identifier) @font-lock-variable-name-face)

     (annotation_attribute
      name: (name) @font-lock-variable-name-face))

   ;; Declared prefixes and the prefix part of qualified names.
   ;; `:override t' lets these faces replace ones applied by earlier rules.
   :language 'rnc
   :feature 'namespace
   :override t
   '((declare
      name: (identifier) @font-lock-constant-face)
     (name
      ns: (prefix) @font-lock-constant-face)
     (datatype_name
      ns: (prefix) @font-lock-constant-face))

   ;; `##' documentation lines.
   :language 'rnc
   :feature 'docstring
   '((documentation) @font-lock-doc-face)

   ;; Every token listed in `rnc--operators'.
   :language 'rnc
   :feature 'operator
   `([,@rnc--operators] @font-lock-operator-face)

   :language 'rnc
   :feature 'bracket
   '((["(" ")" "[" "]" "{" "}"]) @font-lock-bracket-face)

   ;; Every token listed in `rnc--delimiters'.
   :language 'rnc
   :feature 'delimiter
   `(([,@rnc--delimiters]) @font-lock-delimiter-face))
  "Tree-sitter font-lock settings for `rnc-ts-mode'.
The features named here are grouped into levels by the
`treesit-font-lock-feature-list' value set in `rnc-ts-mode'.")

(defvar rnc--treesit-indent-rules
  `((rnc
     ;; Each rule is (MATCHER ANCHOR OFFSET).  See
     ;; `treesit-simple-indent-rules' for how rules are selected; the order
     ;; below is significant because several matchers are regexps that can
     ;; match more than one node type (e.g. "param" also matches
     ;; "param_block", which the "_block" rule above it handles first).
     ((parent-is "declare") parent-bol 0)
     ;; Closing delimiters.
     ((node-is "}") grand-parent 0)
     ((node-is ")") prev-sibling 0)
     ((node-is "]") grand-parent 0)
     ((node-is "literal_segment") parent-bol 0)
     ((node-is "follow_annotation") prev-sibling 0)
     ((parent-is "comment") prev-adaptive-prefix 0)
     ;; Members of a name-class node (any type ending in "_name_class")
     ;; line up with the first member.
     ((parent-is ,(rx (seq (one-or-more alpha) "_name_class")))
      first-sibling 0)
     ((node-is "documentation") parent-bol 0)
     ;; Members of a pattern chain (any type ending in "_pattern") line up
     ;; with the first member.
     ((parent-is ,(rx (seq (one-or-more alpha) "_pattern"))) first-sibling 0)
     ;; Contents of a brace or bracket block (any type ending in "_block").
     ((parent-is ,(rx (seq (one-or-more alpha) "_block"))) grand-parent rnc-indent-level)
     ((parent-is "param") great-grand-parent rnc-indent-level)
     ((parent-is "primary") great-grand-parent rnc-indent-level)
     ;; Nodes carrying the grammar's `body' and `except' fields.
     ((field-is "body") parent-bol rnc-indent-level)
     ((field-is "except") parent-bol rnc-indent-level)
     ))
  "Tree-sitter indentation rules for `rnc-ts-mode'.
Offsets given as `rnc-indent-level' refer to that variable.")

(defun rnc--treesit-defun-name (node)
  "Return the defun name of NODE.
Return nil if there is no name or if NODE is not a defun node.
A defun node is one whose type is \"define\" or \"declare\", matching
the `treesit-defun-type-regexp' value set in `rnc-ts-mode'.  The name
is the text, without text properties, of the child in the node's
\"name\" field."
  ;; Other node types also carry a "name" field, so check the node type
  ;; explicitly to honor the contract above.
  (when (member (treesit-node-type node) '("define" "declare"))
    (treesit-node-text
     (treesit-node-child-by-field-name node "name")
     t)))

;;;###autoload
(define-derived-mode rnc-ts-mode prog-mode "RNC"
  "Major mode for editing RELAX NG Compact schemas, powered by tree-sitter.
The tree-sitter setup is performed only when `treesit-ready-p' reports
that the `rnc' language is ready."
  :syntax-table rnc-mode-syntax-table
  (when (treesit-ready-p 'rnc)
    (treesit-parser-create 'rnc)
    (setq-local
     comment-start "#"
     ;; Font lock.
     treesit-font-lock-settings rnc--treesit-font-lock-settings
     treesit-font-lock-feature-list '((comment definition)
                                      (keyword string)
                                      (bracket delimiter operator docstring namespace))
     ;; Indentation.
     treesit-simple-indent-rules rnc--treesit-indent-rules
     ;; Navigation and Imenu.
     treesit-defun-type-regexp (rx bos (or "define" "declare") eos)
     treesit-defun-name-function #'rnc--treesit-defun-name
     treesit-simple-imenu-settings `(("Definition" ,(rx bos "define" eos) nil nil)
                                     ("Namespace" ,(rx bos "declare" eos)
                                      nil nil)))
    ;; Must come last: it reads the variables set above.
    (treesit-major-mode-setup)))


(provide 'rnc-ts-mode)
;;; rnc-ts-mode.el ends here

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2023-08-21 15:52 UTC | newest]

Thread overview: (only message) (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2023-08-21 15:52 Tree sitter for RelaxNG Compact and rnc-ts-mode LdBeth

Code repositories for project(s) associated with this external index

	https://git.savannah.gnu.org/cgit/emacs.git
	https://git.savannah.gnu.org/cgit/emacs/org-mode.git

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.