;;; tree-sitter.el --- tree-sitter utilities -*- lexical-binding: t -*-
;; Copyright (C) 2021 Free Software Foundation, Inc.
;; This file is part of GNU Emacs.
;; GNU Emacs is free software: you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.
;; GNU Emacs is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs. If not, see .
;;; Commentary:
;;; Code:
;;; Node & parser accessors
(defun tree-sitter-node-buffer (node)
"Return the buffer in where NODE belongs."
(tree-sitter-parser-buffer
(tree-sitter-node-parser node)))
;;; Parser API supplement
(defun tree-sitter-get-parser (name)
"Find the first parser with name NAME in `tree-sitter-parser-list'.
Return nil if we can't find any."
(catch 'found
(dolist (parser tree-sitter-parser-list)
(when (equal name (tree-sitter-parser-name parser))
(throw 'found parser)))))
(defun tree-sitter-get-parser-create (name language)
"Find the first parser with name NAME in `tree-sitter-parser-list'.
If none exists, create one and return it. LANGUAGE is passed to
`tree-sitter-create-parser' when creating the parser."
(or (tree-sitter-get-parser name)
(tree-sitter-create-parser (current-buffer) language name)))
;;; Node API supplement
(defun tree-sitter-node-beginning (node)
"Return the start position of NODE."
(byte-to-position (tree-sitter-node-start-byte node)))
(defun tree-sitter-node-end (node)
"Return the end position of NODE."
(byte-to-position (tree-sitter-node-end-byte node)))
(defun tree-sitter-node-in-range (beg end &optional parser-name named)
"Return the smallest node covering BEG to END.
Find node in current buffer. Return nil if none find. If NAMED
non-nil, only look for named node. NAMED defaults to nil. By
default, use the first parser in `tree-sitter-parser-list'; but
if PARSER-NAME is non-nil, it specifies the name of the parser that
should be used."
(when-let ((root (tree-sitter-parser-root-node
(if parser-name
(tree-sitter-get-parser parser-name)
(car tree-sitter-parser-list)))))
(tree-sitter-node-descendant-for-byte-range
root (position-bytes beg) (position-bytes end) named)))
(defun tree-sitter-filter-child (node pred &optional named)
"Return children of NODE that satisfies PRED.
PRED is a function that takes one argument, the child node. If
NAMED non-nil, only search named node. NAMED defaults to nil."
(let ((child (tree-sitter-node-child node 0 named))
result)
(while child
(when (funcall pred child)
(push child result))
(setq child (tree-sitter-node-next-sibling child named)))
result))
(defun tree-sitter-node-content (node)
"Return the buffer content corresponding to NODE."
(with-current-buffer (tree-sitter-node-buffer node)
(buffer-substring-no-properties
(tree-sitter-node-beginning node)
(tree-sitter-node-end node))))
;;; Font-lock
(defvar-local tree-sitter-font-lock-settings nil
"A list of settings for tree-sitter-based font-locking.
Each setting controls one parser (often of different language).
A settings is a list of form (NAME LANGUAGE PATTERN). NAME is
the name given to the parser, by convention it is
\"font-lock-\", where is the language that
the parser uses. LANGUAGE is the language object returned by
tree-sitter language dynamic modules.
PATTERN is a tree-sitter query pattern. (See manual for how to
write query patterns.) This pattern should capture nodes with
either face symbols or function symbols. If captured with a face
symbol, the node's corresponding text in the buffer is fontified
with that face; if captured with a function symbol, the function
is called with three arguments, BEG END NODE, where BEG and END
marks the span of the corresponding text, and NODE is the node
itself. If a symbol is both a face and a function, it is treated
as a face.")
(defun tree-sitter-fontify-region-function (beg end &optional verbose)
"Fontify the region between BEG and END.
If VERBOSE is non-nil, print status messages.
\(See `font-lock-fontify-region-function'.)"
(dolist (elm tree-sitter-font-lock-settings)
(let ((parser-name (car elm))
(language (nth 1 elm))
(match-pattern (nth 2 elm)))
(tree-sitter-get-parser-create parser-name language)
(when-let ((node (tree-sitter-node-in-range beg end parser-name)))
(let ((captures (tree-sitter-query-capture
node match-pattern
;; specifying the range is important. More
;; often than not, NODE will be the root
;; node, and if we don't specify the range,
;; we are basically querying the whole file.
(position-bytes beg) (position-bytes end))))
(with-silent-modifications
(while captures
(let* ((face (caar captures))
(node (cdar captures))
(beg (tree-sitter-node-beginning node))
(end (tree-sitter-node-end node)))
(cond ((facep face)
(put-text-property beg end 'face face))
((functionp face)
(funcall face beg end node)))
(if verbose
(message "Fontifying text from %d to %d with %s"
beg end face)))
(setq captures (cdr captures))))
`(jit-lock-bounds ,(tree-sitter-node-beginning node)
. ,(tree-sitter-node-end node)))))))
(define-derived-mode json-mode js-mode "JSON"
"Major mode for JSON documents."
(setq-local font-lock-fontify-region-function
#'tree-sitter-fontify-region-function)
(setq-local tree-sitter-font-lock-settings
`(("font-lock-json"
,(tree-sitter-json)
"(string) @font-lock-string-face
(true) @font-lock-constant-face
(false) @font-lock-constant-face
(null) @font-lock-constant-face"))))
(defun ts-c-fontify-system-lib (beg end _)
(put-text-property beg (1+ beg) 'face 'font-lock-preprocessor-face)
(put-text-property (1- end) end 'face 'font-lock-preprocessor-face)
(put-text-property (1+ beg) (1- end)
'face 'font-lock-string-face))
(define-derived-mode ts-c-mode prog-mode "TS C"
"C mode with tree-sitter support."
(setq-local font-lock-fontify-region-function
#'tree-sitter-fontify-region-function)
(setq-local tree-sitter-font-lock-settings
`(("font-lock-c"
,(tree-sitter-c)
"(null) @font-lock-constant-face
(true) @font-lock-constant-face
(false) @font-lock-constant-face
(comment) @font-lock-comment-face
(system_lib_string) @ts-c-fontify-system-lib
(unary_expression
operator: _ @font-lock-negation-char-face)
(string_literal) @font-lock-string-face
(char_literal) @font-lock-string-face
(function_definition
declarator: (identifier) @font-lock-function-name-face)
(declaration
declarator: (identifier) @font-lock-function-name-face)
(function_declarator
declarator: (identifier) @font-lock-function-name-face)
(init_declarator
declarator: (identifier) @font-lock-variable-name-face)
(parameter_declaration
declarator: (identifier) @font-lock-variable-name-face)
(preproc_def
name: (identifier) @font-lock-variable-name-face)
(enumerator
name: (identifier) @font-lock-variable-name-face)
(field_identifier) @font-lock-variable-name-face
(parameter_list
(parameter_declaration
(identifier) @font-lock-variable-name-face))
(pointer_declarator
declarator: (identifier) @font-lock-variable-name-face)
(array_declarator
declarator: (identifier) @font-lock-variable-name-face)
(preproc_function_def
name: (identifier) @font-lock-variable-name-face
parameters: (preproc_params
(identifier) @font-lock-variable-name-face))
(type_identifier) @font-lock-type-face
(primitive_type) @font-lock-type-face
\"auto\" @font-lock-keyword-face
\"break\" @font-lock-keyword-face
\"case\" @font-lock-keyword-face
\"const\" @font-lock-keyword-face
\"continue\" @font-lock-keyword-face
\"default\" @font-lock-keyword-face
\"do\" @font-lock-keyword-face
\"else\" @font-lock-keyword-face
\"enum\" @font-lock-keyword-face
\"extern\" @font-lock-keyword-face
\"for\" @font-lock-keyword-face
\"goto\" @font-lock-keyword-face
\"if\" @font-lock-keyword-face
\"register\" @font-lock-keyword-face
\"return\" @font-lock-keyword-face
\"sizeof\" @font-lock-keyword-face
\"static\" @font-lock-keyword-face
\"struct\" @font-lock-keyword-face
\"switch\" @font-lock-keyword-face
\"typedef\" @font-lock-keyword-face
\"union\" @font-lock-keyword-face
\"volatile\" @font-lock-keyword-face
\"while\" @font-lock-keyword-face
\"long\" @font-lock-type-face
\"short\" @font-lock-type-face
\"signed\" @font-lock-type-face
\"unsigned\" @font-lock-type-face
\"#include\" @font-lock-preprocessor-face
\"#define\" @font-lock-preprocessor-face
\"#ifdef\" @font-lock-preprocessor-face
\"#ifndef\" @font-lock-preprocessor-face
\"#endif\" @font-lock-preprocessor-face
\"#else\" @font-lock-preprocessor-face
\"#elif\" @font-lock-preprocessor-face"))))
(add-to-list 'auto-mode-alist '("\\.json\\'" . json-mode))
(add-to-list 'auto-mode-alist '("\\.tsc\\'" . ts-c-mode))
(provide 'tree-sitter)
;;; tree-sitter.el ends here