;;; c-ts-mode.el --- tree-sitter support for C and C++  -*- lexical-binding: t; -*-

;; Copyright (C) 2022 Free Software Foundation, Inc.

;; Author     : Theodor Thornhill
;; Maintainer : Theodor Thornhill
;; Created    : November 2022
;; Keywords   : c c++ cpp languages tree-sitter

;; This file is part of GNU Emacs.

;; This program is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.

;; This program is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with this program.  If not, see <https://www.gnu.org/licenses/>.

;;; Commentary:
;;

;;; Code:

(require 'treesit)
(require 'rx)

(defcustom c-ts-mode-indent-offset 2
  "Number of spaces for each indentation step in `c-ts-mode'."
  :version "29.1"
  :type 'integer
  :safe 'integerp
  :group 'c)

(defcustom c-ts-mode-indent-style 'gnu
  "Style used for indentation.

The selected style could be one of GNU, K&R, LINUX or BSD.  If
one of the supplied styles doesn't suffice a function could be
set instead.  This function is expected to return a list that
follows the form of `treesit-simple-indent-rules'."
  :version "29.1"
  ;; Fixed symbol values are spelled with `const'; the widget
  ;; specification is already quoted, so the values are not quoted
  ;; a second time.
  :type '(choice (const :tag "Gnu" gnu)
                 (const :tag "K&R" k&r)
                 (const :tag "Linux" linux)
                 (const :tag "BSD" bsd)
                 (function :tag "A function for user customized style" ignore))
  :group 'c)

(defvar c-ts-mode--syntax-table
  (let ((table (make-syntax-table)))
    ;; Taken from the cc-langs version
    (modify-syntax-entry ?_  "_"     table)
    (modify-syntax-entry ?\\ "\\"    table)
    (modify-syntax-entry ?+  "."     table)
    (modify-syntax-entry ?-  "."     table)
    (modify-syntax-entry ?=  "."     table)
    (modify-syntax-entry ?%  "."     table)
    (modify-syntax-entry ?<  "."     table)
    (modify-syntax-entry ?>  "."     table)
    (modify-syntax-entry ?&  "."     table)
    (modify-syntax-entry ?|  "."     table)
    (modify-syntax-entry ?\' "\""    table)
    (modify-syntax-entry ?\240 "."   table)
    (modify-syntax-entry ?/  ". 124b" table)
    (modify-syntax-entry ?*  ". 23"   table)
    table)
  "Syntax table for `c-ts-mode'.")

(defun c-ts-mode--indent-styles (mode)
  "Indent rules supported by `c-ts-mode'.
MODE is either `c' or `cpp'.

Return an alist whose keys are the style symbols `gnu', `k&r',
`linux' and `bsd', and whose values are lists of indent rules."
  (let ((common
         `(((parent-is "translation_unit") parent-bol 0)
           ((node-is ")") parent 1)
           ((node-is "]") parent-bol 0)
           ((node-is "}") (and parent parent-bol) 0)
           ((node-is "else") parent-bol 0)
           ((node-is "case") parent-bol 0)
           ((node-is "preproc_arg") no-indent)
           ((node-is "comment") no-indent)
           ((parent-is "comment") no-indent)
           ((node-is "labeled_statement") parent-bol 0)
           ((parent-is "labeled_statement") parent-bol c-ts-mode-indent-offset)
           ((match "preproc_ifdef" "compound_statement") point-min 0)
           ((match "#endif" "preproc_ifdef") point-min 0)
           ((match "preproc_if" "compound_statement") point-min 0)
           ((match "#endif" "preproc_if") point-min 0)
           ((match "preproc_function_def" "compound_statement") point-min 0)
           ((match "preproc_call" "compound_statement") point-min 0)
           ((parent-is "compound_statement")
            (and parent parent-bol) c-ts-mode-indent-offset)
           ((parent-is "function_definition") parent-bol 0)
           ((parent-is "conditional_expression") first-sibling 0)
           ((parent-is "assignment_expression") parent-bol c-ts-mode-indent-offset)
           ((parent-is "comma_expression") first-sibling 0)
           ((parent-is "init_declarator") parent-bol c-ts-mode-indent-offset)
           ((parent-is "parenthesized_expression") first-sibling 1)
           ((parent-is "argument_list") first-sibling 1)
           ((parent-is "parameter_list") first-sibling 1)
           ((parent-is "binary_expression") parent 0)
           ((query "(for_statement initializer: (_) @indent)") parent-bol 5)
           ((query "(for_statement condition: (_) @indent)") parent-bol 5)
           ((query "(for_statement update: (_) @indent)") parent-bol 5)
           ((query "(call_expression arguments: (_) @indent)")
            parent c-ts-mode-indent-offset)
           ((parent-is "call_expression") parent 0)
           ((parent-is "enumerator_list") parent-bol c-ts-mode-indent-offset)
           ((parent-is "field_declaration_list") parent-bol c-ts-mode-indent-offset)
           ((parent-is "initializer_list") parent-bol c-ts-mode-indent-offset)
           ((parent-is "if_statement") parent-bol c-ts-mode-indent-offset)
           ((parent-is "for_statement") parent-bol c-ts-mode-indent-offset)
           ((parent-is "while_statement") parent-bol c-ts-mode-indent-offset)
           ((parent-is "switch_statement") parent-bol c-ts-mode-indent-offset)
           ((parent-is "case_statement") parent-bol c-ts-mode-indent-offset)
           ((parent-is "do_statement") parent-bol c-ts-mode-indent-offset)
           ;; C++ only.  Unlike the rules above, which carry the
           ;; symbol `c-ts-mode-indent-offset', this offset is a
           ;; number computed once, when the rules are built.
           ,@(when (eq mode 'cpp)
               `(((node-is "field_initializer_list")
                  parent-bol ,(* c-ts-mode-indent-offset 2)))))))
    `((gnu
       ;; Prepend rules to set highest priority
       ((match "while" "do_statement") parent 0)
       ,@common)
      (k&r ,@common)
      (linux ,@common)
      (bsd
       ((parent-is "if_statement") parent-bol 0)
       ((parent-is "for_statement") parent-bol 0)
       ((parent-is "while_statement") parent-bol 0)
       ((parent-is "switch_statement") parent-bol 0)
       ((parent-is "case_statement") parent-bol 0)
       ((parent-is "do_statement") parent-bol 0)
       ,@common))))

(defun c-ts-mode--set-indent-style (mode)
  "Helper function to set indentation style.
MODE is either `c' or `cpp'.

If `c-ts-mode-indent-style' is a function, call it with no
arguments to obtain the rules; otherwise look the style symbol up
in the alist returned by `c-ts-mode--indent-styles'.  An unknown
style symbol yields an empty rule list.  The return value has the
form ((MODE RULE...))."
  (let ((style
         (if (functionp c-ts-mode-indent-style)
             (funcall c-ts-mode-indent-style)
           (alist-get c-ts-mode-indent-style
                      (c-ts-mode--indent-styles mode)))))
    `((,mode ,@style))))

(defvar c-ts-mode--preproc-keywords
  '("#define" "#if" "#ifdef" "#ifndef"
    "#else" "#elif" "#endif" "#include")
  "C/C++ keywords for tree-sitter font-locking.")

(defun c-ts-mode--keywords (mode)
  "C/C++ keywords for tree-sitter font-locking.
MODE is either `c' or `cpp'.

For `cpp' the C++-only keywords are appended to the C keywords;
for any other MODE \"auto\" is prepended to the C keywords."
  (let ((c-keywords
         '("break" "case" "const" "continue"
           "default" "do" "else" "enum"
           "extern" "for" "goto" "if"
           "long" "register" "return" "short"
           "signed" "sizeof" "static" "struct"
           "switch" "typedef" "union" "unsigned"
           "volatile" "while")))
    (if (eq mode 'cpp)
        (append c-keywords
                '("and" "and_eq" "bitand" "bitor"
                  "catch" "class" "co_await" "co_return"
                  "co_yield" "compl" "concept" "consteval"
                  "constexpr" "constinit" "decltype" "delete"
                  "explicit" "final" "friend"
                  "mutable" "namespace" "new" "noexcept"
                  "not" "not_eq" "operator" "or"
                  "or_eq" "override" "private" "protected"
                  "public" "requires" "template" "throw"
                  "try" "typename" "using" "virtual"
                  "xor" "xor_eq"))
      (append '("auto") c-keywords))))

(defvar c-ts-mode--operators
  '("=" "-" "*" "/" "+" "%" "~" "|" "&" "^" "<<" ">>" "->"
    "." "<" "<=" ">=" ">" "==" "!=" "!" "&&" "||" "-="
    "+=" "*=" "/=" "%=" "|=" "&=" "^=" ">>=" "<<=" "--" "++")
  "C/C++ operators for tree-sitter font-locking.")

(defun c-ts-mode--font-lock-settings (mode)
  "Tree-sitter font-lock settings.
MODE is either `c' or `cpp'.

Return the result of `treesit-font-lock-rules' for the features
comment, preprocessor, constant, keyword, operator, string,
literal, type, definition, expression, statement, error and
emacs-devel."
  (treesit-font-lock-rules
   :language mode
   :override t
   :feature 'comment
   `((comment) @font-lock-comment-face
     (comment) @contextual)

   :language mode
   :override t
   :feature 'preprocessor
   `((preproc_directive) @font-lock-preprocessor-face

     (preproc_def
      name: (identifier) @font-lock-variable-name-face)

     (preproc_ifdef
      name: (identifier) @font-lock-variable-name-face)

     (preproc_function_def
      name: (identifier) @font-lock-function-name-face)

     (preproc_params
      (identifier) @font-lock-variable-name-face)

     (preproc_defined) @font-lock-preprocessor-face
     (preproc_defined (identifier) @font-lock-variable-name-face)
     [,@c-ts-mode--preproc-keywords] @font-lock-preprocessor-face)

   :language mode
   :override t
   :feature 'constant
   `((true) @font-lock-constant-face
     (false) @font-lock-constant-face
     (null) @font-lock-constant-face
     ,@(when (eq mode 'cpp)
         '((this) @font-lock-constant-face)))

   :language mode
   :override t
   :feature 'keyword
   `([,@(c-ts-mode--keywords mode)] @font-lock-keyword-face
     ;; For C++ "auto" is matched as a node rather than as a keyword
     ;; string (see `c-ts-mode--keywords').
     ,@(when (eq mode 'cpp)
         '((auto) @font-lock-keyword-face)))

   :language mode
   :override t
   :feature 'operator
   `([,@c-ts-mode--operators] @font-lock-builtin-face)

   :language mode
   :override t
   :feature 'string
   `((string_literal) @font-lock-string-face
     ((string_literal)) @contextual
     (system_lib_string) @font-lock-string-face
     (escape_sequence) @font-lock-string-face)

   :language mode
   :override t
   :feature 'literal
   `((number_literal) @font-lock-constant-face
     (char_literal) @font-lock-constant-face)

   :language mode
   :override t
   :feature 'type
   `((primitive_type) @font-lock-type-face
     ,@(when (eq mode 'cpp)
         '((type_qualifier) @font-lock-type-face

           (qualified_identifier
            scope: (namespace_identifier) @font-lock-type-face)

           ;; NOTE(review): `type:' sits outside the (operator_cast)
           ;; node pattern here; confirm whether it was meant to be
           ;; a field of operator_cast.
           (operator_cast) type: (type_identifier) @font-lock-type-face)))

   :language mode
   :override t
   :feature 'definition
   `((declaration
      declarator: (identifier) @font-lock-variable-name-face)

     (declaration
      type: (type_identifier) @font-lock-type-face)

     (field_declaration
      declarator: (field_identifier) @font-lock-variable-name-face)

     (field_declaration
      type: (type_identifier) @font-lock-type-face)

     (parameter_declaration
      type: (type_identifier) @font-lock-type-face)

     (function_definition
      type: (type_identifier) @font-lock-type-face)

     (function_declarator
      declarator: (identifier) @font-lock-function-name-face)

     (array_declarator
      declarator: (identifier) @font-lock-variable-name-face)

     (init_declarator
      declarator: (identifier) @font-lock-variable-name-face)

     (struct_specifier
      name: (type_identifier) @font-lock-type-face)

     (sized_type_specifier) @font-lock-type-face

     (enum_specifier
      name: (type_identifier) @font-lock-type-face)

     (enumerator
      name: (identifier) @font-lock-variable-name-face)

     (parameter_declaration
      type: (_) @font-lock-type-face
      declarator: (identifier) @font-lock-variable-name-face)

     (pointer_declarator
      declarator: (identifier) @font-lock-variable-name-face)

     (pointer_declarator
      declarator: (field_identifier) @font-lock-variable-name-face))

   :language mode
   :override t
   :feature 'expression
   '((assignment_expression
      left: (identifier) @font-lock-variable-name-face)

     (call_expression
      function: (identifier) @font-lock-function-name-face)

     (field_expression
      field: (field_identifier) @font-lock-variable-name-face)

     (field_expression
      argument: (identifier) @font-lock-variable-name-face
      field: (field_identifier) @font-lock-variable-name-face)

     (pointer_expression
      argument: (identifier) @font-lock-variable-name-face))

   :language mode
   :override t
   :feature 'statement
   '((expression_statement (identifier) @font-lock-variable-name-face)
     (labeled_statement
      label: (statement_identifier) @font-lock-type-face))

   :language mode
   :override t
   :feature 'error
   '((ERROR) @font-lock-warning-face)

   :language mode
   :feature 'emacs-devel
   :override t
   ;; Calls whose function is named exactly DEFUN are handed to
   ;; `c-ts-mode--fontify-defun' instead of being given a face.
   '(((call_expression function: (identifier) @fn)
      @c-ts-mode--fontify-defun
      (:match "^DEFUN$" @fn)))))

(defun c-ts-mode--fontify-defun (node override start end &rest _)
  "Correctly fontify the DEFUN macro.
For NODE, OVERRIDE, START, and END, see
`treesit-font-lock-rules'.  The captured NODE is a
call_expression where DEFUN is the function.

This function corrects the fontification on the colon in
\"doc:\", and the parameter list."
  (let* ((parent (treesit-node-parent node))
         ;; ARG-LIST-1 and 2 are like this:
         ;;
         ;; DEFUN (ARG-LIST-1)
         ;; (ARG-LIST-2)
         ;;
         ;; ARG-LIST-1 is every child of NODE's "arguments" field.
         (arg-list-1 (treesit-node-children
                      (treesit-node-child-by-field-name
                       node "arguments")))
         ;; ARG-LIST-2 is the named children of the "arguments"
         ;; field of NODE's parent.
         (arg-list-2 (treesit-node-children
                      (treesit-node-child-by-field-name
                       parent "arguments")
                      t)))
    ;; Fix the colon.
    (dolist (child arg-list-1)
      (when (equal (treesit-node-text child t) ":")
        (treesit-fontify-with-override
         (treesit-node-start child) (treesit-node-end child)
         'default override)))
    ;; Fix the parameter list.  ARG-LIST-2 is consumed two nodes at a
    ;; time: the first gets the type face, the second the default
    ;; face.  `pop' on an exhausted list yields nil, so an odd-length
    ;; list simply leaves ARG nil on the last round.
    (while arg-list-2
      (let ((type (pop arg-list-2))
            (arg (pop arg-list-2)))
        (when type
          (treesit-fontify-with-override
           (max start (treesit-node-start type))
           (min end (treesit-node-end type))
           'font-lock-type-face t))
        (when arg
          (treesit-fontify-with-override
           (max start (treesit-node-start arg))
           (min end (treesit-node-end arg))
           'default t))))))

(defun c-ts-mode--imenu-1 (node)
  "Helper for `c-ts-mode--imenu'.
Find string representation for NODE and set marker, then recurse
the subtrees.

NODE is a sparse tree of the form (TS-NODE . CHILDREN); TS-NODE
may be nil, in which case only the entries of CHILDREN are
returned."
  (let* ((ts-node (car node))
         (subtrees (mapcan #'c-ts-mode--imenu-1 (cdr node)))
         (name (when ts-node
                 (or (treesit-node-text
                      (pcase (treesit-node-type ts-node)
                        ("function_definition"
                         (treesit-node-child-by-field-name
                          (treesit-node-child-by-field-name
                           ts-node "declarator")
                          "declarator"))
                        ("declaration"
                         (let ((child (treesit-node-child ts-node -1 t)))
                           (pcase (treesit-node-type child)
                             ("identifier" child)
                             (_ (treesit-node-child-by-field-name
                                 child "declarator")))))
                        ("struct_specifier"
                         (treesit-node-child-by-field-name
                          ts-node "name")))
                      t)
                     "Unnamed node")))
         (marker (when ts-node
                   (set-marker (make-marker)
                               (treesit-node-start ts-node)))))
    (cond
     ;; A struct_specifier could be inside a parameter list, another
     ;; struct definition, a variable declaration, a function
     ;; declaration.  In those cases we don't include it.
     ((string-match-p
       (rx (or "parameter_declaration" "field_declaration"
               "declaration" "function_definition"))
       (or (treesit-node-type (treesit-node-parent ts-node))
           ""))
      nil)
     ;; Declarations are only listed when they sit directly under the
     ;; translation unit.
     ((and (equal (treesit-node-type ts-node) "declaration")
           (not (equal (treesit-node-type (treesit-node-parent ts-node))
                       "translation_unit")))
      nil)
     ((null ts-node) subtrees)
     (subtrees
      `((,name ,(cons name marker) ,@subtrees)))
     (t
      `((,name . ,marker))))))

(defun c-ts-mode--imenu ()
  "Return Imenu alist for the current buffer.
The alist has the categories \"Struct\", \"Variable\" and
\"Function\", in that order."
  (let* ((node (treesit-buffer-root-node))
         (func-tree (treesit-induce-sparse-tree
                     node "^function_definition$"))
         (var-tree (treesit-induce-sparse-tree
                    node "^declaration$"))
         (struct-tree (treesit-induce-sparse-tree
                       node "^struct_specifier$")))
    `(("Struct" . ,(c-ts-mode--imenu-1 struct-tree))
      ("Variable" . ,(c-ts-mode--imenu-1 var-tree))
      ("Function" . ,(c-ts-mode--imenu-1 func-tree)))))

;;;###autoload
(define-derived-mode c-ts-mode--base-mode prog-mode "C"
  "Major mode for editing C, powered by tree-sitter."
  :syntax-table c-ts-mode--syntax-table

  ;; Navigation.
  (setq-local treesit-defun-type-regexp
              (rx (or "specifier"
                      "definition")))

  ;; Indent.
  (when (eq c-ts-mode-indent-style 'linux)
    (setq-local indent-tabs-mode t))

  ;; Electric
  (setq-local electric-indent-chars
              (append "{}():;," electric-indent-chars))

  ;; Imenu.
  (setq-local imenu-create-index-function #'c-ts-mode--imenu)
  (setq-local which-func-functions nil)

  (setq-local treesit-font-lock-feature-list
              '((comment preprocessor operator constant string literal keyword)
                (type definition expression statement)
                (error))))

;;;###autoload
(define-derived-mode c-ts-mode c-ts-mode--base-mode "C"
  "Major mode for editing C, powered by tree-sitter."
  :group 'c

  (unless (treesit-ready-p nil 'c)
    (error "Tree-sitter for C isn't available"))

  (treesit-parser-create 'c)

  ;; Comments.
  (setq-local comment-start "/* ")
  (setq-local comment-start-skip "\\(?://+\\|/\\*+\\)\\s *")
  (setq-local comment-end " */")

  ;; Indent.
  (setq-local treesit-simple-indent-rules
              (c-ts-mode--set-indent-style 'c))

  ;; Font-lock.
  (setq-local treesit-font-lock-settings (c-ts-mode--font-lock-settings 'c))

  (treesit-major-mode-setup))

;;;###autoload
(define-derived-mode c++-ts-mode c-ts-mode--base-mode "C++"
  "Major mode for editing C++, powered by tree-sitter."
  :group 'c++

  (unless (treesit-ready-p nil 'cpp)
    (error "Tree-sitter for C++ isn't available"))

  ;; Comments.
  (setq-local comment-start "// ")
  (setq-local comment-start-skip "\\(?://+\\|/\\*+\\)\\s *")
  (setq-local comment-end "")

  (treesit-parser-create 'cpp)

  ;; Indent.
  (setq-local treesit-simple-indent-rules
              (c-ts-mode--set-indent-style 'cpp))

  ;; Font-lock.
  (setq-local treesit-font-lock-settings (c-ts-mode--font-lock-settings 'cpp))

  (treesit-major-mode-setup))

(provide 'c-ts-mode)

;;; c-ts-mode.el ends here