;;; nxml-mode.el --- a new XML mode -*- lexical-binding:t -*- ;; Copyright (C) 2003-2004, 2007-2023 Free Software Foundation, Inc. ;; Author: James Clark ;; Keywords: wp, hypermedia, languages, XML ;; This file is part of GNU Emacs. ;; GNU Emacs is free software: you can redistribute it and/or modify ;; it under the terms of the GNU General Public License as published by ;; the Free Software Foundation, either version 3 of the License, or ;; (at your option) any later version. ;; GNU Emacs is distributed in the hope that it will be useful, ;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ;; GNU General Public License for more details. ;; You should have received a copy of the GNU General Public License ;; along with GNU Emacs. If not, see . ;;; Commentary: ;; See nxml-rap.el for description of parsing strategy. ;;; Code: (eval-when-compile (require 'cl-lib)) (require 'xmltok) (require 'nxml-enc) (require 'nxml-util) (require 'nxml-rap) (require 'nxml-outln) ;; nxml-mode calls rng-nxml-mode-init, which is autoloaded from rng-nxml. ;; So we might as well just require it and silence the compiler. (provide 'nxml-mode) ; avoid recursive require (require 'rng-nxml) (require 'sgml-mode) ;;; Customization (defgroup nxml nil "New XML editing mode." :link '(custom-manual "(nxml-mode) Top") :group 'languages) (defgroup nxml-faces nil "Faces for XML syntax highlighting." :group 'nxml) (defcustom nxml-char-ref-display-glyph-flag t "Non-nil means display glyph following character reference. The glyph is displayed in face `nxml-glyph'." :group 'nxml :type 'boolean :safe #'booleanp) (defcustom nxml-sexp-element-flag t "Non-nil means sexp commands treat an element as a single expression." :version "27.1" ; nil -> t :group 'nxml :type 'boolean :safe #'booleanp) (defcustom nxml-slash-auto-complete-flag nil "Non-nil means typing a slash automatically completes the end-tag. This is used by `nxml-electric-slash'." :group 'nxml :type 'boolean :safe #'booleanp) (defcustom nxml-child-indent 2 "Indentation for the children of an element relative to the start-tag. This only applies when the line or lines containing the start-tag contains nothing else other than that start-tag." :group 'nxml :type 'integer :safe #'integerp) (defcustom nxml-attribute-indent 4 "Indentation for the attributes of an element relative to the start-tag. This only applies when the first attribute of a tag starts a line. In other cases, the first attribute on one line is indented the same as the first attribute on the previous line." :group 'nxml :type 'integer :safe #'integerp) (defcustom nxml-bind-meta-tab-to-complete-flag t "Non-nil means to use nXML completion in \\[completion-at-point]." :group 'nxml :type 'boolean :safe #'booleanp) (defcustom nxml-prefer-utf-16-to-utf-8-flag nil "Non-nil means prefer UTF-16 to UTF-8 when saving a buffer. This is used only when a buffer does not contain an encoding declaration and when its current `buffer-file-coding-system' specifies neither UTF-16 nor UTF-8." :group 'nxml :type 'boolean :safe #'booleanp) (defcustom nxml-prefer-utf-16-little-to-big-endian-flag (eq system-type 'windows-nt) "Non-nil means prefer little-endian to big-endian byte-order for UTF-16. This is used only for saving a buffer; when reading the byte-order is auto-detected. It may be relevant both when there is no encoding declaration and when the encoding declaration specifies `UTF-16'." :group 'nxml :type 'boolean :safe #'booleanp) (defcustom nxml-default-buffer-file-coding-system nil "Default value for `buffer-file-coding-system' for a buffer for a new file. A value of nil means use the default value of `buffer-file-coding-system' as normal. A buffer's `buffer-file-coding-system' affects what \\[nxml-insert-xml-declaration] inserts." :group 'nxml :type 'coding-system :safe #'coding-system-p) (defcustom nxml-auto-insert-xml-declaration-flag nil "Non-nil means automatically insert an XML declaration in a new file. The XML declaration is inserted using `nxml-insert-xml-declaration'." :group 'nxml :type 'boolean :safe #'booleanp) (defface nxml-delimited-data '((t (:inherit font-lock-doc-face))) "Face used to highlight data enclosed between delimiters. This is not used directly, but only via inheritance by other faces." :group 'nxml-faces) (defface nxml-name '((t (:inherit font-lock-builtin-face))) "Face used to highlight various names. This includes element and attribute names, processing instruction targets and the CDATA keyword in a CDATA section. This is not used directly, but only via inheritance by other faces." :group 'nxml-faces) (defface nxml-ref '((t (:inherit font-lock-constant-face))) "Face used to highlight character and entity references. This is not used directly, but only via inheritance by other faces." :group 'nxml-faces) (defface nxml-delimiter nil "Face used to highlight delimiters. This is not used directly, but only via inheritance by other faces." :group 'nxml-faces) (defface nxml-text nil "Face used to highlight text." :group 'nxml-faces) (defface nxml-processing-instruction-delimiter '((t (:inherit nxml-delimiter))) "Face used for the delimiters of processing instructions, i.e., ." :group 'nxml-faces) (defface nxml-processing-instruction-target '((t (:inherit font-lock-keyword-face))) "Face used for the target of processing instructions." :group 'nxml-faces) (defface nxml-processing-instruction-content '((t (:inherit nxml-delimited-data))) "Face used for the content of processing instructions." :group 'nxml-faces) (defface nxml-cdata-section-delimiter '((t (:inherit nxml-delimiter))) "Face used for the delimiters of CDATA sections, i.e., ." :group 'nxml-faces) (defface nxml-cdata-section-CDATA '((t (:inherit nxml-name))) "Face used for the CDATA keyword in CDATA sections." :group 'nxml-faces) (defface nxml-cdata-section-content '((t (:inherit nxml-text))) "Face used for the content of CDATA sections." :group 'nxml-faces) (defface nxml-char-ref-number '((t (:inherit nxml-ref))) "Face used for the number in character references. This includes the `x' in hex references." :group 'nxml-faces) (defface nxml-char-ref-delimiter '((t (:inherit nxml-ref))) "Face used for the delimiters of character references, i.e., &# and ;." :group 'nxml-faces) (defface nxml-entity-ref-name '((t (:inherit nxml-ref))) "Face used for the entity name in general entity references." :group 'nxml-faces) (defface nxml-entity-ref-delimiter '((t (:inherit nxml-ref))) "Face used for the delimiters of entity references, i.e., & and ;." :group 'nxml-faces) (defface nxml-tag-delimiter '((t (:inherit nxml-delimiter))) "Face used for the angle brackets delimiting tags. `nxml-tag-slash' is used for slashes." :group 'nxml-faces) (defface nxml-tag-slash '((t (:inherit nxml-tag-delimiter))) "Face used for slashes in tags, both in end-tags and empty-elements." :group 'nxml-faces) (defface nxml-element-prefix '((t (:inherit nxml-name))) "Face used for the prefix of elements." :group 'nxml-faces) (defface nxml-element-colon nil "Face used for the colon in element names." :group 'nxml-faces) (defface nxml-element-local-name '((t (:inherit font-lock-function-name-face))) "Face used for the local name of elements." :group 'nxml-faces) (defface nxml-attribute-prefix '((t (:inherit nxml-name))) "Face used for the prefix of attributes." :group 'nxml-faces) (defface nxml-attribute-colon '((t (:inherit nxml-delimiter))) "Face used for the colon in attribute names." :group 'nxml-faces) (defface nxml-attribute-local-name '((t (:inherit font-lock-variable-name-face))) "Face used for the local name of attributes." :group 'nxml-faces) (defface nxml-namespace-attribute-xmlns '((t (:inherit nxml-attribute-prefix))) "Face used for `xmlns' in namespace attributes." :group 'nxml-faces) (defface nxml-namespace-attribute-colon '((t (:inherit nxml-attribute-colon))) "Face used for the colon in namespace attributes." :group 'nxml-faces) (defface nxml-namespace-attribute-prefix '((t (:inherit nxml-attribute-local-name))) "Face used for the prefix declared in namespace attributes." :group 'nxml-faces) (defface nxml-attribute-value '((t (:inherit font-lock-string-face))) "Face used for the value of attributes." :group 'nxml-faces) (defface nxml-attribute-value-delimiter '((t (:inherit nxml-attribute-value))) "Face used for the delimiters of attribute values." :group 'nxml-faces) (defface nxml-prolog-literal-delimiter '((t (:inherit nxml-delimited-data))) "Face used for the delimiters of literals in the prolog." :group 'nxml-faces) (defface nxml-prolog-literal-content '((t (:inherit nxml-delimited-data))) "Face used for the content of literals in the prolog." :group 'nxml-faces) (defface nxml-prolog-keyword '((t (:inherit font-lock-keyword-face))) "Face used for keywords in the prolog." :group 'nxml-faces) (defface nxml-markup-declaration-delimiter '((t (:inherit nxml-delimiter))) "Face used for the delimiters of markup declarations in the prolog. The delimiters are ." :group 'nxml-faces) (defface nxml-hash '((t (:inherit nxml-name))) "Face used for # before a name in the prolog." :group 'nxml-faces) (defface nxml-glyph '((((type x)) (:family "misc-fixed" :background "light grey" :foreground "black" :weight normal :slant normal)) (t (:background "light grey" :foreground "black" :weight normal :slant normal))) "Face used for glyph for char references." :group 'nxml-faces) ;;; Global variables (defvar-local nxml-parent-document nil "The parent document for a part of a modular document. Use `nxml-parent-document-set' to set it.") (put 'nxml-parent-document 'safe-local-variable 'stringp) (defvar-local nxml-prolog-regions nil "List of regions in the prolog to be fontified. See the function `xmltok-forward-prolog' for more information.") (defvar-local nxml-degraded nil "Non-nil if currently operating in degraded mode. Degraded mode is enabled when an internal error is encountered in the fontification or after-change functions.") (defvar nxml-completion-hook nil "Hook run by `nxml-complete'. This hook is run until success.") (defvar nxml-in-mixed-content-hook nil "Hook to determine whether point is in mixed content. The hook is called without arguments. It should return nil if it is definitely not mixed; non-nil otherwise. The hook will be run until one of the functions returns nil.") (defvar nxml-mixed-scan-distance 4000 "Maximum distance from point to scan when checking for mixed content.") (defvar nxml-end-tag-indent-scan-distance 4000 "Maximum distance from point to scan backwards when indenting end-tag.") (defvar-local nxml-char-ref-extra-display t "Non-nil means display extra information for character references. The extra information consists of a tooltip with the character name and, if `nxml-char-ref-display-glyph-flag' is non-nil, a glyph corresponding to the referenced character following the character reference.") (defvar-keymap nxml-mode-map :doc "Keymap for `nxml-mode'." "C-M-u" #'nxml-backward-up-element "C-M-d" #'nxml-down-element "C-M-n" #'nxml-forward-element "C-M-p" #'nxml-backward-element "M-{" #'nxml-backward-paragraph "M-}" #'nxml-forward-paragraph "M-h" #'nxml-mark-paragraph "C-c C-f" #'nxml-finish-element "C-c ]" #'nxml-finish-element "C-c /" #'nxml-finish-element "C-c C-m" #'nxml-split-element "C-c C-b" #'nxml-balanced-close-start-tag-block "C-c C-i" #'nxml-balanced-close-start-tag-inline "C-c C-x" #'nxml-insert-xml-declaration "C-c C-d" #'nxml-dynamic-markup-word ;; u is for Unicode "C-c C-u" #'nxml-insert-named-char "C-c C-o" nxml-outline-prefix-map "/" #'nxml-electric-slash "M-TAB" #'completion-at-point "S-" #'nxml-mouse-hide-direct-text-content) (defvar nxml-font-lock-keywords '(nxml-fontify-matcher) "Default font lock keywords for `nxml-mode'.") (defsubst nxml-set-face (start end face) (when (and face (< start end)) ;; Prepend, so the character reference highlighting takes precedence over ;; the string highlighting applied syntactically. (font-lock-prepend-text-property start end 'face face))) (defun nxml-parent-document-set (parent-document) "Set `nxml-parent-document' and inherit the DTD &c." ;; FIXME: this does not work. ;; the idea is that by inheriting some variables from the parent, ;; `rng-validate-mode' will validate entities declared in the parent. ;; alas, the most interesting variables (`rng-compile-table' et al) ;; are circular and cannot be printed even with `print-circle'. (interactive "fParent document") (let (dtd current-schema current-schema-file-name compile-table ipattern-table last-ipattern-index) (when (string= (file-truename parent-document) (file-truename buffer-file-name)) (error "Parent document cannot be the same as the document")) (with-current-buffer (find-file-noselect parent-document) (setq dtd rng-dtd current-schema rng-current-schema current-schema-file-name rng-current-schema-file-name compile-table rng-compile-table ipattern-table rng-ipattern-table last-ipattern-index rng-last-ipattern-index parent-document buffer-file-name)) (setq rng-dtd dtd rng-current-schema current-schema rng-current-schema-file-name current-schema-file-name rng-compile-table compile-table rng-ipattern-table ipattern-table rng-last-ipattern-index last-ipattern-index nxml-parent-document parent-document) (message "Set parent document to %s" parent-document) (when rng-validate-mode (rng-validate-while-idle (current-buffer))))) (defvar nxml-prolog-end) ;; nxml-rap.el (defun nxml-syntax-propertize (start end) "Syntactic keywords for `nxml-mode'." ;; Like `sgml-syntax-propertize', but rescan prolog if needed. (when (< start nxml-prolog-end) (nxml-scan-prolog)) (sgml-syntax-propertize start end)) (defvar tildify-space-string) (defvar tildify-foreach-region-function) ;;;###autoload (define-derived-mode nxml-mode text-mode "nXML" ;; We use C-c C-i instead of \\[nxml-balanced-close-start-tag-inline] ;; because Emacs turns C-c C-i into C-c TAB which is hard to type and ;; not mnemonic. "Major mode for editing XML. \\ \\[nxml-finish-element] finishes the current element by inserting an end-tag. C-c C-i closes a start-tag with `>' and then inserts a balancing end-tag leaving point between the start-tag and end-tag. \\[nxml-balanced-close-start-tag-block] is similar but for block rather than inline elements: the start-tag, point, and end-tag are all left on separate lines. If `nxml-slash-auto-complete-flag' is non-nil, then inserting a `") ("<" . ">")))) (setq-local mode-line-process '((nxml-degraded "/degraded"))) ;; We'll determine the fill prefix ourselves (setq-local adaptive-fill-mode nil) (setq-local forward-sexp-function #'nxml-forward-balanced-item) (setq-local indent-line-function #'nxml-indent-line) (setq-local fill-paragraph-function #'nxml-do-fill-paragraph) ;; Comment support ;; This doesn't seem to work too well; ;; I think we should probably roll our own nxml-comment-dwim function. (setq-local comment-indent-function #'nxml-indent-line) (setq-local comment-start "") (setq-local comment-end-skip "[ \t\r\n]*-->") (setq-local comment-line-break-function #'nxml-newline-and-indent) (setq-local comment-quote-nested-function #'nxml-comment-quote-nested) (save-excursion (save-restriction (widen) (with-silent-modifications (nxml-scan-prolog)))) (setq-local syntax-ppss-table sgml-tag-syntax-table) (setq-local syntax-propertize-function #'nxml-syntax-propertize) (add-function :filter-return (local 'filter-buffer-substring-function) #'nxml--buffer-substring-filter) (add-hook 'change-major-mode-hook #'nxml-cleanup nil t) (when (not (and (buffer-file-name) (file-exists-p (buffer-file-name)))) (when (and nxml-default-buffer-file-coding-system (not (local-variable-p 'buffer-file-coding-system))) (setq buffer-file-coding-system nxml-default-buffer-file-coding-system)) ;; When starting a new file, insert the XML declaration. (when (and nxml-auto-insert-xml-declaration-flag (zerop (buffer-size))) (nxml-insert-xml-declaration))) (setq font-lock-defaults '(nxml-font-lock-keywords nil ; highlight comments and strings based on syntax-tables nil ; font-lock-keywords-case-fold-search. XML is case sensitive nil ; no special syntax table (font-lock-extend-region-functions . (nxml-extend-region)) (jit-lock-contextually . t) (font-lock-unfontify-region-function . nxml-unfontify-region) (font-lock-syntactic-face-function . sgml-font-lock-syntactic-face))) (with-demoted-errors "RNG NXML error: %S" (rng-nxml-mode-init))) (defun nxml--buffer-substring-filter (string) ;; The `rng-state' property is huge, so don't copy it to the kill ring. ;; This avoids problems when saving the kill ring with savehist. (when (seq-find (lambda (elem) (plist-get (nth 2 elem) 'rng-state)) (object-intervals string)) (remove-text-properties 0 (length string) '(rng-state nil) string)) string) (defun nxml-cleanup () "Clean up after `nxml-mode'." ;; Disable associated minor modes. (rng-validate-mode -1) ;; Clean up fontification. (save-excursion (widen) (with-silent-modifications (remove-text-properties (point-min) (point-max) '(face nil)))) (remove-hook 'change-major-mode-hook #'nxml-cleanup t)) (defun nxml-degrade (context err) (message "Internal nXML mode error in %s (%s), degrading" context (error-message-string err)) (ding) (setq nxml-degraded t) (setq nxml-prolog-end 1)) ;;; Change management (defvar font-lock-beg) (defvar font-lock-end) (defun nxml-debug-region (start end) (interactive "r") (let ((font-lock-beg start) (font-lock-end end)) (nxml-extend-region) (goto-char font-lock-beg) (set-mark font-lock-end))) ;;; Encodings (defun nxml-insert-xml-declaration () "Insert an XML declaration at the beginning of buffer. The XML declaration will declare an encoding depending on the buffer's `buffer-file-coding-system'." (interactive "*") (let ((coding-system (if (and buffer-file-coding-system (coding-system-p buffer-file-coding-system) (coding-system-get buffer-file-coding-system 'mime-charset)) buffer-file-coding-system (nxml-choose-utf-coding-system)))) (goto-char (point-min)) (insert (format "\n" (nxml-coding-system-name coding-system))))) (defun nxml-prepare-to-save () (unless (and (not enable-multibyte-characters) (local-variable-p 'buffer-file-coding-system) buffer-file-coding-system (or (eq (coding-system-type buffer-file-coding-system) 5) (eq buffer-file-coding-system 'no-conversion))) (save-excursion (setq buffer-file-coding-system (nxml-select-coding-system)))) ;; nil from a function in `write-contents-hooks' means ;; to continue and write the file as normal nil) (defun nxml-select-coding-system () (let* ((suitable-coding-systems (find-coding-systems-region (point-min) (point-max))) (enc-pos (progn (goto-char (point-min)) (xmltok-get-declared-encoding-position))) (enc-name (and (consp enc-pos) (buffer-substring-no-properties (car enc-pos) (cdr enc-pos)))) (coding-system (cond (enc-name (if (string= (downcase enc-name) "utf-16") (nxml-choose-utf-16-coding-system) (nxml-mime-charset-coding-system enc-name))) (enc-pos (nxml-choose-utf-coding-system))))) ;; Make sure we have a coding-system (unless coding-system (setq coding-system (and (not buffer-read-only) (nxml-choose-suitable-coding-system suitable-coding-systems))) (let ((message (if enc-name (format "Unknown encoding %s" enc-name) "XML declaration is not well-formed"))) (cond ((not coding-system) (error "%s" message)) ((y-or-n-p (concat message ". " (format (if enc-name "Save with %s" "Modify and save with encoding %s") (nxml-coding-system-name coding-system)) " ")) (nxml-fix-encoding-declaration enc-pos coding-system)) (t (signal 'quit nil))))) ;; Make sure it can encode all the characters in the buffer (unless (or (memq (coding-system-base coding-system) suitable-coding-systems) (equal suitable-coding-systems '(undecided))) (let ((message (nxml-unsuitable-coding-system-message coding-system enc-name))) (setq coding-system (and (not buffer-read-only) (nxml-choose-suitable-coding-system suitable-coding-systems))) (cond ((not coding-system) (error "%s" message)) ((y-or-n-p (concat message (format ". Save with %s " (nxml-coding-system-name coding-system)))) (nxml-fix-encoding-declaration enc-pos coding-system)) (t (signal 'quit nil))))) ;; Merge the newline type of our existing encoding (let ((current-eol-type (coding-system-eol-type buffer-file-coding-system))) (when (and current-eol-type (integerp current-eol-type)) (setq coding-system (coding-system-change-eol-conversion coding-system current-eol-type)))) coding-system)) (defun nxml-unsuitable-coding-system-message (coding-system &optional enc-name) (if (nxml-coding-system-unicode-p coding-system) "Cannot translate some characters to Unicode" (format "Cannot encode some characters with %s" (or enc-name (nxml-coding-system-name coding-system))))) (defconst nxml-utf-16-coding-systems (and (coding-system-p 'utf-16-be) (coding-system-p 'utf-16-le) '(utf-16-be utf-16-le))) (defconst nxml-utf-coding-systems (cons 'utf-8 nxml-utf-16-coding-systems)) (defun nxml-coding-system-unicode-p (coding-system) (nxml-coding-system-member (coding-system-base coding-system) nxml-utf-coding-systems)) (defun nxml-coding-system-name (coding-system) (setq coding-system (coding-system-base coding-system)) (symbol-name (if (nxml-coding-system-member coding-system nxml-utf-16-coding-systems) 'utf-16 (or (coding-system-get coding-system 'mime-charset) coding-system)))) (defun nxml-fix-encoding-declaration (enc-pos coding-system) (let ((charset (nxml-coding-system-name coding-system))) (cond ((consp enc-pos) (delete-region (car enc-pos) (cdr enc-pos)) (goto-char (car enc-pos)) (insert charset)) ((integerp enc-pos) (goto-char enc-pos) (insert " encoding=\"" charset ?\")) (t (goto-char (point-min)) (insert "\n") (when (and (not enc-pos) (let ((case-fold-search t)) (looking-at xmltok-bad-xml-decl-regexp))) (delete-region (point) (match-end 0))))))) (defun nxml-choose-suitable-coding-system (suitable-coding-systems) (let (ret coding-system) (if (and buffer-file-coding-system (memq (coding-system-base buffer-file-coding-system) suitable-coding-systems)) buffer-file-coding-system (while (and suitable-coding-systems (not ret)) (setq coding-system (car suitable-coding-systems)) (if (coding-system-get coding-system 'mime-charset) (setq ret coding-system) (setq suitable-coding-systems (cdr suitable-coding-systems)))) ret))) (defun nxml-choose-utf-coding-system () (let ((cur (and (local-variable-p 'buffer-file-coding-system) buffer-file-coding-system (coding-system-base buffer-file-coding-system)))) (cond ((car (nxml-coding-system-member cur nxml-utf-coding-systems))) ((and nxml-prefer-utf-16-to-utf-8-flag (coding-system-p 'utf-16-le) (coding-system-p 'utf-16-be)) (if nxml-prefer-utf-16-little-to-big-endian-flag 'utf-16-le 'utf-16-be)) (t 'utf-8)))) (defun nxml-choose-utf-16-coding-system () (let ((cur (and (local-variable-p 'buffer-file-coding-system) buffer-file-coding-system (coding-system-base buffer-file-coding-system)))) (cond ((car (nxml-coding-system-member cur nxml-utf-16-coding-systems))) (nxml-prefer-utf-16-little-to-big-endian-flag (and (coding-system-p 'utf-16-le) 'utf-16-le)) (t (and (coding-system-p 'utf-16-be) 'utf-16-be))))) (defun nxml-coding-system-member (coding-system coding-systems) (let (ret) (while (and coding-systems (not ret)) (if (coding-system-equal coding-system (car coding-systems)) (setq ret coding-systems) (setq coding-systems (cdr coding-systems)))) ret)) ;;; Fontification (defun nxml-unfontify-region (start end) (font-lock-default-unfontify-region start end) (nxml-clear-char-ref-extra-display start end)) (defun nxml-extend-region () "Extend the region to hold the minimum area we can fontify with nXML. Called with `font-lock-beg' and `font-lock-end' dynamically bound." (let ((start font-lock-beg) (end font-lock-end)) (nxml-debug-change "nxml-extend-region(input)" start end) (when (< start nxml-prolog-end) (setq start (point-min))) (cond ((<= end nxml-prolog-end) (setq end nxml-prolog-end)) (t (goto-char start) ;; some font-lock backends (like Emacs 22 jit-lock) snap ;; the region to the beginning of the line no matter what ;; we say here. To mitigate the resulting excess ;; fontification, ignore leading whitespace. (skip-syntax-forward " ") ;; find the beginning of the previous tag (when (not (equal (char-after) ?\<)) (search-backward "<" nxml-prolog-end t)) (nxml-ensure-scan-up-to-date) (nxml-move-outside-backwards) (setq start (point)) (while (< (point) end) (nxml-tokenize-forward)) (setq end (point)))) (when (or (< start font-lock-beg) (> end font-lock-end)) (setq font-lock-beg start font-lock-end end) (nxml-debug-change "nxml-extend-region" start end) t))) (defun nxml-fontify-matcher (bound) "Called as font-lock keyword matcher." (syntax-propertize bound) (unless nxml-degraded (nxml-debug-change "nxml-fontify-matcher" (point) bound) (when (< (point) nxml-prolog-end) ;; Prolog needs to be fontified in one go, and ;; nxml-extend-region makes sure we start at BOB. (cl-assert (bobp)) (nxml-fontify-prolog) (goto-char nxml-prolog-end)) (let (xmltok-errors) (while (and (nxml-tokenize-forward) (<= (point) bound)) ; Intervals are open-ended. (nxml-apply-fontify-rule))) ) ;; Since we did the fontification internally, tell font-lock to not ;; do anything itself. nil) (defun nxml-fontify-prolog () "Fontify the prolog. The buffer is assumed to be prepared for fontification. This does not set the fontified property, but it does clear faces appropriately." (let ((regions nxml-prolog-regions)) (while regions (let ((region (car regions))) (nxml-apply-fontify-rule (aref region 0) (aref region 1) (aref region 2))) (setq regions (cdr regions))))) ;; Vectors identify a substring of the token to be highlighted in some face. ;; Token types returned by xmltok-forward. (put 'start-tag 'nxml-fontify-rule '([nil 1 nxml-tag-delimiter] [-1 nil nxml-tag-delimiter] (element-qname . 1) attributes)) (put 'partial-start-tag 'nxml-fontify-rule '([nil 1 nxml-tag-delimiter] (element-qname . 1) attributes)) (put 'end-tag 'nxml-fontify-rule '([nil 1 nxml-tag-delimiter] [1 2 nxml-tag-slash] [-1 nil nxml-tag-delimiter] (element-qname . 2))) (put 'partial-end-tag 'nxml-fontify-rule '([nil 1 nxml-tag-delimiter] [1 2 nxml-tag-slash] (element-qname . 2))) (put 'empty-element 'nxml-fontify-rule '([nil 1 nxml-tag-delimiter] [-2 -1 nxml-tag-slash] [-1 nil nxml-tag-delimiter] (element-qname . 1) attributes)) (put 'partial-empty-element 'nxml-fontify-rule '([nil 1 nxml-tag-delimiter] [-1 nil nxml-tag-slash] (element-qname . 1) attributes)) (put 'char-ref 'nxml-fontify-rule '([nil 2 nxml-char-ref-delimiter] [2 -1 nxml-char-ref-number] [-1 nil nxml-char-ref-delimiter] char-ref)) (put 'entity-ref 'nxml-fontify-rule '([nil 1 nxml-entity-ref-delimiter] [1 -1 nxml-entity-ref-name] [-1 nil nxml-entity-ref-delimiter])) ;; (put 'comment ;; 'nxml-fontify-rule ;; '([nil 4 nxml-comment-delimiter] ;; [4 -3 nxml-comment-content] ;; [-3 nil nxml-comment-delimiter])) (put 'processing-instruction 'nxml-fontify-rule '([nil 2 nxml-processing-instruction-delimiter] [-2 nil nxml-processing-instruction-delimiter] processing-instruction-content)) (put 'cdata-section 'nxml-fontify-rule '([nil 3 nxml-cdata-section-delimiter] ; [-3 nil nxml-cdata-section-delimiter])) (put 'data 'nxml-fontify-rule '([nil nil nxml-text])) ;; Prolog region types in list returned by xmltok-forward-prolog. (put 'xml-declaration 'nxml-fontify-rule '([nil 2 nxml-processing-instruction-delimiter] [2 5 nxml-processing-instruction-target] [-2 nil nxml-processing-instruction-delimiter])) (put 'xml-declaration-attribute-name 'nxml-fontify-rule '([nil nil nxml-attribute-local-name])) (put 'xml-declaration-attribute-value ;FIXME: What is this for? 'nxml-fontify-rule '([nil 1 nxml-attribute-value-delimiter] [1 -1 nxml-attribute-value] [-1 nil nxml-attribute-value-delimiter])) (put 'processing-instruction-left 'nxml-fontify-rule '([nil 2 nxml-processing-instruction-delimiter] [2 nil nxml-processing-instruction-target])) (put 'processing-instruction-right 'nxml-fontify-rule '([nil -2 nxml-processing-instruction-content] [-2 nil nxml-processing-instruction-delimiter])) (put 'literal 'nxml-fontify-rule '([nil 1 nxml-prolog-literal-delimiter] [1 -1 nxml-prolog-literal-content] [-1 nil nxml-prolog-literal-delimiter])) (put 'keyword 'nxml-fontify-rule '([nil nil nxml-prolog-keyword])) (put 'markup-declaration-open 'nxml-fontify-rule '([0 2 nxml-markup-declaration-delimiter] [2 nil nxml-prolog-keyword])) (put 'markup-declaration-close 'nxml-fontify-rule '([nil nil nxml-markup-declaration-delimiter])) (put 'internal-subset-open 'nxml-fontify-rule '([nil nil nxml-markup-declaration-delimiter])) (put 'internal-subset-close 'nxml-fontify-rule '([nil 1 nxml-markup-declaration-delimiter] [-1 nil nxml-markup-declaration-delimiter])) (put 'hash-name 'nxml-fontify-rule '([nil 1 nxml-hash] [1 nil nxml-prolog-keyword])) (defun nxml-apply-fontify-rule (&optional type start end) (let ((rule (get (or type xmltok-type) 'nxml-fontify-rule))) (unless start (setq start xmltok-start)) (unless end (setq end (point))) (while rule (let* ((action (car rule))) (setq rule (cdr rule)) (cond ((vectorp action) (nxml-set-face (let ((offset (aref action 0))) (cond ((not offset) start) ((< offset 0) (+ end offset)) (t (+ start offset)))) (let ((offset (aref action 1))) (cond ((not offset) end) ((< offset 0) (+ end offset)) (t (+ start offset)))) (aref action 2))) ((and (consp action) (eq (car action) 'element-qname)) (when xmltok-name-end ; maybe nil in partial-end-tag case (nxml-fontify-qname (+ start (cdr action)) xmltok-name-colon xmltok-name-end 'nxml-element-prefix 'nxml-element-colon 'nxml-element-local-name))) ((eq action 'attributes) (nxml-fontify-attributes)) ((eq action 'processing-instruction-content) (nxml-set-face (+ start 2) xmltok-name-end 'nxml-processing-instruction-target) (nxml-set-face (save-excursion (goto-char xmltok-name-end) (skip-chars-forward " \t\r\n") (point)) (- end 2) 'nxml-processing-instruction-content)) ((eq action 'char-ref) (nxml-char-ref-display-extra start end (xmltok-char-number start end))) (t (error "Invalid nxml-fontify-rule action %s" action))))))) (defun nxml-fontify-attributes () (while xmltok-namespace-attributes (nxml-fontify-attribute (car xmltok-namespace-attributes) 'namespace) (setq xmltok-namespace-attributes (cdr xmltok-namespace-attributes))) (while xmltok-attributes (nxml-fontify-attribute (car xmltok-attributes)) (setq xmltok-attributes (cdr xmltok-attributes)))) (defun nxml-fontify-attribute (att &optional namespace-declaration) (if namespace-declaration (nxml-fontify-qname (xmltok-attribute-name-start att) (xmltok-attribute-name-colon att) (xmltok-attribute-name-end att) 'nxml-namespace-attribute-xmlns 'nxml-namespace-attribute-colon 'nxml-namespace-attribute-prefix 'nxml-namespace-attribute-xmlns) (nxml-fontify-qname (xmltok-attribute-name-start att) (xmltok-attribute-name-colon att) (xmltok-attribute-name-end att) 'nxml-attribute-prefix 'nxml-attribute-colon 'nxml-attribute-local-name)) (dolist (ref (xmltok-attribute-refs att)) (let* ((ref-type (aref ref 0)) (ref-start (aref ref 1)) (ref-end (aref ref 2))) (nxml-apply-fontify-rule ref-type ref-start ref-end)))) (defun nxml-fontify-qname (start colon end prefix-face colon-face local-name-face &optional unprefixed-face) (cond (colon (nxml-set-face start colon prefix-face) (nxml-set-face colon (1+ colon) colon-face) (nxml-set-face (1+ colon) end local-name-face)) (t (nxml-set-face start end (or unprefixed-face local-name-face))))) ;;; Editing (defun nxml-electric-slash (arg) "Insert a slash. With a prefix ARG, do nothing other than insert the slash. Otherwise, if `nxml-slash-auto-complete-flag' is non-nil, insert the rest of the end-tag or empty-element if the slash is potentially part of an end-tag or the close of an empty-element. If the slash is part of an end-tag that is the first non-whitespace on the line, reindent the line." (interactive "*P") (nxml-ensure-scan-up-to-date) (let* ((slash-pos (point)) (end-tag-p (and (eq (char-before slash-pos) ?<) (not (nxml-get-inside slash-pos)))) (at-indentation (save-excursion (back-to-indentation) (eq (point) (1- slash-pos))))) (self-insert-command (prefix-numeric-value arg)) (unless arg (if nxml-slash-auto-complete-flag (if end-tag-p (condition-case nil (let ((start-tag-end (nxml-scan-element-backward (1- slash-pos) t))) (when start-tag-end (insert (xmltok-start-tag-qname) ">") ;; copy the indentation of the start-tag (when (and at-indentation (save-excursion (goto-char xmltok-start) (back-to-indentation) (eq (point) xmltok-start))) (save-excursion (indent-line-to (save-excursion (goto-char xmltok-start) (current-column))))))) (nxml-scan-error nil)) (when (and (eq (nxml-token-before) (point)) (eq xmltok-type 'partial-empty-element)) (insert ">")))) (when (and end-tag-p at-indentation) (nxml-indent-line))))) (defun nxml-balanced-close-start-tag-block () "Close the start-tag before point with `>' and insert a balancing end-tag. Point is left between the start-tag and the end-tag. If there is nothing but whitespace before the `<' that opens the start-tag, then put point on a blank line, and put the end-tag on another line aligned with the start-tag." (interactive "*") (nxml-balanced-close-start-tag 'block)) (defun nxml-balanced-close-start-tag-inline () "Close the start-tag before point with `>' and insert a balancing end-tag. Point is left between the start-tag and the end-tag. No extra whitespace is inserted." (interactive "*") (nxml-balanced-close-start-tag 'inline)) (defun nxml-balanced-close-start-tag (block-or-inline) (let ((token-end (nxml-token-before)) (pos (1+ (point))) (token-start xmltok-start)) (unless (or (eq xmltok-type 'partial-start-tag) (and (memq xmltok-type '(start-tag empty-element partial-empty-element)) (>= token-end pos))) (error "Not in a start-tag")) ;; Note that this insertion changes xmltok-start. (insert ">") (if (eq block-or-inline 'inline) (goto-char pos) (goto-char token-start) (back-to-indentation) (if (= (point) token-start) (let ((indent (current-column))) (goto-char pos) (insert "\n") (indent-line-to indent) (goto-char pos) (insert "\n") (indent-line-to (+ nxml-child-indent indent))) (goto-char pos))))) (defun nxml-finish-element () "Finish the current element by inserting an end-tag." (interactive "*") (nxml-finish-element-1 nil)) (defvar nxml-last-split-position nil "Position where `nxml-split-element' split the current element.") (defun nxml-split-element () "Split the current element by inserting an end-tag and a start-tag. Point is left after the newly inserted start-tag. When repeated, split immediately before the previously inserted start-tag and leave point unchanged." (interactive "*") (setq nxml-last-split-position (if (and (eq last-command this-command) nxml-last-split-position) (save-excursion (goto-char nxml-last-split-position) (nxml-finish-element-1 t)) (nxml-finish-element-1 t)))) (defun nxml-finish-element-1 (startp) "Insert an end-tag for the current element and optionally a start-tag. The start-tag is inserted if STARTP is non-nil. Return the position of the inserted start-tag or nil if none was inserted." (interactive "*") (let* ((token-end (nxml-token-before)) (start-tag-end (save-excursion (when (and (< (point) token-end) (memq xmltok-type '(cdata-section processing-instruction comment start-tag end-tag empty-element))) (error "Point is inside a %s" (nxml-token-type-friendly-name xmltok-type))) (nxml-scan-element-backward token-end t))) (starts-line (save-excursion (unless (eq xmltok-type 'start-tag) (error "No matching start-tag")) (goto-char xmltok-start) (back-to-indentation) (eq (point) xmltok-start))) (ends-line (save-excursion (goto-char start-tag-end) (looking-at "[ \t\r\n]*$"))) (start-tag-indent (save-excursion (goto-char xmltok-start) (current-column))) (qname (xmltok-start-tag-qname)) inserted-start-tag-pos) (when (and starts-line ends-line) ;; start-tag is on a line by itself ;; => put the end-tag on a line by itself (unless (<= (point) (save-excursion (back-to-indentation) (point))) (insert "\n")) (indent-line-to start-tag-indent)) (insert "") (when startp (when starts-line (insert "\n") (indent-line-to start-tag-indent)) (setq inserted-start-tag-pos (point)) (insert "<" qname ">") (when (and starts-line ends-line) (insert "\n") (indent-line-to (save-excursion (goto-char xmltok-start) (forward-line 1) (back-to-indentation) (if (= (current-column) (+ start-tag-indent nxml-child-indent)) (+ start-tag-indent nxml-child-indent) start-tag-indent))))) inserted-start-tag-pos)) (defun nxml-comment-quote-nested (_cs _ce unp) "Quote nested comments in buffer. See `comment-quote-nested-function' for more information." (goto-char (point-min)) (save-match-data (while (re-search-forward "-[\\]*-" nil t) (goto-char (match-beginning 0)) (forward-char 1) (if unp (delete-char 1) (insert "\\"))))) ;;; Indentation (defun nxml-indent-line () "Indent current line as XML." (let* ((savep (point)) (indent (condition-case nil (save-excursion (forward-line 0) (skip-chars-forward " \t") (if (>= (point) savep) (setq savep nil)) (or (nxml-compute-indent) 0)) (error 0)))) (if (not (numberp indent)) ;; If something funny is used (e.g. `noindent'), return it. indent (if (< indent 0) (setq indent 0)) ;Just in case. (if savep (save-excursion (indent-line-to indent)) (indent-line-to indent))))) (defun nxml-compute-indent () "Return the indent for the line containing point." (or (nxml-compute-indent-from-matching-start-tag) (nxml-compute-indent-from-previous-line))) (defun nxml-compute-indent-from-matching-start-tag () "Compute the indent for a line with an end-tag using the matching start-tag. When the line containing point ends with an end-tag and does not start in the middle of a token, return the indent of the line containing the matching start-tag, if there is one and it occurs at the beginning of its line. Otherwise return nil." (save-excursion (back-to-indentation) (let ((bol (point))) (let ((inhibit-field-text-motion t)) (end-of-line)) (skip-chars-backward " \t") (and (= (nxml-token-before) (point)) (memq xmltok-type '(end-tag partial-end-tag)) ;; start of line must not be inside a token (or (= xmltok-start bol) (save-excursion (goto-char bol) (nxml-token-after) (= xmltok-start bol)) (eq xmltok-type 'data)) (condition-case nil (nxml-scan-element-backward (point) nil (- (point) nxml-end-tag-indent-scan-distance)) (nxml-scan-error nil)) (< xmltok-start bol) (progn (goto-char xmltok-start) (skip-chars-backward " \t") (bolp)) (current-indentation))))) (defun nxml-compute-indent-from-previous-line () "Compute the indent for a line using the indentation of a previous line." (save-excursion (end-of-line) (let ((eol (point)) bol prev-bol ref before-context after-context) (back-to-indentation) (setq bol (point)) (catch 'indent ;; Move backwards until the start of a non-blank line that is ;; not inside a token. (while (progn (when (= (forward-line -1) -1) (throw 'indent 0)) (back-to-indentation) (if (looking-at "[ \t]*$") t (or prev-bol (setq prev-bol (point))) (nxml-token-after) (not (or (= xmltok-start (point)) (eq xmltok-type 'data)))))) (setq ref (point)) ;; Now scan over tokens until the end of the line to be indented. ;; Determine the context before and after the beginning of the ;; line. (while (< (point) eol) (nxml-tokenize-forward) (cond ((<= bol xmltok-start) (setq after-context (nxml-merge-indent-context-type after-context))) ((and (<= (point) bol) (not (and (eq xmltok-type 'partial-start-tag) (= (point) bol)))) (setq before-context (nxml-merge-indent-context-type before-context))) ((eq xmltok-type 'data) (setq before-context (nxml-merge-indent-context-type before-context)) (setq after-context (nxml-merge-indent-context-type after-context))) ;; If in the middle of a token that looks inline, ;; then indent relative to the previous non-blank line ((eq (nxml-merge-indent-context-type before-context) 'mixed) (goto-char prev-bol) (throw 'indent (current-column))) (t (throw 'indent (nxml-compute-indent-in-token bol)))) (skip-chars-forward " \t\r\n")) (goto-char ref) (+ (current-column) (* nxml-child-indent (+ (if (eq before-context 'start-tag) 1 0) (if (eq after-context 'end-tag) -1 0)))))))) (defun nxml-merge-indent-context-type (context) "Merge the indent context type CONTEXT with the token in `xmltok-type'. Return the merged indent context type. An indent context type is either nil or one of the symbols `start-tag', `end-tag', `markup', `comment', `mixed'." (cond ((memq xmltok-type '(start-tag partial-start-tag)) (if (memq context '(nil start-tag comment)) 'start-tag 'mixed)) ((memq xmltok-type '(end-tag partial-end-tag)) (if (memq context '(nil end-tag comment)) 'end-tag 'mixed)) ((eq xmltok-type 'comment) (cond ((memq context '(start-tag end-tag comment)) context) (context 'mixed) (t 'comment))) (context 'mixed) (t 'markup))) (defun nxml-compute-indent-in-token (pos) "Return the indent for a line that starts inside a token. POS is the position of the first non-whitespace character of the line. This expects the xmltok-* variables to be set up as by `xmltok-forward'." (cond ((memq xmltok-type '(start-tag partial-start-tag empty-element partial-empty-element)) (nxml-compute-indent-in-start-tag pos)) ((eq xmltok-type 'comment) (nxml-compute-indent-in-delimited-token pos "")) ((eq xmltok-type 'cdata-section) (nxml-compute-indent-in-delimited-token pos "")) ((eq xmltok-type 'processing-instruction) (nxml-compute-indent-in-delimited-token pos "")) (t (goto-char pos) (if (and (= (forward-line -1) 0) (< xmltok-start (point))) (back-to-indentation) (goto-char xmltok-start)) (current-column)))) (defun nxml-compute-indent-in-start-tag (pos) "Return the indent for a line that starts inside a start-tag. Also for a line that starts inside an empty element. POS is the position of the first non-whitespace character of the line. This expects the xmltok-* variables to be set up as by `xmltok-forward'." (let ((value-boundary (nxml-attribute-value-boundary pos)) (off 0)) (if value-boundary ;; inside an attribute value (let ((value-start (car value-boundary))) (goto-char pos) (forward-line -1) (if (< (point) value-start) (goto-char value-start) (back-to-indentation))) ;; outside an attribute value (goto-char pos) (while (and (= (forward-line -1) 0) (nxml-attribute-value-boundary (point)))) (cond ((<= (point) xmltok-start) (goto-char xmltok-start) (setq off nxml-attribute-indent) (let ((atts (xmltok-merge-attributes))) (when atts (let* ((att (car atts)) (start (xmltok-attribute-name-start att))) (when (< start pos) (goto-char start) (setq off 0)))))) (t (back-to-indentation)))) (+ (current-column) off))) (defun nxml-attribute-value-boundary (pos) "Return a pair (START . END) if POS is inside an attribute value. Otherwise return nil. START and END are the positions of the start and end of the attribute value containing POS. This expects the xmltok-* variables to be set up as by `xmltok-forward'." (let ((atts (xmltok-merge-attributes)) att value-start value-end value-boundary) (while atts (setq att (car atts)) (setq value-start (xmltok-attribute-value-start att)) (setq value-end (xmltok-attribute-value-end att)) (cond ((and value-start (< pos value-start)) (setq atts nil)) ((and value-start value-end (<= pos value-end)) (setq value-boundary (cons value-start value-end)) (setq atts nil)) (t (setq atts (cdr atts))))) value-boundary)) (defun nxml-compute-indent-in-delimited-token (pos open-delim close-delim) "Return the indent for a line that starts inside a token with delimiters. OPEN-DELIM and CLOSE-DELIM are strings giving the opening and closing delimiters. POS is the position of the first non-whitespace character of the line. This expects the xmltok-* variables to be set up as by `xmltok-forward'." (cond ((let ((end (+ pos (length close-delim)))) (and (<= end (point-max)) (string= (buffer-substring-no-properties pos end) close-delim))) (goto-char xmltok-start)) ((progn (goto-char pos) (forward-line -1) (<= (point) xmltok-start)) (goto-char (+ xmltok-start (length open-delim))) (when (and (string= open-delim "