;;; sgml-mode.el --- SGML- and HTML-editing modes -*- lexical-binding:t -*-
;; Copyright (C) 1992, 1995-1996, 1998, 2001-2019 Free Software
;; Foundation, Inc.
;; Author: James Clark
;; Maintainer: emacs-devel@gnu.org
;; Adapted-By: ESR, Daniel Pfeiffer ,
;; F.Potorti@cnuce.cnr.it
;; Keywords: wp, hypermedia, comm, languages
;; This file is part of GNU Emacs.
;; GNU Emacs is free software: you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.
;; GNU Emacs is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs.  If not, see <https://www.gnu.org/licenses/>.
;;; Commentary:
;; Configurable major mode for editing documents in SGML, the Standard
;; Generalized Markup Language.  As an example, it also contains a mode
;; for editing the derived HTML hypertext markup language.
;;; Code:
(require 'dom)
(require 'seq)
(eval-when-compile (require 'subr-x))
(eval-when-compile
(require 'skeleton)
(require 'cl-lib))
(defgroup sgml nil
  "SGML editing mode."
  :group 'languages
  :link '(custom-group-link :tag "Font Lock Faces group" font-lock-faces))
(defcustom sgml-basic-offset 2
  "Specifies the basic indentation level for `sgml-indent-line'."
  :type 'integer
  ;; Like `sgml-attribute-offset', any integer is harmless as a
  ;; file-local or directory-local value.
  :safe #'integerp)
(defcustom sgml-attribute-offset 0
  "Specifies a delta for attribute indentation in `sgml-indent-line'.

When 0, attribute indentation looks like this:

  <element
    attribute=\"value\">
  </element>

When 2, attribute indentation looks like this:

  <element
      attribute=\"value\">
  </element>"
  :version "25.1"
  :type 'integer
  :safe 'integerp)
(defcustom sgml-xml-mode nil
  "Non-nil means tag insertion functions produce XML-compliant markup.
The variable is made buffer-local when the file has
a DOCTYPE or an XML declaration."
  :version "22.1"
  :type 'boolean)
;; Older name, kept as an alias.
(defvaralias 'sgml-transformation 'sgml-transformation-function)

(defcustom sgml-transformation-function 'identity
  "Default value for `skeleton-transformation-function' in SGML mode."
  :type 'function
  :initialize 'custom-initialize-default
  :set (lambda (sym val)
         (set-default sym val)
         ;; Push the new value into every live buffer that is in an
         ;; SGML-derived mode and is not using XML conventions.
         (dolist (buf (buffer-list))
           (with-current-buffer buf
             (when (and (derived-mode-p 'sgml-mode)
                        (not sgml-xml-mode))
               (setq skeleton-transformation-function val))))))

;; Lets `set-variable' prompt for a function name.
(put 'sgml-transformation-function
     'variable-interactive
     "aTransformation function: ")
(defcustom sgml-mode-hook nil
  "Hook run by command `sgml-mode'.
`text-mode-hook' is run first."
  :type 'hook)
;; The official handling of "--" is complicated in SGML, and
;; historically not well supported by browser HTML parsers.
;; The recommendation for writing HTML comments is to use <!--...-->
;; (where ... doesn't contain "--") to avoid the complications
;; altogether (XML goes even further by requiring this in the spec).
;; So there is probably no need to handle it "correctly".
(defvar sgml-specials '(?\" ?\')
  "List of characters that have a special meaning for SGML mode.
The value is consulted when the `sgml-mode' library is first loaded.
The supported characters are ?\\\", ?\\=', and ?-.

With ?- in the list, double dashes become comment delimiters.
They are really only supposed to delimit comments within DTD
definitions, so ?- is normally left out.")
(defvar sgml-quick-keys nil
  "Non-nil means <, >, &, /, SPC and `sgml-specials' keys act \"electrically\".
This takes effect when first loading the `sgml-mode' library.")
(defvar sgml-mode-map
  ;; A full keymap is needed: `sparse' doesn't allow binding to charsets,
  ;; and the char-table is filled in directly further down.
  (let ((map (make-keymap))
        (menu-map (make-sparse-keymap "SGML")))
    ;; Ordinary command bindings, in definition order.
    (dolist (binding '(("\C-c\C-i" . sgml-tags-invisible)
                       ("/" . sgml-slash)
                       ("\C-c\C-n" . sgml-name-char)
                       ("\C-c\C-t" . sgml-tag)
                       ("\C-c\C-a" . sgml-attributes)
                       ("\C-c\C-b" . sgml-skip-tag-backward)
                       ([?\C-c left] . sgml-skip-tag-backward)
                       ("\C-c\C-f" . sgml-skip-tag-forward)
                       ([?\C-c right] . sgml-skip-tag-forward)
                       ("\C-c\C-d" . sgml-delete-tag)
                       ("\C-c\^?" . sgml-delete-tag)
                       ("\C-c?" . sgml-tag-help)
                       ("\C-c]" . sgml-close-tag)
                       ("\C-c/" . sgml-close-tag)
                       ;; Redundant keybindings, for consistency with TeX mode.
                       ("\C-c\C-o" . sgml-tag)
                       ("\C-c\C-e" . sgml-close-tag)
                       ("\C-c8" . sgml-name-8bit-mode)
                       ("\C-c\C-v" . sgml-validate)))
      (define-key map (car binding) (cdr binding)))
    ;; Optional "electric" keys.
    (when sgml-quick-keys
      (dolist (binding '(("&" . sgml-name-char)
                         ("<" . sgml-tag)
                         (" " . sgml-auto-attributes)
                         (">" . sgml-maybe-end-tag)))
        (define-key map (car binding) (cdr binding)))
      (dolist (quote-char '(?\" ?'))
        (when (memq quote-char sgml-specials)
          (define-key map (string quote-char) 'sgml-name-self))))
    ;; Codes 128..255 go straight into the keymap's char-table.
    (let ((char-table (nth 1 map)))
      (dotimes (offset 128)
        (aset char-table (+ 128 offset) 'sgml-maybe-name-self)))
    ;; Menu bar entries; the definition order determines menu order.
    (define-key map [menu-bar sgml] (cons "SGML" menu-map))
    (dolist (item '(([sgml-validate] "Validate" . sgml-validate)
                    ([sgml-name-8bit-mode]
                     "Toggle 8 Bit Insertion" . sgml-name-8bit-mode)
                    ([sgml-tags-invisible]
                     "Toggle Tag Visibility" . sgml-tags-invisible)
                    ([sgml-tag-help] "Describe Tag" . sgml-tag-help)
                    ([sgml-delete-tag] "Delete Tag" . sgml-delete-tag)
                    ([sgml-skip-tag-forward]
                     "Forward Tag" . sgml-skip-tag-forward)
                    ([sgml-skip-tag-backward]
                     "Backward Tag" . sgml-skip-tag-backward)
                    ([sgml-attributes]
                     "Insert Attributes" . sgml-attributes)
                    ([sgml-tag] "Insert Tag" . sgml-tag)))
      (define-key menu-map (car item) (cdr item)))
    map)
  "Keymap for SGML mode.  See also `sgml-specials'.")
(defun sgml-make-syntax-table (specials)
  "Return a fresh syntax table derived from `text-mode-syntax-table'.
`<' and `>' are made a matching open/close pair, and `:', `_' and `.'
get symbol syntax.  SPECIALS is a list of characters; when it
contains ?-, ?\\\" or ?\\=', that character gets its comment or
string-quote syntax as well."
  (let ((st (make-syntax-table text-mode-syntax-table)))
    ;; Angle brackets pair up like parentheses.
    (modify-syntax-entry ?< "(>" st)
    (modify-syntax-entry ?> ")<" st)
    (dolist (ch '(?: ?_ ?.))
      (modify-syntax-entry ch "_" st))
    (when (memq ?- specials)
      (modify-syntax-entry ?- "_ 1234" st))
    (when (memq ?\" specials)
      (modify-syntax-entry ?\" "\"\"" st))
    (when (memq ?' specials)
      (modify-syntax-entry ?\' "\"'" st))
    st))
(defvar sgml-mode-syntax-table
  (sgml-make-syntax-table sgml-specials)
  "Syntax table used in SGML mode.  See also `sgml-specials'.")
(defconst sgml-tag-syntax-table
  (let ((st (sgml-make-syntax-table sgml-specials))
        (punctuation-chars '(?\( ?\) ?\{ ?\} ?\[ ?\] ?$ ?% ?& ?* ?+ ?/)))
    ;; Inside tags, brackets and operators are plain punctuation.
    (dolist (ch punctuation-chars)
      (modify-syntax-entry ch "." st))
    (if (memq ?' sgml-specials)
        nil
      ;; Avoid that skipping a tag backwards skips any "'" prefixing it.
      (modify-syntax-entry ?' "w" st))
    st)
  "Syntax table used to parse SGML tags.")
(defcustom sgml-name-8bit-mode nil
  "Non-nil means insert non-ASCII characters as named entities."
  :type 'boolean)
;; 256-element vector indexed by character code, eight entries per row.
;; An entry is the entity name for that code, or nil when there is none.
(defvar sgml-char-names
;; Codes 0-31: no names.
[nil nil nil nil nil nil nil nil
nil nil nil nil nil nil nil nil
nil nil nil nil nil nil nil nil
nil nil nil nil nil nil nil nil
;; Codes 32-127: ASCII punctuation; digits and letters have no name.
"nbsp" "excl" "quot" "num" "dollar" "percnt" "amp" "apos"
"lpar" "rpar" "ast" "plus" "comma" "hyphen" "period" "sol"
nil nil nil nil nil nil nil nil
nil nil "colon" "semi" "lt" "eq" "gt" "quest"
"commat" nil nil nil nil nil nil nil
nil nil nil nil nil nil nil nil
nil nil nil nil nil nil nil nil
nil nil nil "lsqb" nil "rsqb" "uarr" "lowbar"
"lsquo" nil nil nil nil nil nil nil
nil nil nil nil nil nil nil nil
nil nil nil nil nil nil nil nil
nil nil nil "lcub" "verbar" "rcub" "tilde" nil
;; Codes 128-159: no names.
nil nil nil nil nil nil nil nil
nil nil nil nil nil nil nil nil
nil nil nil nil nil nil nil nil
nil nil nil nil nil nil nil nil
;; Codes 160-255: Latin-1 entity names.
"nbsp" "iexcl" "cent" "pound" "curren" "yen" "brvbar" "sect"
"uml" "copy" "ordf" "laquo" "not" "shy" "reg" "macr"
"ring" "plusmn" "sup2" "sup3" "acute" "micro" "para" "middot"
"cedil" "sup1" "ordm" "raquo" "frac14" "frac12" "frac34" "iquest"
"Agrave" "Aacute" "Acirc" "Atilde" "Auml" "Aring" "AElig" "Ccedil"
"Egrave" "Eacute" "Ecirc" "Euml" "Igrave" "Iacute" "Icirc" "Iuml"
;; NOTE(review): code 215 (multiplication sign) is nil here although
;; code 247 has "divide" -- confirm whether "times" was intended.
"ETH" "Ntilde" "Ograve" "Oacute" "Ocirc" "Otilde" "Ouml" nil
"Oslash" "Ugrave" "Uacute" "Ucirc" "Uuml" "Yacute" "THORN" "szlig"
"agrave" "aacute" "acirc" "atilde" "auml" "aring" "aelig" "ccedil"
"egrave" "eacute" "ecirc" "euml" "igrave" "iacute" "icirc" "iuml"
"eth" "ntilde" "ograve" "oacute" "ocirc" "otilde" "ouml" "divide"
"oslash" "ugrave" "uacute" "ucirc" "uuml" "yacute" "thorn" "yuml"]
"Vector of symbolic character names without `&' and `;'.")
(put 'sgml-table 'char-table-extra-slots 0)

(defvar sgml-char-names-table
  (let ((table (make-char-table 'sgml-table))
        (code 160))
    ;; Latin-1 characters are code points 160..255, and their entity
    ;; names sit at the same indices of `sgml-char-names'.  Indexing
    ;; the vector with the 7-bit charset position (32..127) instead
    ;; would pair each character with the name of an unrelated ASCII
    ;; character (e.g. the inverted exclamation mark with "excl").
    (while (< code 256)
      (let ((name (aref sgml-char-names code)))
        (when name
          (aset table code name)))
      (setq code (1+ code)))
    table)
  "A table for mapping non-ASCII characters into SGML entity names.
Currently, only Latin-1 characters are supported.")
(defcustom sgml-validate-command
  ;; Prefer tidy because (o)nsgmls is often built without --enable-http,
  ;; which makes it next to useless.
  (or
   ;; tidy is available from http://tidy.sourceforge.net/
   (and (executable-find "tidy")
        "tidy --gnu-emacs yes -utf8 -e -q")
   ;; nsgmls is a free SGML parser in the SP suite available from
   ;; ftp.jclark.com, replaced old `sgmls'.
   (and (executable-find "nsgmls")
        "nsgmls -s")
   ;; onsgmls is the community version of `nsgmls'
   ;; hosted on http://openjade.sourceforge.net/
   (and (executable-find "onsgmls")
        "onsgmls -s")
   ;; Nothing found: the "command" is a hint for the user.
   "Install (o)nsgmls, tidy, or some other SGML validator, and set `sgml-validate-command'")
  "The command to validate an SGML document.
The name of the file visited by the current buffer is appended to
this, separated by a space."
  :type 'string
  :version "21.1")
(defvar sgml-saved-validate-command nil
  "The command last used to validate in this buffer.")
;; I doubt that null end tags are used much for large elements,
;; so use a small distance here.
(defcustom sgml-slash-distance 1000
  "If non-nil, is the maximum distance to search for matching `/'."
  :type '(choice (const nil)
                 integer))
;; A colon-free name: a letter or `_', then letters, digits, `-', `_'
;; or `.'.
(defconst sgml-namespace-re "[_[:alpha:]][-_.[:alnum:]]*")
;; Same as `sgml-namespace-re' except that `:' is accepted in any position.
(defconst sgml-name-re "[_:[:alpha:]][-_.:[:alnum:]]*")
;; `<' followed by a name that may be prefixed by `!', `/' or `?'.
;; Group 1 captures the name together with that prefix.
(defconst sgml-tag-name-re (concat "<\\([!/?]?" sgml-name-re "\\)"))
;; Attribute text: any run of characters other than quotes, `/', `>'
;; and `<', interleaved with complete single- or double-quoted strings.
(defconst sgml-attrs-re "\\(?:[^\"'/><]\\|\"[^\"]*\"\\|'[^']*'\\)*")
(defconst sgml-start-tag-regex (concat "<" sgml-name-re sgml-attrs-re)
"Regular expression that matches a non-empty start tag.
Any terminating `>' or `/' is not matched.")
(defface sgml-namespace
  '((t :inherit font-lock-builtin-face))
  "`sgml-mode' face used to highlight the namespace part of identifiers.")

;; Variable form of the face, referenced from the font-lock keywords.
(defvar sgml-namespace-face 'sgml-namespace)
;; internal
(defconst sgml-font-lock-keywords-1
  `(;; Declarations and processing instructions: <!NAME and <?NAME.
    (,(concat "<\\([!?]" sgml-name-re "\\)") 1 font-lock-keyword-face)
    ;; Element names in start and end tags.  The pattern must begin with
    ;; "</?": a regexp that starts with a bare "?" treats it as a literal
    ;; question mark and so would never match an element name.
    ;; We could use the simpler "\\(" sgml-namespace-re ":\\)?" instead,
    ;; but it would cause a bit more backtracking in the re-matcher.
    (,(concat "</?\\(" sgml-namespace-re "\\)\\(?::\\(" sgml-name-re "\\)\\)?")
     (1 (if (match-end 2) sgml-namespace-face font-lock-function-name-face))
     (2 font-lock-function-name-face nil t))
    ;; Attribute names followed by a quoted value.
    ;; FIXME: this doesn't cover the variables using a default value.
    ;; The first shy-group is an important anchor: it prevents an O(n^2)
    ;; pathological case where we otherwise keep retrying a failing match
    ;; against a very long word at every possible position within the word.
    (,(concat "\\(?:^\\|[ \t]\\)\\(" sgml-namespace-re "\\)\\(?::\\("
              sgml-name-re "\\)\\)?=[\"']")
     (1 (if (match-end 2) sgml-namespace-face font-lock-variable-name-face))
     (2 font-lock-variable-name-face nil t))
    ;; Entity and parameter-entity references.
    (,(concat "[&%]" sgml-name-re ";?") . font-lock-variable-name-face)))
(defconst sgml-font-lock-keywords-2
  (append
   sgml-font-lock-keywords-1
   ;; Evaluated by font-lock at setup time, so that the regexp reflects
   ;; the current `sgml-tag-face-alist'.  It matches
   ;; <TAG attrs>text</TAG> and applies TAG's face to the text (group 3).
   ;; The closing part must be "</\\1>" so that it only matches the end
   ;; tag paired with the opening tag name captured in group 1.
   '((eval
      . (cons (concat "<"
                      (regexp-opt (mapcar 'car sgml-tag-face-alist) t)
                      "\\([ \t][^>]*\\)?>\\([^<]+\\)</\\1>")
              '(3 (cdr (assoc-string (match-string 1) sgml-tag-face-alist t))
                  prepend))))))
;; for font-lock, but must be defvar'ed after
;; sgml-font-lock-keywords-1 and sgml-font-lock-keywords-2 above
(defvar sgml-font-lock-keywords
  sgml-font-lock-keywords-1
  "Rules for highlighting SGML code.  See also `sgml-tag-face-alist'.")
(defun sgml-font-lock-syntactic-face (state)
  "`font-lock-syntactic-face-function' for `sgml-mode'.
STATE is a parse state as returned by `parse-partial-sexp'.  Return
the string face only for strings found while some paren is open,
the comment face inside comments, and nil otherwise."
  (let ((open-parens (nth 9 state))
        (in-string (nth 3 state))
        (in-comment (nth 4 state)))
    (cond
     ;; Don't use string face outside of tags.
     ((and open-parens in-string) font-lock-string-face)
     (in-comment font-lock-comment-face))))
;; Parser cache used while propertizing: a cons (POS . PPSS) where PPSS
;; is the parse state at POS.  The function below advances it in place.
(defvar-local sgml--syntax-propertize-ppss nil)
(defun sgml--syntax-propertize-ppss (pos)
"Return PPSS at POS, fixing the syntax of any lone `>' along the way."
;; The cache can only be advanced, never rewound.
(cl-assert (>= pos (car sgml--syntax-propertize-ppss)))
;; Target depth -1 makes the scan stop as soon as a closer takes the
;; depth below the level it started at, i.e. at an unmatched `>'.
(let ((ppss (parse-partial-sexp (car sgml--syntax-propertize-ppss) pos -1
nil (cdr sgml--syntax-propertize-ppss))))
(while (eq -1 (car ppss))
;; The character just before point is the stray closer: demote it
;; to punctuation.
(put-text-property (1- (point)) (point)
'syntax-table (string-to-syntax "."))
;; Hack attack: rather than recompute the ppss from
;; (car sgml--syntax-propertize-ppss), we manually "fix it".
(setcar ppss 0)
(setq ppss (parse-partial-sexp (point) pos -1 nil ppss)))
;; Record the new position and state for the next call.
(setcdr sgml--syntax-propertize-ppss ppss)
(setcar sgml--syntax-propertize-ppss pos)
ppss))
;; The rules are precompiled inside `eval-and-compile' so that the value
;; is available both when this file is compiled and when it is loaded.
(eval-and-compile
(defconst sgml-syntax-propertize-rules
(syntax-propertize-precompile-rules
;; Use the `b' style of comments to avoid interference with the -- ... --
;; comments recognized when `sgml-specials' includes ?-.
;; FIXME: beware of <!--> blabla <!--> !!
("\\(<\\)!--" (1 "< b"))
("--[ \t\n]*\\(>\\)" (1 "> b"))
;; The `<' of a declaration or processing instruction opens a
;; string-fence; `sgml-syntax-propertize-inside' then looks for its end.
("\\(<\\)[?!]" (1 (prog1 "|>"
(sgml-syntax-propertize-inside end))))
;; Quotes outside of tags should not introduce strings which end up
;; hiding tags. We used to test every quote and mark it as "."
;; if it's outside of tags, but there are too many quotes and
;; the resulting number of calls to syntax-ppss made it too slow
;; (bug#33887), so we're now careful to leave alone any pair
;; of quotes that doesn't hold a < or > char, which is the vast majority:
;; either they're both within a tag (or a comment), in which case it's
;; indeed correct to leave them as is, or they're both outside of tags, in
;; which case they arguably should have punctuation syntax, but it is
;; harmless to let them have string syntax because they won't "hide" any
;; tag or comment from us (and we use the
;; font-lock-syntactic-face-function to make sure those spurious "strings
;; within text" aren't highlighted as strings).
("\\([\"']\\)[^\"'<>]*"
(1 (if (eq (char-after) (char-after (match-beginning 0)))
;; Fast-track case.
(forward-char 1)
;; Point has moved to the end of the text we matched after the
;; quote, but this risks overlooking a match to one of the other
;; regexp in the rules. We could just (goto-char (match-end 1))
;; to solve this, but that would be too easy, so instead we
;; only move back enough to avoid skipping comment ender, which
;; happens to be the only one that we could have overlooked.
(when (eq (char-after) ?>)
(skip-chars-backward "-"))
;; Be careful to call `syntax-ppss' on a position before the one
;; we're going to change, so as not to need to flush the data we
;; just computed.
;; Depth zero at the quote means it is outside any tag, so it
;; gets punctuation syntax.
(if (zerop (save-excursion
(car (sgml--syntax-propertize-ppss
(match-beginning 0)))))
(string-to-syntax ".")))))
)))
(defun sgml-syntax-propertize (start end)
  "Apply the `sgml-mode' syntax properties to the text from START to END."
  ;; Seed the incremental parser cache with the state at START.
  (let ((start-state (syntax-ppss start)))
    (setq sgml--syntax-propertize-ppss (cons start start-state)))
  (cl-assert (>= (cadr sgml--syntax-propertize-ppss) 0))
  ;; START may fall inside a construct opened before it.
  (sgml-syntax-propertize-inside end)
  (let ((apply-rules (syntax-propertize-rules sgml-syntax-propertize-rules)))
    (funcall apply-rules start end))
  ;; Catch any '>' after the last quote.
  (sgml--syntax-propertize-ppss end))
(defun sgml-syntax-propertize-inside (end)
  "Close the string-fence construct that point is inside, if any.
When the parse state at point says we are inside a generic string
\(one opened with a string-fence), look at how that construct starts
to decide which terminator to search for, search for it no further
than END, and give its final character closing string-fence syntax.
If the terminator is not found, point is left at END."
  (let ((ppss (syntax-ppss)))
    (cond
     ((eq (nth 3 ppss) t)
      (let ((endre (save-excursion
                     (goto-char (nth 8 ppss))
                     (cond
                      ;; Marked (CDATA) sections end with "]]>".
                      ((looking-at-p "<!\\[CDATA\\[") "]]>")
                      ;; Processing instructions end with "?>" in XML
                      ;; and with a plain ">" otherwise.
                      ((looking-at-p "<\\?") (if sgml-xml-mode "\\?>" ">"))
                      ;; Any other declaration ends at the next ">".
                      (t ">")))))
        (when (re-search-forward endre end 'move)
          (put-text-property (1- (point)) (point)
                             'syntax-table (string-to-syntax "|<"))))))))
;; internal
(defvar sgml-face-tag-alist nil
  "Alist of face and tag name for facemenu.")

(defvar sgml-tag-face-alist nil
  "Tag names and face or list of faces to fontify with when invisible.
When `font-lock-maximum-decoration' is 1 this is always used for fontifying.
When more these are fontified together with `sgml-font-lock-keywords'.")

(defvar sgml-display-text nil
  "Tag names as lowercase symbols, and display string when invisible.")

;; internal
(defvar sgml-tags-invisible nil)
(defcustom sgml-tag-alist
  '(("![" ("ignore" t) ("include" t))
    ("!attlist")
    ("!doctype")
    ("!element")
    ("!entity"))
  "Alist of tag names for completing read and insertion rules.
This alist is made up as

  ((\"tag\" . TAGRULE)
   ...)

TAGRULE is a list of optionally t (no endtag) or `\\n' (separate endtag by
newlines) or a skeleton with nil, t or `\\n' in place of the interactor
followed by an ATTRIBUTERULE (for an always present attribute) or an
attribute alist.

The attribute alist is made up as

  ((\"attribute\" . ATTRIBUTERULE)
   ...)

ATTRIBUTERULE is a list of optionally t (no value when no input) followed by
an optional alist of possible values."
  :type '(repeat (cons (string :tag "Tag Name")
                       (repeat :tag "Tag Rule" sexp))))

;; Tag rules may contain skeletons, so never accept them from file locals.
(put 'sgml-tag-alist 'risky-local-variable t)
(defcustom sgml-tag-help
  '(("!"        . "Empty declaration for comment")
    ("!["       . "Embed declarations with parser directive")
    ("!attlist" . "Tag attributes declaration")
    ("!doctype" . "Document type (DTD) declaration")
    ("!element" . "Tag declaration")
    ("!entity"  . "Entity (macro) declaration"))
  "Alist of tag name and short description."
  :type '(repeat (cons (string :tag "Tag Name")
                       (string :tag "Description"))))
(defvar sgml-empty-tags nil
  "List of tags whose !ELEMENT definition says EMPTY.")

(defvar sgml-unclosed-tags nil
  "List of tags whose !ELEMENT definition says the end-tag is optional.")
(defun sgml-xml-guess ()
  "Guess whether the current buffer is XML.  Return non-nil if so.
The buffer counts as XML when its file name extension is \"xml\",
when it is empty and the extension is \"xhtml\", when it starts with
an XML declaration, or when it has a DOCTYPE whose public
identifier mentions XML or XHTML."
  (save-excursion
    (goto-char (point-min))
    (or (string= "xml" (file-name-extension (or buffer-file-name "")))
        ;; Maybe the buffer-size check isn't needed, I don't know.
        (and (zerop (buffer-size))
             (string= "xhtml" (file-name-extension (or buffer-file-name ""))))
        (looking-at "\\s-*<\\?xml")
        (when (re-search-forward
               (eval-when-compile
                 ;; <!DOCTYPE NAME KEYWORD "public-id" "system-id">;
                 ;; group 3 is the public identifier.
                 (mapconcat 'identity
                            '("<!DOCTYPE" "\\(\\w+\\)" "\\(\\w+\\)"
                              "\"\\([^\"]+\\)\"" "\"\\([^\"]+\\)\"")
                            "\\s-+"))
               nil t)
          (string-match "X\\(HT\\)?ML" (match-string 3))))))

(defvar v2)                             ; free for skeleton

(defun sgml-comment-indent-new-line (&optional soft)
  "Break the line inside a comment, continuing it with `--' delimiters.
SOFT is passed on to `comment-indent-new-line'."
  (let ((comment-start "-- ")
        (comment-start-skip "\\(<!\\)?--[ \t]*")
        (comment-end " --")
        (comment-style 'plain))
    (comment-indent-new-line soft)))

(defun sgml-mode-facemenu-add-face-function (face _end)
  "Return the start tag to insert for FACE, for use by facemenu.
The matching end tag is stored in `facemenu-end-add-face'.  FACE is
either a face listed in `sgml-face-tag-alist' or a one-element
list holding a (:foreground COLOR) or (:background COLOR) spec;
anything else signals an error."
  (let ((tag-face (cdr (assq face sgml-face-tag-alist))))
    (cond (tag-face
           (setq tag-face (funcall skeleton-transformation-function tag-face))
           (setq facemenu-end-add-face (concat "</" tag-face ">"))
           (concat "<" tag-face ">"))
          ((and (consp face)
                (consp (car face))
                (null (cdr face))
                (memq (caar face) '(:foreground :background)))
           (setq facemenu-end-add-face "</span>")
           (format "<span style=\"%s:%s\">"
                   (if (eq (caar face) :foreground)
                       "color"
                     "background-color")
                   (cadr (car face))))
          (t
           (error "Face not configured for %s mode"
                  (format-mode-line mode-name))))))
(defun sgml-fill-nobreak ()
  "Don't break between a tag name and its first argument.
This function is designed for use in `fill-nobreak-predicate'.

    <a href=\"some://where\" type=\"text/plain\">
      ^                   ^
      | no break here     | but still allowed here"
  (save-excursion
    (skip-chars-backward " \t")
    ;; Non-nil only when the word just before the whitespace is a tag
    ;; name, i.e. it is directly preceded by `<' (plus optional /, ? or !).
    (and (not (zerop (skip-syntax-backward "w_")))
         (skip-chars-backward "/?!")
         (eq (char-before) ?<))))
;; Value-less declarations: they only tell the byte-compiler that these
;; are special variables.  `sgml-mode' sets both buffer-locally.
;; Presumably the real definitions live in the tildify library.
(defvar tildify-space-string)
(defvar tildify-foreach-region-function)
;; NOTE(review): this region has lost text and cannot be read as Lisp.
;; `comment-start' is set to an empty string with no matching
;; `comment-end', the string that begins the `comment-start-skip' value
;; is never closed, and from there the text runs straight into the tail
;; of what looks like a different function (one that returns
;; (TYPE . START) conses).  Several string literals above that point
;; look emptied as well ("" where a delimiter would be expected).
;; Restore this span from a pristine copy of the file; the code below is
;; reproduced unchanged.
;;;###autoload
(define-derived-mode sgml-mode text-mode '(sgml-xml-mode "XML" "SGML")
"Major mode for editing SGML documents.
Makes > match <.
Keys <, &, SPC within <>, \", / and \\=' can be electric depending on
`sgml-quick-keys'.
An argument of N to a tag-inserting command means to wrap it around
the next N words. In Transient Mark mode, when the mark is active,
N defaults to -1, which means to wrap it around the current region.
If you like upcased tags, put (setq sgml-transformation-function \\='upcase)
in your init file.
Use \\[sgml-validate] to validate your document with an SGML parser.
Do \\[describe-variable] sgml- SPC to see available variables.
Do \\[describe-key] on the following bindings to discover what they do.
\\{sgml-mode-map}"
(make-local-variable 'sgml-saved-validate-command)
(make-local-variable 'facemenu-end-add-face)
;; If encoding does not allow non-break space character, use reference.
;; FIXME: Perhaps use &nbsp; if possible (e.g. when we know its HTML)?
(setq-local tildify-space-string
(if (equal (decode-coding-string
(encode-coding-string " " buffer-file-coding-system)
buffer-file-coding-system) " ")
" " " "))
;; FIXME: Use the fact that we're parsing the document already
;; rather than using regex-based filtering.
(setq-local tildify-foreach-region-function
(apply-partially
'tildify-foreach-ignore-environments
`((,(eval-when-compile
(concat
"<\\("
(regexp-opt '("pre" "dfn" "code" "samp" "kbd" "var"
"PRE" "DFN" "CODE" "SAMP" "KBD" "VAR"))
"\\)\\>[^>]*>"))
. ("" 1 ">"))
("")
("<" . ">"))))
;;(make-local-variable 'facemenu-remove-face-function)
;; A start or end tag by itself on a line separates a paragraph.
;; This is desirable because SGML discards a newline that appears
;; immediately after a start tag or immediately before an end tag.
(setq-local paragraph-start (concat "[ \t]*$\\|\
\[ \t]*?\\(" sgml-name-re sgml-attrs-re "\\)?>"))
(setq-local paragraph-separate (concat paragraph-start "$"))
(setq-local adaptive-fill-regexp "[ \t]*")
(add-hook 'fill-nobreak-predicate 'sgml-fill-nobreak nil t)
(setq-local indent-line-function 'sgml-indent-line)
(setq-local comment-start "")
(setq-local comment-indent-function 'sgml-comment-indent)
(setq-local comment-line-break-function 'sgml-comment-indent-new-line)
(setq-local skeleton-further-elements '((completion-ignore-case t)))
(setq-local skeleton-end-hook
(lambda ()
(or (eolp)
(not (or (eq v2 '\n) (eq (car-safe v2) '\n)))
(newline-and-indent))))
(setq font-lock-defaults '((sgml-font-lock-keywords
sgml-font-lock-keywords-1
sgml-font-lock-keywords-2)
nil t nil
(font-lock-syntactic-face-function
. sgml-font-lock-syntactic-face)))
(setq-local syntax-propertize-function #'sgml-syntax-propertize)
(setq-local facemenu-add-face-function 'sgml-mode-facemenu-add-face-function)
(setq-local sgml-xml-mode (sgml-xml-guess))
(unless sgml-xml-mode
(setq-local skeleton-transformation-function sgml-transformation-function))
;; This will allow existing comments within declarations to be
;; recognized.
;; I can't find a clear description of SGML/XML comments, but it seems that
;; the only reliable ones are <!-- ... --> although it's not clear what
;; "..." can contain. It used to accept -- ... -- as well, but that was
;; apparently a mistake.
(setq-local comment-start-skip ",
;; or only if ?- is in sgml-specials, so match explicitly
(let ((start (point)))
(unless (re-search-forward comment-end-skip pos 'move)
(list 0 nil nil nil t nil nil nil start))))
((and sgml-xml-mode (looking-at "<\\?"))
;; Processing Instructions.
;; In SGML, it's basically a normal tag of the form
;; but in XML, it takes the form ... ?>.
(let ((pi-start (point)))
(unless (search-forward "?>" pos 'move)
(list 0 nil nil 'pi nil nil nil nil pi-start))))
(t
;; We've reached a tag. Parse it.
;; FIXME: Handle net-enabling start-tags
(parse-partial-sexp (point) pos 0))))))
(cond
((memq (nth 3 state) '(cdata pi)) (cons (nth 3 state) (nth 8 state)))
((nth 3 state) (cons 'string (nth 8 state)))
((nth 4 state) (cons 'comment (nth 8 state)))
((and state (> (nth 0 state) 0)) (cons 'tag (nth 1 state)))
(t (cons 'text text-start))))))
(defun sgml-beginning-of-tag (&optional only-immediate)
  "Skip to beginning of tag and return its name.
If this can't be done, return nil.
When point is not directly inside a tag but inside some other
non-text element, move to the start of that element and try once
more from there, unless ONLY-IMMEDIATE is non-nil."
  (let* ((context (sgml-lexical-context))
         (kind (car context)))
    (cond
     ((eq kind 'tag)
      (goto-char (cdr context))
      (when (looking-at sgml-tag-name-re)
        (match-string-no-properties 1)))
     ;; Not directly in a tag and no retry allowed.
     (only-immediate nil)
     ;; In a string, comment, etc.: retry from where that element starts.
     ((not (eq kind 'text))
      (goto-char (cdr context))
      (sgml-beginning-of-tag t)))))
(defun sgml-value (alist)
  "Interactively insert value taken from attribute-rule ALIST.
See `sgml-tag-alist' for info about attribute rules."
  (setq alist (cdr alist))
  (let ((head (car alist)))
    (cond
     ;; The rule names a fixed value: insert it without asking.
     ((stringp head)
      (insert "=\"" head ?\"))
     ;; The value is optional (only outside XML mode): ask, and take the
     ;; opening `="' back out again when the answer is empty.
     ((and (eq head t) (not sgml-xml-mode))
      (when (cdr alist)
        (insert "=\"")
        (setq alist (skeleton-read (lambda ()
                                     (completing-read
                                      "Value: " (cdr alist)))))
        (if (string< "" alist)
            (insert alist ?\")
          (delete-char -2))))
     ;; The value is mandatory: always emit the quotes.
     (t
      (insert "=\"")
      (if (cdr alist)
          (insert (skeleton-read (lambda ()
                                   (completing-read "Value: " alist))))
        (when (null alist)
          (insert (skeleton-read '(read-string "Value: ")))))
      (insert ?\")))))
(defun sgml-quote (start end &optional unquotep)
  "Quote SGML text in region START ... END.
Only &, <, >, ' and \" characters are quoted, the rest is left
untouched.  This is sufficient to use quoted text as SGML argument.

With prefix argument UNQUOTEP, unquote the region.  All numeric entities,
\"amp\", \"lt\", \"gt\" and \"quot\" named entities are unquoted."
  (interactive "r\nP")
  (save-restriction
    (narrow-to-region start end)
    (goto-char (point-min))
    (if unquotep
        ;; Entity names are case-sensitive.  Matching them with case
        ;; folding would let e.g. "&AMP;" through to the lookup below,
        ;; which only knows the lower-case initials and would then fall
        ;; into the numeric branch with no number to parse.
        (let ((case-fold-search nil))
          ;; FIXME: We should unquote other named character references as well.
          (while (re-search-forward
                  "\\(&\\(amp\\|quot\\|lt\\|gt\\|#\\([0-9]+\\|[xX][0-9a-fA-F]+\\)\\)\\)\\([][<>&;\n\t \"%!'(),/=?]\\|$\\)"
                  nil t)
            (replace-match
             (string
              ;; Named entities are told apart by their first letter;
              ;; anything else is a decimal or hexadecimal reference.
              (or (cdr (assq (char-after (match-beginning 2))
                             '((?a . ?&) (?q . ?\") (?l . ?<) (?g . ?>))))
                  (let ((num (match-string 3)))
                    (if (or (eq ?x (aref num 0)) (eq ?X (aref num 0)))
                        (string-to-number (substring num 1) 16)
                      (string-to-number num 10)))))
             ;; Consume the terminator only when it is a semicolon.
             t t nil (if (eq (char-before (match-end 0)) ?\;) 0 1))))
      (while (re-search-forward "[&<>\"']" nil t)
        (replace-match (cdr (assq (char-before) '((?& . "&amp;")
                                                  (?< . "&lt;")
                                                  (?> . "&gt;")
                                                  (?\" . "&#34;")
                                                  (?' . "&#39;"))))
                       t t)))))
(defun sgml-pretty-print (beg end)
  "Simple-minded pretty printer for SGML.
Re-indents the code and inserts newlines between BEG and END.
You might want to turn on `auto-fill-mode' to get better results."
  ;; TODO:
  ;; - insert newline between some start-tag and text.
  ;; - don't insert newline in front of some end-tags.
  (interactive "r")
  (save-excursion
    ;; Put the bounds in order, then start at the lower one.
    (unless (< beg end)
      (let ((lower end))
        (setq end beg
              beg lower)))
    (goto-char beg)
    ;; Don't use narrowing because it screws up auto-indent.
    ;; A marker keeps END valid while newlines are inserted.
    (setq end (copy-marker end t))
    (with-syntax-table sgml-tag-syntax-table
      (while (re-search-forward "<" end t)
        (goto-char (match-beginning 0))
        ;; Break the line before the tag unless the tag already starts
        ;; its line.
        (unless (or ;;(looking-at "</")
                 (progn (skip-chars-backward " \t") (bolp)))
          (reindent-then-newline-and-indent))
        (sgml-forward-sexp 1)))
    ;; (indent-region beg end)
    ))
;; Parsing
;; Record describing one parsed tag.  The only constructor is the
;; positional `sgml-make-tag', which takes TYPE START END NAME.
(cl-defstruct (sgml-tag
               (:constructor sgml-make-tag (type start end name)))
  type
  start
  end
  name)
(defsubst sgml-parse-tag-name ()
  "Skip past a tag-name, and return the name.
The name is the run of word and symbol constituents starting at
point; it is returned without text properties."
  (let ((name-start (point)))
    (skip-syntax-forward "w_")
    (buffer-substring-no-properties name-start (point))))
(defun sgml-tag-text-p (start end)
  "Return non-nil if text between START and END is a tag.
Checks among other things that the tag does not contain spurious
unquoted < or > chars inside, which would indicate that it
really isn't a tag after all."
  (save-excursion
    (with-syntax-table sgml-tag-syntax-table
      ;; The scan stops early if the depth ever reaches 2; a well-formed
      ;; tag is back at depth 0 when the scan ends.
      (let ((depth (car (parse-partial-sexp start end 2))))
        (zerop depth)))))
(defun sgml--find-<>-backward (limit)
  "Search backward for a `<' or `>' character.
The character must have open or close syntax.
Return t with point on the character if one is found before LIMIT.
Otherwise return nil, with point where the last search left it."
  (let ((found nil))
    (while (and (not found)
                (re-search-backward "[<>]" limit 'move))
      ;; Syntax classes 4 and 5 are "open" and "close": only a bracket
      ;; with one of those is the one we want.
      (when (memq (syntax-class (syntax-after (point))) '(4 5))
        (setq found t)))
    found))
;; NOTE(review): this region has lost text and cannot be read as Lisp.
;; In the `comment' branch the `search-backward' call is left with an
;; empty string, and the text then jumps into a list of
;; (\\='KIND (insert ...)) clauses ending in "Nothing to close", which
;; look like the tail of a separate close-tag command.  Further down an
;; `insert' call starts with an empty string where the opening of an end
;; tag would be expected.  Restore this span from a pristine copy of the
;; file; the code below is reproduced unchanged.
(defun sgml-parse-tag-backward (&optional limit)
"Parse an SGML tag backward, and return information about the tag.
Assume that parsing starts from within a textual context.
Leave point at the beginning of the tag."
(catch 'found
(let (tag-type tag-start tag-end name)
(or (sgml--find-<>-backward limit)
(error "No tag found"))
(when (eq (char-after) ?<)
;; Oops!! Looks like we were not in a textual context after all!.
;; Let's try to recover.
;; Remember the tag-start so we don't need to look for it later.
;; This is not just an optimization but also makes sure we don't get
;; stuck in infloops in cases where "looking back for <" would not go
;; back far enough.
(setq tag-start (point))
(with-syntax-table sgml-tag-syntax-table
(let ((pos (point)))
(condition-case nil
;; FIXME: This does not correctly skip over PI an CDATA tags.
(sgml-forward-sexp 1)
(scan-error
;; This < seems to be just a spurious one, let's ignore it.
(goto-char pos)
(throw 'found (sgml-parse-tag-backward limit))))
;; Check it is really a tag, without any extra < or > inside.
(unless (sgml-tag-text-p pos (point))
(goto-char pos)
(throw 'found (sgml-parse-tag-backward limit)))
(forward-char -1))))
(setq tag-end (1+ (point)))
(cond
((sgml-looking-back-at "--") ; comment
(setq tag-type 'comment
tag-start (or tag-start (search-backward ""))
('cdata (insert "]]>"))
('pi (insert " ?>"))
('jsp (insert " %>"))
('tag (insert " />"))
('text
(let ((context (save-excursion (sgml-get-context))))
(if context
(progn
(insert "" (sgml-tag-name (car (last context))) ">")
(indent-according-to-mode)))))
(_
(error "Nothing to close"))))
(defun sgml-empty-tag-p (tag-name)
  "Return non-nil if TAG-NAME is an implicitly empty tag.
Always nil in XML mode.  Otherwise TAG-NAME is looked up in
`sgml-empty-tags', ignoring case."
  (unless sgml-xml-mode
    (assoc-string tag-name sgml-empty-tags 'ignore-case)))
(defun sgml-unclosed-tag-p (tag-name)
  "Return non-nil if TAG-NAME is a tag for which an end-tag is optional.
Always nil in XML mode.  Otherwise TAG-NAME is looked up in
`sgml-unclosed-tags', ignoring case."
  (unless sgml-xml-mode
    (assoc-string tag-name sgml-unclosed-tags 'ignore-case)))
(defun sgml-calculate-indent (&optional lcon)
"Calculate the column to which this line should be indented.
LCON is the lexical context, if any."
(unless lcon (setq lcon (sgml-lexical-context)))
;; Indent comment-start markers inside