From d5a77bd1dc45e0638df3e4c763a168912c93b5b5 Mon Sep 17 00:00:00 2001 From: David Fussner Date: Wed, 13 Sep 2023 11:59:54 +0100 Subject: [PATCH] Fix behavior of xref commands in TeX buffers * lib-src/etags.c (longopts): Add new option --tex-alt-forms. (TeX_commands): Improve parsing of commands in TeX buffers. (TEX_defenv): Expand list of commands to tag by default in TeX buffers. (TeX_help): * doc/emacs/maintaining.texi (Tag Syntax): Document new tagged commands and new user option. (Identifier Search): Add note about auto-mode-alist and xref-find-references. * lisp/textmodes/tex-mode.el (tex-common-initialization): Set up xref modifications for in-tree TeX modes. (tex-thingatpt-modes-list): New var. (tex-thingatpt-is-texsymbol): New defcustom. (tex-set-thingatpt-symbol): New command to apply value of previous buffer-locally. (tex--symbol-or-texsymbol): New helper function for previous. (tex--thing-at-point): New function to return texsymbol 'thing-at-point'. (tex-thingatpt--beginning-of-texsymbol) (tex-thingatpt--end-of-texsymbol): New functions to define texsymbol "thing" for 'thing-at-point'. (tex-thingatpt-syntax-table, tex-escape-char): New vars to do the same. (tex-thingatpt-include-escape): New defcustom to refine behavior of previous. (tex--include-escape-p): New function to do the same. (tex-thingatpt-syntax-table): New function to access and modify the syntax table of the same name. --- doc/emacs/maintaining.texi | 33 +++++- lib-src/etags.c | 122 ++++++++++++++++++--- lisp/textmodes/tex-mode.el | 216 +++++++++++++++++++++++++++++++++++++ 3 files changed, 357 insertions(+), 14 deletions(-) diff --git a/doc/emacs/maintaining.texi b/doc/emacs/maintaining.texi index a95335f3df2..44b8b304026 100644 --- a/doc/emacs/maintaining.texi +++ b/doc/emacs/maintaining.texi @@ -2457,6 +2457,13 @@ Identifier Search referenced. The XREF mode commands are available in this buffer, see @ref{Xref Commands}. 
+When invoked in a buffer whose major mode uses the @code{etags} +backend, @kbd{M-?} searches files and buffers whose major mode matches +that of the original buffer. It guesses that mode from file +extensions, so if @kbd{M-?} seems to be skipping relevant buffers or +files, try customizing the variable @code{auto-mode-alist} to include +the missing extensions (@pxref{Choosing Modes}). + @vindex xref-auto-jump-to-first-xref If the value of the variable @code{xref-auto-jump-to-first-xref} is @code{t}, @code{xref-find-references} automatically jumps to the first @@ -2672,8 +2679,23 @@ Tag Syntax @code{\section}, @code{\subsection}, @code{\subsubsection}, @code{\eqno}, @code{\label}, @code{\ref}, @code{\cite}, @code{\bibitem}, @code{\part}, @code{\appendix}, @code{\entry}, -@code{\index}, @code{\def}, @code{\newcommand}, @code{\renewcommand}, -@code{\newenvironment} and @code{\renewenvironment} are tags. +@code{\index}, @code{\def}, @code{\edef}, @code{\gdef}, @code{\xdef}, +@code{\newcommand}, @code{\renewcommand}, @code{\newenvironment}, +@code{\renewenvironment}, @code{\DeclareRobustCommand}, +@code{\newrobustcmd}, @code{\renewrobustcmd}, @code{\providecommand}, +@code{\providerobustcmd}, @code{\NewDocumentCommand}, +@code{\RenewDocumentCommand}, @code{\ProvideDocumentCommand}, +@code{\DeclareDocumentCommand}, @code{\NewExpandableDocumentCommand}, +@code{\RenewExpandableDocumentCommand}, +@code{\ProvideExpandableDocumentCommand}, +@code{\DeclareExpandableDocumentCommand}, +@code{\NewDocumentEnvironment}, @code{\RenewDocumentEnvironment}, +@code{\ProvideDocumentEnvironment}, +@code{\DeclareDocumentEnvironment}, @code{\csdef}, @code{\csedef}, +@code{\csgdef}, @code{\csxdef}, @code{\csletcs}, @code{\cslet}, +@code{\letcs}, and @code{\let} are tags. So too are the arguments of +any starred variants of these commands, when such variants currently +exist. 
Other commands can make tags as well, if you specify them in the environment variable @env{TEXTAGS} before invoking @command{etags}. The @@ -2689,6 +2711,13 @@ Tag Syntax specifies (using Bourne shell syntax) that the commands @samp{\mycommand} and @samp{\myothercommand} also define tags. +The @samp{--tex-alt-forms} option causes each tag to have two names, +one with and one without the @TeX{} escape character, usually +@samp{\}. This may be helpful when mixing traditional @TeX{} or +@LaTeX{} constructs (@samp{\def}) with newer constructs from the +@samp{etoolbox} package (@samp{\csdef}). Use of this option will +double the size of any @TeX{}-related sections in your tags file. + @item In Lisp code, any function defined with @code{defun}, any variable defined with @code{defvar} or @code{defconst}, and in general the diff --git a/lib-src/etags.c b/lib-src/etags.c index 147ecbd7c1b..3a6682fe451 100644 --- a/lib-src/etags.c +++ b/lib-src/etags.c @@ -475,6 +475,7 @@ #define xrnew(op, n, m) ((op) = xnrealloc (op, n, (m) * sizeof *(op))) static bool ignoreindent; /* -I: ignore indentation in C */ static int packages_only; /* --packages-only: in Ada, only tag packages*/ static int class_qualify; /* -Q: produce class-qualified tags in C++/Java */ +static int tex_alt_forms; /* --tex-alt-forms: tag names w/ and w/o escape */ static int debug; /* --debug */ /* STDIN is defined in LynxOS system headers */ @@ -509,6 +510,7 @@ #define STDIN 0x1001 /* returned by getopt_long on --parse-stdin */ { "no-regex", no_argument, NULL, 'R' }, { "ignore-case-regex", required_argument, NULL, 'c' }, { "parse-stdin", required_argument, NULL, STDIN }, + { "tex-alt-forms", no_argument, &tex_alt_forms, 1 }, { "version", no_argument, NULL, 'V' }, #if CTAGS /* Ctags options */ @@ -792,12 +794,28 @@ #define STDIN 0x1001 /* returned by getopt_long on --parse-stdin */ "In LaTeX text, the argument of any of the commands '\\chapter',\n\ '\\section', '\\subsection', '\\subsubsection', '\\eqno', 
'\\label',\n\ '\\ref', '\\cite', '\\bibitem', '\\part', '\\appendix', '\\entry',\n\ -'\\index', '\\def', '\\newcommand', '\\renewcommand',\n\ -'\\newenvironment' or '\\renewenvironment' is a tag.\n\ +'\\index', '\\def', '\\edef', '\\gdef', '\\xdef', '\\newcommand',\n\ +'\\renewcommand', '\\newenvironment', '\\renewenvironment',\n\ +'\\DeclareRobustCommand', '\\newrobustcmd', '\\renewrobustcmd',\n\ +'\\providecommand', '\\providerobustcmd', '\\NewDocumentCommand',\n\ +'\\RenewDocumentCommand', '\\ProvideDocumentCommand',\n\ +'\\DeclareDocumentCommand', '\\NewExpandableDocumentCommand',\n\ +'\\RenewExpandableDocumentCommand', '\\ProvideExpandableDocumentCommand',\n\ +'\\DeclareExpandableDocumentCommand', '\\NewDocumentEnvironment',\n\ +'\\RenewDocumentEnvironment', '\\ProvideDocumentEnvironment',\n\ +'\\DeclareDocumentEnvironment', '\\csdef', '\\csedef', '\\csgdef',\n\ +'\\csxdef', '\\csletcs', '\\cslet', '\\letcs', or '\\let' is a tag.\n\ +So is the argument of any of the starred variants of these commands,\n\ +when a starred variant currently exists.\n\ \n\ Other commands can be specified by setting the environment variable\n\ 'TEXTAGS' to a colon-separated list like, for example,\n\ - TEXTAGS=\"mycommand:myothercommand\"."; + TEXTAGS=\"mycommand:myothercommand\".\n\ +\n\ +The '--tex-alt-forms' option causes each tag to have two names, one\n\ +with and one without the TeX escape char, usually '\\'. This may be\n\ +helpful when mixing traditional TeX or LaTeX constructs ('\\def')\n\ +with newer constructs from the 'etoolbox' package ('\\csdef')."; static const char *Texinfo_suffixes [] = @@ -5735,12 +5753,27 @@ Scheme_functions (FILE *inf) static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */ -/* Default set of control sequences to put into TEX_toktab. - The value of environment var TEXTAGS is prepended to this. */ +/* Default set of control sequences to put into TEX_toktab. The value of + environment var TEXTAGS is prepended to this.
(2023) Add variants of + '\def', some additional LaTeX (and former xparse) commands, and common + variants from the 'etoolbox' package. Also, add starred variants of the + commands if they exist. */ static const char *TEX_defenv = "\ -:chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\ -:part:appendix:entry:index:def\ -:newcommand:renewcommand:newenvironment:renewenvironment"; +:chapter*:section*:subsection*:subsubsection*:part*:label:ref\ +:chapter:section:subsection:subsubsection:eqno:cite:bibitem:part\ +:appendix:entry:index:def:edef:gdef:xdef:newcommand*:newcommand\ +:renewcommand*:renewcommand:newenvironment*:newenvironment\ +:renewenvironment*:renewenvironment:DeclareRobustCommand*\ +:DeclareRobustCommand:renewrobustcmd*:renewrobustcmd\ +:newrobustcmd*:newrobustcmd:providecommand*:providecommand\ +:providerobustcmd*:providerobustcmd:NewDocumentCommand\ +:RenewDocumentCommand:ProvideDocumentCommand\ +:DeclareDocumentCommand:NewExpandableDocumentCommand\ +:RenewExpandableDocumentCommand:ProvideExpandableDocumentCommand\ +:DeclareExpandableDocumentCommand:NewDocumentEnvironment\ +:RenewDocumentEnvironment:ProvideDocumentEnvironment\ +:DeclareDocumentEnvironment:csdef:csedef:csgdef:csxdef:csletcs\ +:cslet:letcs:let"; static void TEX_decode_env (const char *, const char *); @@ -5752,6 +5785,7 @@ TeX_commands (FILE *inf) { char *cp; linebuffer *key; + char newname[UCHAR_MAX]; char TEX_esc = '\0'; char TEX_opgrp UNINIT, TEX_clgrp UNINIT; @@ -5799,19 +5833,73 @@ TeX_commands (FILE *inf) { char *p; ptrdiff_t namelen, linelen; - bool opgrp = false; + bool opgrp = false, one_esc = false; cp = skip_spaces (cp + key->len); + /* Skip the optional arguments to commands in the tags list so + that these arguments don't end up as the name of the tag. + The name will instead come from the argument in curly braces + that follows the optional ones. 
*/ + if (*cp == '[' || *cp == '(') + { + while (*cp != TEX_opgrp && *cp != '\0') + cp++; + } if (*cp == TEX_opgrp) { opgrp = true; cp++; } + /* Jumping to a TeX command definition doesn't work in at least + some of the editors that use ctags. Using the + '--tex-alt-forms' option to strip TEX_esc should provide + minor improvements, though overall the behavior is still + suboptimal. (With --tex-alt-forms we print each tag twice, + once with and once without TEX_esc in the tag name. See + below.) The undocumented ctags option '--no-duplicates' may + also help. Changes in tex-mode.el in GNU Emacs address the + majority of these issues for etags, though the + '--tex-alt-forms' option can also be useful there. */ + + if (tex_alt_forms && *cp == TEX_esc) + { + cp++; + one_esc = true; + } + + /* Add optional argument brackets '(' and '[' to the loop test + so that these arguments don't appear in tag names. Also add + '=' as it's relational in the vast majority of cases. */ for (p = cp; - (!c_isspace (*p) && *p != '#' && - *p != TEX_opgrp && *p != TEX_clgrp); + (!c_isspace (*p) && *p != '#' && *p != '=' && + *p != '[' && *p != '(' && *p != TEX_opgrp && + *p != TEX_clgrp); p++) - continue; + /* Allow only one escape char in a tag name, which + (primarily) enables tagging a TeX command's different, + possibly temporary, '\let' bindings. */ + if (*p == TEX_esc) + { + if (!one_esc) + { + one_esc = true; + continue; + } + else + break; + } + else + continue; + /* Re-run the scan to catch (highly unusual) cases where a + command name is of the form '\('. 
*/ + if ((*p == '(' || *p == '[') && (p - cp) < 2) + { + for (p = cp; + (!c_isspace (*p) && *p != '#' && + *p != TEX_opgrp && *p != TEX_clgrp); + p++) + continue; + } namelen = p - cp; linelen = lb.len; if (!opgrp || *p == TEX_clgrp) @@ -5820,6 +5908,16 @@ TeX_commands (FILE *inf) p++; linelen = p - lb.buffer + 1; } + /* With --tex-alt-forms we strip any TEX_esc from the name (see + above), print the tag with TEX_esc prepended to the bare tag + name, then print the same tag again with the bare tag + name. The escaped form is skipped if too long for NEWNAME. */ + if (tex_alt_forms && namelen + 1 < (ptrdiff_t) sizeof newname) + { + snprintf (newname, sizeof newname, "%c%s", TEX_esc, cp); + make_tag (newname, namelen + 1, true, + lb.buffer, linelen, lineno, linecharno); + } make_tag (cp, namelen, true, lb.buffer, linelen, lineno, linecharno); goto tex_next_line; /* We only tag a line once */ diff --git a/lisp/textmodes/tex-mode.el b/lisp/textmodes/tex-mode.el index a26e7b9c83a..3de4a093e09 100644 --- a/lisp/textmodes/tex-mode.el +++ b/lisp/textmodes/tex-mode.el @@ -1277,6 +1277,8 @@ tex-common-initialization (syntax-propertize-rules latex-syntax-propertize-rules)) ;; TABs in verbatim environments don't do what you think. (setq-local indent-tabs-mode nil) + ;; Set up xref backend in TeX buffers. + (tex-set-thingatpt-symbol) ;; Other vars that should be buffer-local. (make-local-variable 'tex-command) (make-local-variable 'tex-start-of-header) @@ -3724,6 +3726,220 @@ tex-chktex (kill-buffer (process-buffer process))))))) (process-send-region tex-chktex--process (point-min) (point-max)) (process-send-eof tex-chktex--process)))) + +;;; Xref / Etags tweaks + +;; Rather than define a new xref backend for TeX, we tweak the default +;; etags backend so that the main xref user commands (including +;; `xref-find-definitions', `xref-find-apropos', and +;; `xref-find-references' [on M-., C-M-., and M-?, respectively]) work +;; in TeX buffers.
This mostly involves defining a new THING for +;; `thing-at-point' (texsymbol), then substituting that THING for +;; `symbol' in TeX buffers, at least by (configurable) default. The +;; TeX escape character will by default appear in the resulting string +;; only when the xref command uses string search and not regexp +;; search, though this too is configurable. The new THING type also +;; improves the accuracy of other commands that use `thing-at-point' +;; in TeX buffers, like `isearch-forward-thing-at-point' (on M-s M-.) +;; and `project-find-regexp' (on C-x p g). Indeed, +;; `project-find-regexp' sometimes works better in TeX buffers than +;; `xref-find-references'. + +(defvar tex-thingatpt-modes-list + '(tex-mode doctex-mode latex-mode plain-tex-mode slitex-mode ams-tex-mode) + "Major modes where `thing-at-point' may use the `texsymbol' type. + +When a buffer's `major-mode' is in this list, and when +`tex-thingatpt-is-texsymbol' is t (the default), any command in +that buffer that calls `thing-at-point' with a `symbol' argument +actually uses the `texsymbol' argument, instead.") + +(defcustom tex-thingatpt-is-texsymbol t + "When non-nil replace `symbol' by `texsymbol' for `thing-at-point'. + +This applies only to TeX buffers. The `texsymbol' \"thing\" +modifies the standard `symbol' for use in such buffers. + +When nil, restore the default behavior of `thing-at-point' in TeX +buffers. + +Custom will automatically apply changes in all TeX buffers, but +if you set the variable outside of Custom it won't take effect +until you apply it with \\[tex-set-thingatpt-symbol]. Without a +prefix argument (\\[universal-argument]) this applies only to the +current buffer, but with one it applies to all TeX buffers in +`buffer-list'. 
(TeX buffers are those whose `major-mode' is a +member of `tex-thingatpt-modes-list'.)" + :type 'boolean + :group 'tex-file + :group 'TeX-misc + :initialize #'custom-initialize-default + :set (lambda (var val) + (set-default var val) + (tex-set-thingatpt-symbol t)) + :version "30.1") + +(defcustom tex-thingatpt-include-escape '(xref-find-definitions + xref-find-definitions-other-window + xref-find-definitions-other-frame) + "If non-nil, include `tex-escape-char' in `thing-at-point'. + +This variable only takes effect when `tex-thingatpt-is-texsymbol' +is t (the default), changing the argument passed to +`thing-at-point' from `symbol' to `texsymbol'. When that is the +case, the values of this variable act as follows: + +When t, `thing-at-point' will always include a +`tex-escape-char' (usually `\\'), should one be present, in the +string it returns in TeX buffers. + +When nil, `thing-at-point' will never include the +`tex-escape-char' in the string it returns in TeX buffers. + +Otherwise, it's a list of commands for which `thing-at-point' +will always include the `tex-escape-char' in the string it +returns. The three xref commands listed by default may cease to +function properly in TeX buffers if set to nil, but using the +`--tex-alt-forms' option when creating your tags table with +`etags' will rectify that." + :type '(choice (const :tag "Always include tex-escape-char" t) + (const :tag "Never include tex-escape-char" nil) + (set :tag "Include tex-escape-char for these commands" + (repeat :inline t (symbol :tag "command")))) + :group 'tex-file + :group 'TeX-misc + :version "30.1") + +(defvar tex-escape-char ?\\ + "The current, possibly buffer-local, TeX escape character. + +The `etags' program only recognizes `\\' (92) and `!' (33) as +escape characters in TeX documents, and if it detects the latter +it also uses `<>' as the TeX grouping construct rather than `{}'. +Setting this variable to anything other than `\\' or `!' 
is +possible but will not be useful without changes to `etags', at +least for commands that search tags tables, such as +`xref-find-definitions' (\\[xref-find-definitions]) and \ +`xref-find-apropos' (\\[xref-find-apropos]).") + +(defvar tex-thingatpt-syntax-table + (let* ((ost (if (boundp 'TeX-mode-syntax-table) + TeX-mode-syntax-table + tex-mode-syntax-table)) + (st (make-syntax-table ost))) + (modify-syntax-entry ?# "'" st) + (modify-syntax-entry ?= "'" st) + (modify-syntax-entry ?` "'" st) + (modify-syntax-entry ?\" "'" st) + (modify-syntax-entry ?' "'" st) + st) + "Syntax table for delimiting `thing-at-point' in TeX buffers. + +When `tex-thingatpt-is-texsymbol' is t, this syntax table helps +to define what a `texsymbol' is. To access it use the +`tex-thingatpt-syntax-table' function.") + +(defun tex-thingatpt-syntax-table () + "Return a syntax table for `thing-at-point' in TeX buffers. + +It modifies the pre-defined syntax table depending both on the +setting of the `tex-escape-char' variable, which may be buffer +local, and on whether we're using AUCTeX or the in-tree tex-mode." + (let ((nst (make-syntax-table tex-thingatpt-syntax-table)) + (escsy (if (boundp 'TeX-mode-syntax-table) + ?\\ + ?/))) + (cond ((char-equal tex-escape-char ?\\)) + ((char-equal tex-escape-char ?!) + (modify-syntax-entry ?\\ "_" nst) + (modify-syntax-entry tex-escape-char (char-to-string escsy) nst) + (modify-syntax-entry ?< "(>" nst) + (modify-syntax-entry ?> ")<" nst)) + (t + (modify-syntax-entry ?\\ "_" nst) + (modify-syntax-entry tex-escape-char (char-to-string escsy) nst))) + nst)) + +;; Setup AUCTeX modes. (Should this be in AUCTeX itself?) +(add-hook 'TeX-mode-hook #'tex-set-thingatpt-symbol) + +;; `xref-find-references' needs this when called from a latex-mode +;; buffer in order to search files or buffers with a .tex suffix +;; (including the buffer from which it has been called). 
We append it +;; to `auto-mode-alist' so as not to interfere with the usual +;; mode-setting apparatus. +(add-to-list 'auto-mode-alist '("\\.[tT]e[xX]\\'" . latex-mode) t) + +(dolist (texmode tex-thingatpt-modes-list) + (put texmode 'find-tag-default-function 'tex--thing-at-point)) + +(put 'texsymbol 'beginning-op 'tex-thingatpt--beginning-of-texsymbol) + +(put 'texsymbol 'end-op 'tex-thingatpt--end-of-texsymbol) + +(declare-function cl-substitute "cl-seq" (cl-new cl-old cl-seq &rest cl-keys)) + +(defun tex-set-thingatpt-symbol (&optional all) + "Set meaning of `thing-at-point' `symbol' in (ALL?) TeX buffers. + +When `tex-thingatpt-is-texsymbol' is t, set `thing-at-point' to +use the `texsymbol' \"thing\" instead of `symbol', otherwise +maintain or restore the default. Without an optional ALL make +changes only in current buffer, with ALL make changes in all TeX +buffers in `buffer-list'." + (interactive "P") + (require 'thingatpt) + (if all + (dolist (buf (buffer-list)) + (with-current-buffer buf + (tex--symbol-or-texsymbol))) + (tex--symbol-or-texsymbol))) + +(defun tex--symbol-or-texsymbol () + (when (memq major-mode tex-thingatpt-modes-list) + (if tex-thingatpt-is-texsymbol ; copy, don't mutate the default + (setq-local thing-at-point-provider-alist + (cons '(symbol . tex--thing-at-point) + (remove '(symbol . tex--thing-at-point) thing-at-point-provider-alist)) + isearch-forward-thing-at-point + (cl-substitute 'texsymbol 'symbol + isearch-forward-thing-at-point)) + (setq-local thing-at-point-provider-alist + (remove '(symbol . tex--thing-at-point) + thing-at-point-provider-alist) + isearch-forward-thing-at-point + (cl-substitute 'symbol 'texsymbol + isearch-forward-thing-at-point))))) + +(defun tex--thing-at-point () + "Pass `thing' type `texsymbol' to `bounds-of-thing-at-point'. + +When `tex-thingatpt-is-texsymbol' is t, calls in TeX buffers to +`thing-at-point' with argument `symbol' will instead use the +argument `texsymbol'. Otherwise it will call `find-tag-default'."
+ (if tex-thingatpt-is-texsymbol + (let ((bounds (bounds-of-thing-at-point 'texsymbol))) + (when bounds + (buffer-substring-no-properties (car bounds) (cdr bounds)))) + (find-tag-default))) + +(defun tex--include-escape-p (command) + (or (eq tex-thingatpt-include-escape t) + (memq command tex-thingatpt-include-escape))) + +(defun tex-thingatpt--beginning-of-texsymbol () + "Move point to the beginning of the current TeX symbol." + (with-syntax-table (tex-thingatpt-syntax-table) + (and (re-search-backward "\\([][()]\\|\\(\\sw\\|\\s_\\|\\s.\\)+\\)") + (skip-syntax-backward "w_.") + (when (tex--include-escape-p this-command) + (skip-syntax-backward "\\/"))))) + +(defun tex-thingatpt--end-of-texsymbol () + "Move point to the end of the current TeX symbol." + (with-syntax-table (tex-thingatpt-syntax-table) + (and (re-search-forward "\\([][()]\\|\\(\\sw\\|\\s_\\|\\s.\\)+\\)") + (skip-syntax-forward "w_.")))) (make-obsolete-variable 'tex-mode-load-hook "use `with-eval-after-load' instead." "28.1") -- 2.35.8