From 195f259ae8f8fc17bfee1c85ec783b31e5ae7128 Mon Sep 17 00:00:00 2001 From: Jim Porter Date: Mon, 21 Oct 2024 15:41:42 -0700 Subject: [PATCH] Improve correctness of Eshell globs when using escape characters This new implementation opts *in* to treating characters as glob characters, rather than opting out. This reduces the need to coordinate with other parts of Eshell and should be harder to break. * lisp/eshell/em-glob.el (eshell-parse-glob-chars): Return the propertized globbing character directly. (eshell--propertize-glob, eshell--glob-char-p) (eshell--contains-glob-char-p, eshell--all-glob-chars-p): New functions. (eshell-glob-p): Make obsolete. (eshell-glob-regexp, eshell-glob-convert-1, eshell-glob-convert): Check for 'eshell-glob-char' property. (eshell-extended-glob): Remove text properties when returning no match. (eshell--glob-anything): New constant. (eshell-glob-entries): Propertize "*" to treat it as a glob. * lisp/eshell/em-ls.el (eshell-ls--expand-wildcards): New function... (eshell-ls--insert-directory): ... use it. * test/lisp/eshell/em-glob-tests.el: Use 'eshell--propertize-glob' in tests. (em-glob-test/convert/literal-characters) (em-glob-test/convert/mixed-literal-characters): New tests. * lisp/eshell/em-glob.el (eshell-expand-glob): Rename from 'eshell-extended-glob'. Update callers. (eshell-extended-glob): New function to expand a GLOB that hasn't been propertized yet, for use outside of Eshell command forms. (eshell-parse-glob-chars): Return the propertized globbing character directly. (eshell-parse-glob-string, eshell--glob-char-p) (eshell--contains-glob-char-p, eshell--all-glob-chars-p): New functions. (eshell-glob-regexp, eshell-glob-convert-1, eshell-glob-convert): Check for 'eshell-glob-char' property. (eshell-glob-p): Make obsolete. (eshell--glob-anything): New constant... (eshell-glob-entries): ... use it. * lisp/eshell/em-ls.el (eshell-ls--expand-wildcards): New function... (eshell-ls--insert-directory): ... use it. * test/lisp/eshell/em-glob-tests.el: Use 'eshell-parse-glob-string in tests. (em-glob-test/convert/literal-characters) (em-glob-test/convert/mixed-literal-characters): New tests. --- lisp/eshell/em-glob.el | 141 ++++++++++++++++++++---------- lisp/eshell/em-ls.el | 19 ++-- test/lisp/eshell/em-glob-tests.el | 48 ++++++---- 3 files changed, 140 insertions(+), 68 deletions(-) diff --git a/lisp/eshell/em-glob.el b/lisp/eshell/em-glob.el index 57bb0c53b57..b94c4e3ed46 100644 --- a/lisp/eshell/em-glob.el +++ b/lisp/eshell/em-glob.el @@ -149,23 +149,48 @@ eshell-no-command-globbing "Don't glob the command argument. Reflect this by modifying TERMS." (ignore (pcase (car terms) - ((or `(eshell-extended-glob ,term) - `(eshell-splice-args (eshell-extended-glob ,term))) + ((or `(eshell-expand-glob ,term) + `(eshell-splice-args (eshell-expand-glob ,term))) (setcar terms term))))) (defun eshell-add-glob-modifier () - "Add `eshell-extended-glob' to the argument modifier list." + "Add `eshell-expand-glob' to the argument modifier list." (when eshell-glob-splice-results (add-hook 'eshell-current-modifiers #'eshell-splice-args 99)) - (add-hook 'eshell-current-modifiers #'eshell-extended-glob)) + (add-hook 'eshell-current-modifiers #'eshell-expand-glob)) (defun eshell-parse-glob-chars () - "Parse a globbing delimiter. -The character is not advanced for ordinary globbing characters, so -that other function may have a chance to override the globbing -interpretation." + "Parse a globbing character." (when (memq (char-after) eshell-glob-chars-list) - (ignore (eshell-add-glob-modifier)))) + (eshell-add-glob-modifier) + (prog1 + (propertize (char-to-string (char-after)) 'eshell-glob-char t) + (forward-char)))) + +(defvar eshell-glob-chars-regexp nil) +(defsubst eshell-glob-chars-regexp () + "Return the lazily-created value for `eshell-glob-chars-regexp'." + (or eshell-glob-chars-regexp + (setq-local eshell-glob-chars-regexp + (rx-to-string `(+ (any ,@eshell-glob-chars-list)) t)))) + +(defun eshell-parse-glob-string (glob) + "Add text properties to glob characters in GLOB and return the result." + (let ((regexp (rx-to-string + `(or (seq (group-n 1 "\\") anychar) + (group-n 2 (regexp ,(eshell-glob-chars-regexp)))) + t))) + (with-temp-buffer + (insert glob) + (goto-char (point-min)) + (while (re-search-forward regexp nil t) + (cond + ((match-beginning 1) ; Remove backslash escape. + (delete-region (match-beginning 1) (match-end 1))) + ((match-beginning 2) ; Propertize globbing character. + (put-text-property (match-beginning 2) (match-end 2) + 'eshell-glob-char t)))) + (buffer-string)))) (defvar eshell-glob-matches) (defvar message-shown) @@ -174,12 +199,16 @@ eshell-glob-recursive-alist '(("**/" . recurse) ("***/" . recurse-symlink))) -(defvar eshell-glob-chars-regexp nil) -(defsubst eshell-glob-chars-regexp () - "Return the lazily-created value for `eshell-glob-chars-regexp'." - (or eshell-glob-chars-regexp - (setq-local eshell-glob-chars-regexp - (rx-to-string `(+ (any ,@eshell-glob-chars-list)) t)))) +(defsubst eshell--glob-char-p (string index) + (get-text-property index 'eshell-glob-char string)) + +(defsubst eshell--contains-glob-char-p (string) + (text-property-any 0 (length string) 'eshell-glob-char t string)) + +(defun eshell--all-glob-chars-p (string) + (and (length> string 0) + (not (text-property-not-all + 0 (length string) 'eshell-glob-char t string)))) (defun eshell-glob-regexp (pattern) "Convert glob-pattern PATTERN to a regular expression. @@ -196,9 +225,10 @@ eshell-glob-regexp [a-b] [a-b] matches a character or range [^a] [^a] excludes a character or range -If any characters in PATTERN have the text property `escaped' -set to true, then these characters will match themselves in the -resulting regular expression." +This function only considers in PATTERN that have the text property +`eshell-glob-char' set to t for conversion from glob to regexp syntax. +All other characters are treated as literals. See also +`eshell-parse-glob-chars' and `eshell-parse-glob-string'." (let ((matched-in-pattern 0) ; How much of PATTERN handled regexp) (while (string-match (eshell-glob-chars-regexp) @@ -209,7 +239,7 @@ eshell-glob-regexp (concat regexp (regexp-quote (substring pattern matched-in-pattern op-begin)))) - (if (get-text-property op-begin 'escaped pattern) + (if (not (eshell--glob-char-p pattern op-begin)) (setq regexp (concat regexp (regexp-quote (char-to-string op-char))) matched-in-pattern (1+ op-begin)) @@ -229,6 +259,7 @@ eshell-glob-regexp (defun eshell-glob-p (pattern) "Return non-nil if PATTERN has any special glob characters." + (declare (obsolete nil "31.1")) ;; "~" is an infix globbing character, so one at the start of a glob ;; must be a literal. (let ((start (if (string-prefix-p "~" pattern) 1 0))) @@ -249,8 +280,8 @@ eshell-glob-convert-1 ;; Split the glob if it contains a negation like x~y. (while (and (eq incl glob) (setq index (string-search "~" glob index))) - (if (or (get-text-property index 'escaped glob) - (or (= (1+ index) len))) + (if (or (not (eshell--glob-char-p glob index)) + (= (1+ index) len)) (setq index (1+ index)) (setq incl (substring glob 0 index) excl (substring glob (1+ index))))) @@ -294,13 +325,18 @@ eshell-glob-convert (setq start-dir (pop globs)) (setq start-dir (file-name-as-directory "."))) (while globs - (if-let* ((recurse (cdr (assoc (car globs) - eshell-glob-recursive-alist)))) + ;; "~" is an infix globbing character, so one at the start of a + ;; glob component must be a literal. + (when (eq (aref (car globs) 0) ?~) + (remove-text-properties 0 1 '(eshell-glob-char) (car globs))) + (if-let* ((recurse (cdr (assoc (car globs) eshell-glob-recursive-alist))) + ((eshell--all-glob-chars-p + (string-trim-right (car globs) "/")))) (if last-saw-recursion (setcar result recurse) (push recurse result) (setq last-saw-recursion t)) - (if (or result (eshell-glob-p (car globs))) + (if (or result (eshell--contains-glob-char-p (car globs))) (push (eshell-glob-convert-1 (car globs) (null (cdr globs))) result) ;; We haven't seen a glob yet, so instead append to the start @@ -312,6 +348,38 @@ eshell-glob-convert (nreverse result) isdir))) +(defun eshell-expand-glob (glob) + "Return a list of files matched by GLOB. +Each globbing character in GLOB should have a non-nil value for the text +property `eshell-glob-char' (e.g. by `eshell-parse-glob-chars') in order +for it to have syntactic meaning; otherwise, this function treats the +character literally. + +This function is primarily intended for use within Eshell command +forms. If you want to use an ordinary string as a glob, use +`eshell-extended-glob' instead." + (let ((globs (eshell-glob-convert glob)) + eshell-glob-matches message-shown) + (unwind-protect + ;; After examining GLOB, make sure we actually got some globs + ;; before computing the results. We can get zero globs for + ;; remote file names using "~", like "/ssh:remote:~/file.txt". + ;; During Eshell argument parsing, we can't always be sure if + ;; the "~" is a home directory reference or part of a glob + ;; (e.g. if the argument was assembled from variables). + (when (cadr globs) + (apply #'eshell-glob-entries globs)) + (when message-shown + (message nil))) + (cond + (eshell-glob-matches + (sort eshell-glob-matches #'string<)) + ((and eshell-error-if-no-glob (cadr globs)) + (error "No matches found: %s" glob)) + (t + (let ((result (substring-no-properties glob))) + (if eshell-glob-splice-results (list result) result)))))) + (defun eshell-extended-glob (glob) "Return a list of files matched by GLOB. If no files match, signal an error (if `eshell-error-if-no-glob' @@ -327,26 +395,9 @@ eshell-extended-glob Mainly they are not supported because file matching is done with Emacs regular expressions, and these cannot support the above constructs." - (let ((globs (eshell-glob-convert glob)) - eshell-glob-matches message-shown) - (if (null (cadr globs)) - ;; If, after examining GLOB, there are no actual globs, just - ;; bail out. This can happen for remote file names using "~", - ;; like "/ssh:remote:~/file.txt". During parsing, we can't - ;; always be sure if the "~" is a home directory reference or - ;; part of a glob (e.g. if the argument was assembled from - ;; variables). - (if eshell-glob-splice-results (list glob) glob) - (unwind-protect - (apply #'eshell-glob-entries globs) - (if message-shown - (message nil))) - (or (and eshell-glob-matches (sort eshell-glob-matches #'string<)) - (if eshell-error-if-no-glob - (error "No matches found: %s" glob) - (if eshell-glob-splice-results - (list glob) - glob)))))) + (eshell-expand-glob (eshell-parse-glob-string glob))) + +(defconst eshell--glob-anything (eshell-parse-glob-string "*")) ;; FIXME does this really need to abuse eshell-glob-matches, message-shown? (defun eshell-glob-entries (path globs only-dirs) @@ -363,7 +414,7 @@ eshell-glob-entries (if (rassq (car globs) eshell-glob-recursive-alist) (setq recurse-p (car globs) glob (or (cadr globs) - (eshell-glob-convert-1 "*" t)) + (eshell-glob-convert-1 eshell--glob-anything t)) glob-remainder (cddr globs)) (setq glob (car globs) glob-remainder (cdr globs))) diff --git a/lisp/eshell/em-ls.el b/lisp/eshell/em-ls.el index 8bf2e20d320..e8cdb9c82c4 100644 --- a/lisp/eshell/em-ls.el +++ b/lisp/eshell/em-ls.el @@ -246,6 +246,17 @@ dired-flag (declare-function eshell-extended-glob "em-glob" (glob)) (defvar eshell-error-if-no-glob) +(defvar eshell-glob-splice-results) + +(defun eshell-ls--expand-wildcards (file) + "Expand the shell wildcards in FILE if any." + (if (and (atom file) + (not (file-exists-p file))) + (let ((eshell-error-if-no-glob t) + ;; Ensure `eshell-extended-glob' returns a list. + (eshell-glob-splice-results t)) + (mapcar #'file-relative-name (eshell-extended-glob file))) + (list (file-relative-name file)))) (defun eshell-ls--insert-directory (orig-fun file switches &optional wildcard full-directory-p) @@ -277,13 +288,7 @@ eshell-ls--insert-directory (require 'em-glob) (let* ((insert-func 'insert) (error-func 'insert) - (eshell-error-if-no-glob t) - (target ; Expand the shell wildcards if any. - (if (and (atom file) - (string-match "[[?*]" file) - (not (file-exists-p file))) - (mapcar #'file-relative-name (eshell-extended-glob file)) - (file-relative-name file))) + (target (eshell-ls--expand-wildcards file)) (switches (append eshell-ls-dired-initial-args (and (or (consp dired-directory) wildcard) (list "-d")) diff --git a/test/lisp/eshell/em-glob-tests.el b/test/lisp/eshell/em-glob-tests.el index 239968917ab..88d3afc5d4d 100644 --- a/test/lisp/eshell/em-glob-tests.el +++ b/test/lisp/eshell/em-glob-tests.el @@ -134,17 +134,19 @@ em-glob-test/expand/explicitly-listify-results (ert-deftest em-glob-test/convert/current-start-directory () "Test converting a glob starting in the current directory." - (should (equal (eshell-glob-convert "*.el") + (should (equal (eshell-glob-convert (eshell-parse-glob-string "*.el")) '("./" (("\\`.*\\.el\\'" . "\\`\\.")) nil)))) (ert-deftest em-glob-test/convert/relative-start-directory () "Test converting a glob starting in a relative directory." - (should (equal (eshell-glob-convert "some/where/*.el") + (should (equal (eshell-glob-convert + (eshell-parse-glob-string "some/where/*.el")) '("./some/where/" (("\\`.*\\.el\\'" . "\\`\\.")) nil)))) (ert-deftest em-glob-test/convert/absolute-start-directory () "Test converting a glob starting in an absolute directory." - (should (equal (eshell-glob-convert "/some/where/*.el") + (should (equal (eshell-glob-convert + (eshell-parse-glob-string "/some/where/*.el")) '("/some/where/" (("\\`.*\\.el\\'" . "\\`\\.")) nil)))) (ert-deftest em-glob-test/convert/remote-start-directory () @@ -152,16 +154,30 @@ em-glob-test/convert/remote-start-directory (skip-unless (eshell-tests-remote-accessible-p)) (let* ((default-directory ert-remote-temporary-file-directory) (remote (file-remote-p default-directory))) - (should (equal (eshell-glob-convert (format "%s/some/where/*.el" remote)) + (should (equal (eshell-glob-convert + (format (eshell-parse-glob-string "%s/some/where/*.el") + remote)) `(,(format "%s/some/where/" remote) (("\\`.*\\.el\\'" . "\\`\\.")) nil))))) -(ert-deftest em-glob-test/convert/quoted-start-directory () - "Test converting a glob starting in a quoted directory name." +(ert-deftest em-glob-test/convert/start-directory-with-spaces () + "Test converting a glob starting in a directory with spaces in its name." (should (equal (eshell-glob-convert - (concat (eshell-escape-arg "some where/") "*.el")) + (eshell-parse-glob-string "some where/*.el")) '("./some where/" (("\\`.*\\.el\\'" . "\\`\\.")) nil)))) +(ert-deftest em-glob-test/convert/literal-characters () + "Test converting a \"glob\" with only literal characters." + (should (equal (eshell-glob-convert "*.el") '("./*.el" nil nil))) + (should (equal (eshell-glob-convert "**/") '("./**/" nil t)))) + +(ert-deftest em-glob-test/convert/mixed-literal-characters () + "Test converting a glob with some literal characters." + (should (equal (eshell-glob-convert (eshell-parse-glob-string "\\*\\*/*.el")) + '("./**/" (("\\`.*\\.el\\'" . "\\`\\.")) nil))) + (should (equal (eshell-glob-convert (eshell-parse-glob-string "**/\\*.el")) + '("./" (recurse ("\\`\\*\\.el\\'" . "\\`\\.")) nil)))) + ;; Glob matching @@ -262,11 +278,11 @@ em-glob-test/match-n-or-more-characters (ert-deftest em-glob-test/match-n-or-more-groups () "Test that \"(x)#\" and \"(x)#\" match zero or more instances of \"(x)\"." - (with-fake-files '("h.el" "ha.el" "hi.el" "hii.el" "dir/hi.el") - (should (equal (eshell-extended-glob "hi#.el") - '("h.el" "hi.el" "hii.el"))) - (should (equal (eshell-extended-glob "hi##.el") - '("hi.el" "hii.el"))))) + (with-fake-files '("h.el" "ha.el" "hi.el" "hah.el" "hahah.el" "dir/hah.el") + (should (equal (eshell-extended-glob "h(ah)#.el") + '("h.el" "hah.el" "hahah.el"))) + (should (equal (eshell-extended-glob "h(ah)##.el") + '("hah.el" "hahah.el"))))) (ert-deftest em-glob-test/match-n-or-more-character-sets () "Test that \"[x]#\" and \"[x]#\" match zero or more instances of \"[x]\"." @@ -300,11 +316,11 @@ em-glob-test/match-dot-files (ert-deftest em-glob-test/no-matches () "Test behavior when a glob fails to match any files." (with-fake-files '("foo.el" "bar.el") - (should (equal (eshell-extended-glob "*.txt") - "*.txt")) + (should (equal-including-properties (eshell-extended-glob "*.txt") + "*.txt")) (let ((eshell-glob-splice-results t)) - (should (equal (eshell-extended-glob "*.txt") - '("*.txt")))) + (should (equal-including-properties (eshell-extended-glob "*.txt") + '("*.txt")))) (let ((eshell-error-if-no-glob t)) (should-error (eshell-extended-glob "*.txt"))))) -- 2.25.1