From abd8fd9b6ea7936c56cde538202c7f3eeb24aa76 Mon Sep 17 00:00:00 2001 From: Sean Whitton Date: Mon, 17 Jan 2022 15:15:36 -0700 Subject: [PATCH v5 3/3] Add Eshell syntax to more easily bypass Eshell's own pipelining * etc/NEWS: * doc/misc/eshell.texi (Input/Output): Document the new syntax. * lisp/eshell/em-extpipe.el: New module (Bug#46351). * test/lisp/eshell/em-extpipe-tests.el: New tests. * lisp/eshell/esh-module.el (eshell-modules-list): Add `eshell-extpipe'. --- doc/misc/eshell.texi | 42 +++++ etc/NEWS | 10 ++ lisp/eshell/em-extpipe.el | 183 ++++++++++++++++++++ lisp/eshell/esh-module.el | 1 + test/lisp/eshell/em-extpipe-tests.el | 205 +++++++++++++++++++++++ test/lisp/eshell/eshell-tests-helpers.el | 4 +- 6 files changed, 443 insertions(+), 2 deletions(-) create mode 100644 lisp/eshell/em-extpipe.el create mode 100644 test/lisp/eshell/em-extpipe-tests.el diff --git a/doc/misc/eshell.texi b/doc/misc/eshell.texi index df6e3b861e..261e88d00c 100644 --- a/doc/misc/eshell.texi +++ b/doc/misc/eshell.texi @@ -1142,6 +1142,48 @@ Input/Output The output function is called once on each line of output until @code{nil} is passed, indicating end of output. +@section Running Shell Pipelines Natively +When constructing shell pipelines that will move a lot of data, it is +a good idea to bypass Eshell's own pipelining support and use the +operating system shell's instead. This is especially relevant when +executing commands on a remote machine using Eshell's Tramp +integration: using the remote shell's pipelining avoids copying the +data which will flow through the pipeline to local Emacs buffers and +then right back again. + +Eshell recognises a special syntax to make it easier to convert +pipelines so as to bypass Eshell's pipelining. Prefixing at least one +@code{|}, @code{<} or @code{>} with an asterisk marks a command as +intended for the operating system shell. 
To make it harder to invoke +this functionality accidentally, it is also required that the asterisk +be preceded by whitespace or located at the start of input. For +example, + +@example + cat *.ogg *| my-cool-decoder >file +@end example + +Executing this command will not copy all the data in the *.ogg files, +nor the decoded data, into Emacs buffers, as would normally happen. + +The command is interpreted as extending up to the next @code{|} +character which is not preceded by an unescaped asterisk following +whitespace, or the end of the input if there is no such character. +Thus, all @code{<} and @code{>} redirections occurring before the next +asterisk-unprefixed @code{|} are implicitly prefixed with (whitespace +and) asterisks. An exception is that Eshell-specific redirects right +at the end of the command are excluded. This allows input like this: + +@example + foo *| baz >#<buffer quux> +@end example + +@noindent which is equivalent to input like this: + +@example + sh -c "foo | baz" >#<buffer quux> +@end example + @node Extension modules @chapter Extension modules Eshell provides a facility for defining extension modules so that they diff --git a/etc/NEWS b/etc/NEWS index 5297db3e2d..68c0eba866 100644 --- a/etc/NEWS +++ b/etc/NEWS @@ -858,6 +858,16 @@ the Galeon web browser was released in September, 2008. *** New user option 'ruby-toggle-block-space-before-parameters'. +** Eshell + ++++ +*** New feature to easily bypass Eshell's own pipelining. +Prefixing '|', '<' or '>' with an asterisk, i.e. '*|', '*<' or '*>', +will cause the whole command to be passed to the operating system +shell. This is particularly useful to bypass Eshell's own pipelining +support for pipelines which will move a lot of data. See "Running +Shell Pipelines Natively" in the Eshell manual.
+ ** Miscellaneous --- diff --git a/lisp/eshell/em-extpipe.el b/lisp/eshell/em-extpipe.el new file mode 100644 index 0000000000..57aeec38ff --- /dev/null +++ b/lisp/eshell/em-extpipe.el @@ -0,0 +1,183 @@ +;;; em-extpipe.el --- external shell pipelines -*- lexical-binding:t -*- + +;; Copyright (C) 2022 Free Software Foundation, Inc. + +;; Author: Sean Whitton + +;; This file is part of GNU Emacs. + +;; GNU Emacs is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. + +;; GNU Emacs is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. + +;;; Commentary: + +;; When constructing shell pipelines that will move a lot of data, it +;; is a good idea to bypass Eshell's own pipelining support and use +;; the operating system shell's instead. This module tries to make +;; that easy to do. + +;;; Code: + +(require 'cl-lib) +(require 'esh-arg) +(require 'esh-io) +(require 'esh-util) + +(eval-when-compile (require 'files-x)) + +;;; Functions: + +(defun eshell-extpipe-initialize () ;Called from `eshell-mode' via intern-soft! + "Initialize external pipelines support." + (when (boundp 'eshell-special-chars-outside-quoting) + (setq-local + eshell-special-chars-outside-quoting + (append eshell-special-chars-outside-quoting (list ?\*)))) + (add-hook 'eshell-parse-argument-hook + #'eshell-parse-external-pipeline -20 t) + (add-hook 'eshell-pre-rewrite-command-hook + #'eshell-rewrite-external-pipeline -20 t)) + +(defun eshell-parse-external-pipeline () + "Parse a pipeline intended for execution by the external shell.
+ +A sequence of arguments is rewritten to use the operating system +shell when it contains `*|', `*<' or `*>', where the asterisk is +preceded by whitespace or located at the start of input. + +The command extends to the next `|' character which is not +preceded by an unescaped asterisk following whitespace, or the +end of input, except that any Eshell-specific output redirections +occurring at the end are excluded. Any other `<' or `>' +appearing before the end of the command are treated as though +preceded by (whitespace and) an asterisk. + +For example, + + foo <bar *| baz >#<buffer quux> + +is equivalent to + + sh -c \"foo <bar | baz\" >#<buffer quux> + +when `shell-file-name' is `sh' and `shell-command-switch' is +`-c', but in + + foo >#<buffer quux> *| baz + +and + + foo *| baz >#<buffer quux> --some-argument + +the Eshell-specific redirect will be passed on to the operating +system shell, probably leading to undesired results. + +This function must appear early in `eshell-parse-argument-hook' +to ensure that operating system shell syntax is not interpreted +as though it were Eshell syntax." + ;; Our goal is to wrap the external command to protect it from the + ;; other members of `eshell-parse-argument-hook'. We must avoid + ;; misinterpreting a quoted `*|', `*<' or `*>' as indicating an + ;; external pipeline, hence the structure of the loop in `findbeg1'.
+ (cl-flet + ((findbeg1 (pat &optional go (bound (point-max))) + (let* ((start (point)) + (result + (catch 'found + (while (> bound (point)) + (let* ((found + (save-excursion + (re-search-forward "['\"\\]" bound t))) + (next (or (and found (match-beginning 0)) + bound))) + (if (re-search-forward pat next t) + (throw 'found (match-beginning 1)) + (goto-char next) + (while (or (eshell-parse-backslash) + (eshell-parse-double-quote) + (eshell-parse-literal-quote))))))))) + (goto-char (if (and result go) (match-end 0) start)) + result))) + (unless (or eshell-current-argument eshell-current-quoted) + (let ((beg (point)) end + (next-marked (findbeg1 "\\(?:\\=\\|\\s-\\)\\(\\*[|<>]\\)")) + (next-unmarked + (or (findbeg1 "\\(?:\\=\\|[^*]\\|\\S-\\*\\)\\(|\\)") + (point-max)))) + (when (and next-marked (> next-unmarked next-marked) + (or (> next-marked (point)) + (looking-back "\\`\\|\\s-" nil))) + ;; Skip to the final segment of the external pipeline. + (while (findbeg1 "\\(?:\\=\\|\\s-\\)\\(\\*|\\)" t)) + ;; Find output redirections. + (while (findbeg1 + "\\([0-9]?>+&?[0-9]?\\s-*\\S-\\)" t next-unmarked) + ;; Is the output redirection Eshell-specific? We have our + ;; own logic, rather than calling `eshell-parse-argument', + ;; to avoid specifying here all the possible cars of + ;; parsed special references -- `get-buffer-create' etc. + (forward-char -1) + (let ((this-end + (save-match-data + (cond ((looking-at "#<") + (forward-char 1) + (1+ (eshell-find-delimiter ?\< ?\>))) + ((and (looking-at "/\\S-+") + (assoc (match-string 0) + eshell-virtual-targets)) + (match-end 0)))))) + (cond ((and this-end end) + (goto-char this-end)) + (this-end + (goto-char this-end) + (setq end (match-beginning 0))) + (t + (setq end nil))))) + ;; We've moved past all Eshell-specific output redirections + ;; we could find. If there is only whitespace left, then + ;; `end' is right before redirections we should exclude; + ;; otherwise, we must include everything. 
+ (unless (and end (skip-syntax-forward "\s" next-unmarked) + (= next-unmarked (point))) + (setq end next-unmarked)) + (let ((cmd (string-trim + (buffer-substring-no-properties beg end)))) + (goto-char end) + ;; We must now drop the asterisks, unless quoted/escaped. + (with-temp-buffer + (insert cmd) + (goto-char (point-min)) + (cl-loop + for next = (findbeg1 "\\(?:\\=\\|\\s-\\)\\(\\*[|<>]\\)" t) + while next do (forward-char -2) (delete-char 1)) + (eshell-finish-arg + `(eshell-external-pipeline ,(buffer-string)))))))))) + +(defun eshell-rewrite-external-pipeline (terms) + "Rewrite an external pipeline in TERMS as parsed by +`eshell-parse-external-pipeline', which see." + (while terms + (when (and (listp (car terms)) + (eq (caar terms) 'eshell-external-pipeline)) + (with-connection-local-variables + (setcdr terms (cl-list* + shell-command-switch (cadar terms) (cdr terms))) + (setcar terms shell-file-name))) + (setq terms (cdr terms)))) + +(defsubst eshell-external-pipeline (&rest _args) + "Stub to generate an error if a pipeline is not rewritten." + (error "Unhandled external pipeline in input text")) + +(provide 'em-extpipe) +;;; em-extpipe.el ends here diff --git a/lisp/eshell/esh-module.el b/lisp/eshell/esh-module.el index ade151d7cd..14e91912d1 100644 --- a/lisp/eshell/esh-module.el +++ b/lisp/eshell/esh-module.el @@ -54,6 +54,7 @@ eshell-modules-list eshell-basic eshell-cmpl eshell-dirs + eshell-extpipe eshell-glob eshell-hist eshell-ls diff --git a/test/lisp/eshell/em-extpipe-tests.el b/test/lisp/eshell/em-extpipe-tests.el new file mode 100644 index 0000000000..1283b6b361 --- /dev/null +++ b/test/lisp/eshell/em-extpipe-tests.el @@ -0,0 +1,205 @@ +;;; em-extpipe-tests.el --- em-extpipe test suite -*- lexical-binding:t -*- + +;; Copyright (C) 2022 Free Software Foundation, Inc. + +;; Author: Sean Whitton + +;; This file is part of GNU Emacs.
+ +;; GNU Emacs is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. + +;; GNU Emacs is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. + +;;; Commentary: + + +;;; Code: + +(require 'cl-lib) +(require 'ert) +(require 'ert-x) +(require 'em-extpipe) +(eval-and-compile + (load (expand-file-name "eshell-tests-helpers" + (file-name-directory (or load-file-name + default-directory))))) + +(defvar eshell-history-file-name) +(defvar eshell-test--max-subprocess-time) +(declare-function eshell-command-result-p "eshell-tests-helpers") + +(defmacro em-extpipe-tests--deftest (name input &rest body) + (declare (indent 2)) + `(ert-deftest ,name () + (cl-macrolet + ((should-parse (expected) + `(let ((shell-file-name "sh") + (shell-command-switch "-c")) + ;; Strip `eshell-trap-errors'. + (should (equal ,expected + (cadr (eshell-parse-command input)))))) + (with-substitute-for-temp (&rest body) + ;; Substitute name of an actual temporary file and/or + ;; buffer into `input'. The substitution logic is + ;; appropriate for only the use we put it to in this file.
+ `(ert-with-temp-file temp + (let ((temp-buffer (generate-new-buffer " *temp*" t))) + (unwind-protect + (let ((input + (replace-regexp-in-string + "temp\\([^>]\\|\\'\\)" temp + (string-replace "#" + (buffer-name temp-buffer) + input)))) + ,@body) + (when (buffer-name temp-buffer) + (kill-buffer temp-buffer)))))) + (temp-should-string= (expected) + `(string= ,expected (string-trim-right + (with-temp-buffer + (insert-file-contents temp) + (buffer-string))))) + (temp-buffer-should-string= (expected) + `(string= ,expected (string-trim-right + (with-current-buffer temp-buffer + (buffer-string)))))) + (skip-unless shell-file-name) + (skip-unless shell-command-switch) + (skip-unless (executable-find shell-file-name)) + (let ((input ,input)) + (with-temp-eshell ,@body))))) + +(em-extpipe-tests--deftest em-extpipe-test-1 + "echo \"bar\" *| rev >temp" + (skip-unless (executable-find "rev")) + (should-parse '(eshell-named-command + "sh" (list "-c" "echo \"bar\" | rev >temp"))) + (with-substitute-for-temp + (eshell-command-result-p input "^$") + (temp-should-string= "rab"))) + +(em-extpipe-tests--deftest em-extpipe-test-2 + "echo \"bar\" | rev *>temp" + (skip-unless (executable-find "rev")) + (should-parse + '(eshell-execute-pipeline + '((eshell-named-command "echo" (list (eshell-escape-arg "bar"))) + (eshell-named-command "sh" (list "-c" "rev >temp"))))) + (with-substitute-for-temp + (eshell-command-result-p input "^$") + (temp-should-string= "rab"))) + +(em-extpipe-tests--deftest em-extpipe-test-3 "foo *| bar | baz -d" + (should-parse + '(eshell-execute-pipeline + '((eshell-named-command "sh" (list "-c" "foo | bar")) + (eshell-named-command "baz" (list "-d")))))) + +(em-extpipe-tests--deftest em-extpipe-test-4 + "echo \"bar\" *| rev >#" + (skip-unless (executable-find "rev")) + (should-parse + '(progn + (ignore + (eshell-set-output-handle 1 'overwrite + (get-buffer-create "temp"))) + (eshell-named-command "sh" + (list "-c" "echo \"bar\" | rev")))) + 
(with-substitute-for-temp + (eshell-command-result-p input "^$") + (temp-buffer-should-string= "rab"))) + +(em-extpipe-tests--deftest em-extpipe-test-5 + "foo *| bar ># baz" + (should-parse '(eshell-named-command + "sh" (list "-c" "foo | bar ># baz")))) + +(em-extpipe-tests--deftest em-extpipe-test-6 + "foo ># *| bar baz" + (should-parse '(eshell-named-command + "sh" (list "-c" "foo ># | bar baz")))) + +(em-extpipe-tests--deftest em-extpipe-test-7 + "foo *| bar ># >>#" + (should-parse + '(progn + (ignore + (eshell-set-output-handle 1 'overwrite + (get-buffer-create "quux"))) + (ignore + (eshell-set-output-handle 1 'append + (get-process "other"))) + (eshell-named-command "sh" + (list "-c" "foo | bar"))))) + +(em-extpipe-tests--deftest em-extpipe-test-8 + "foo *| bar >/dev/kill | baz" + (should-parse + '(eshell-execute-pipeline + '((progn + (ignore + (eshell-set-output-handle 1 'overwrite "/dev/kill")) + (eshell-named-command "sh" + (list "-c" "foo | bar"))) + (eshell-named-command "baz"))))) + +(em-extpipe-tests--deftest em-extpipe-test-9 "foo \\*| bar" + (should-parse + '(eshell-execute-pipeline + '((eshell-named-command "foo" + (list (eshell-escape-arg "*"))) + (eshell-named-command "bar"))))) + +(em-extpipe-tests--deftest em-extpipe-test-10 "foo \"*|\" *>bar" + (should-parse + '(eshell-named-command "sh" (list "-c" "foo \"*|\" >bar")))) + +(em-extpipe-tests--deftest em-extpipe-test-11 "foo '*|' bar" + (should-parse '(eshell-named-command + "foo" (list (eshell-escape-arg "*|") "bar")))) + +(em-extpipe-tests--deftest em-extpipe-test-12 ">foo bar *| baz" + (should-parse + '(eshell-named-command "sh" (list "-c" ">foo bar | baz")))) + +(em-extpipe-tests--deftest em-extpipe-test-13 "foo*|bar" + (should-parse '(eshell-execute-pipeline + '((eshell-named-command (concat "foo" "*")) + (eshell-named-command "bar"))))) + +(em-extpipe-tests--deftest em-extpipe-test-14 "tac *