unofficial mirror of bug-gnu-emacs@gnu.org 
 help / color / mirror / code / Atom feed
From: Dmitry Gutov <dgutov@yandex.ru>
To: Juri Linkov <juri@linkov.net>
Cc: abela@chalmers.se, 31796@debbugs.gnu.org
Subject: bug#31796: 27.1; dired-do-find-regexp-and-replace fails to find multiline regexps
Date: Wed, 16 Dec 2020 05:00:33 +0200	[thread overview]
Message-ID: <cc448aff-dc72-8424-390b-f3c6151c9e84@yandex.ru> (raw)
In-Reply-To: <87eek2902v.fsf@mail.linkov.net>

[-- Attachment #1: Type: text/plain, Size: 1028 bytes --]

On 06.12.2020 23:00, Juri Linkov wrote:
>>> dired-do-find-regexp uses 'ignores' to filter out ignored files.
>>> You could add another filter to filter out files without matches
>>> using 'grep -PzL'.
>> Right. This is sorta a backup plan. Although, when the number of files to
>> search can be counted on one hand, there's nothing too bad in doing the
>> search in Emacs.
> Another backup plan is to use ripgrep.  Its multiline handling with -U
> also allows searching for words while ignoring any whitespace, even newlines.
> This is like isearch-lax-whitespace using search-whitespace-regexp
> when it contains a newline, e.g. "[ \t\r\n]+".

Right. ripgrep has a problem of its own, though: it still prints the file
name on every output line, even when a match is spread across several
lines (unlike pcregrep). So we're left guessing where a given multiline
match ends.

Also, 'sort' doesn't seem to be able to treat both : and \0 as 
separators at the same time.

Here's a rough patch, for illustration. It's kind of working, but I'm 
not loving it.

[-- Attachment #2: ripgrep-multiline.diff --]
[-- Type: text/x-patch, Size: 3754 bytes --]

diff --git a/lisp/progmodes/xref.el b/lisp/progmodes/xref.el
index 6e99e9d8ac..6bc03ee727 100644
--- a/lisp/progmodes/xref.el
+++ b/lisp/progmodes/xref.el
@@ -1340,7 +1340,7 @@ xref-search-program-alist
      ;; without the '| sort ...' part if GNU sort is not available on
      ;; your system and/or stable ordering is not important to you.
      ;; Note#2: '!*/' is there to filter out dirs (e.g. submodules).
-     "xargs -0 rg <C> -nH --no-messages -g '!*/' -e <R> | sort -t: -k1,1 -k2n,2"
+     "xargs -0 rg <C> -nH --sort path --no-messages -g '!*/' -e <R>"
      ))
   "Associative list mapping program identifiers to command templates.
 
@@ -1390,6 +1390,7 @@ xref-matches-in-files
        ;; The 'auto' default would be fine too, but ripgrep can't handle
        ;; the options we pass in that case.
        (grep-highlight-matches nil)
+       (multiline (string-match-p "\n" regexp))
        (command (grep-expand-template (cdr
                                        (or
                                         (assoc
@@ -1397,7 +1398,14 @@ xref-matches-in-files
                                          xref-search-program-alist)
                                         (user-error "Unknown search program `%s'"
                                                     xref-search-program)))
-                                      (xref--regexp-to-extended regexp))))
+                                      (xref--regexp-to-extended regexp)
+                                      nil
+                                      nil
+                                      nil
+                                      (when multiline '("-U" "--null")))))
+    (if (and multiline (not (eq xref-search-program 'ripgrep)))
+        (user-error "Sorry, multiline searches are not supported with `%s'"
+                    xref-search-program))
     (when remote-id
       (require 'tramp)
       (setq files (mapcar
@@ -1425,6 +1433,27 @@ xref-matches-in-files
                  (not (looking-at "Binary file .* matches")))
         (user-error "Search failed with status %d: %s" status
                     (buffer-substring (point-min) (line-end-position))))
+      (if multiline
+          (let (match line last-line file)
+            (while (re-search-forward "^\\([^\0]+\\)\\(?:\0\\)\\([0-9]+\\):" nil t)
+              (if (and match
+                       (equal file (match-string 1))
+                       (= (string-to-number (match-string 2))
+                          (1+ last-line)))
+                  (progn
+                    (setq last-line (string-to-number (match-string 2))
+                          match (concat match
+                                        "\n"
+                                        (buffer-substring
+                                         (match-end 0)
+                                         (line-end-position)))))
+                (when match
+                  (push (list line file match) hits))
+                (setq match (buffer-substring (match-end 0) (line-end-position))
+                      file (match-string 1)
+                      line (string-to-number (match-string 2))
+                      last-line line)))
+            (push (list line file match) hits)))
       (while (re-search-forward grep-re nil t)
         (push (list (string-to-number (match-string line-group))
                     (match-string file-group)
@@ -1541,7 +1570,7 @@ xref--collect-matches
                (file (and file (concat remote-id file)))
                (buf (xref--find-file-buffer file))
                (syntax-needed (xref--regexp-syntax-dependent-p regexp)))
-    (if buf
+    (if nil
         (with-current-buffer buf
           (save-excursion
             (goto-char (point-min))

  reply	other threads:[~2020-12-16  3:00 UTC|newest]

Thread overview: 61+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-06-11 18:58 bug#31796: 26.1; dired-do-find-regexp-and-replace fails to find multiline regexps Žygimantas Bruzgys
2018-06-12 10:17 ` Noam Postavsky
2020-11-23 21:25   ` Dmitry Gutov
2020-11-23  9:09 ` bug#31796: 27.1; " Andreas Abel
2020-11-23 15:23   ` Eli Zaretskii
2020-11-23 16:16   ` Drew Adams
2020-11-23 21:22     ` Dmitry Gutov
2020-11-24 19:28     ` Juri Linkov
2020-11-24 20:12       ` Drew Adams
2020-11-25  7:31         ` Juri Linkov
2020-11-25 17:37           ` Drew Adams
2020-11-24 20:19       ` Eli Zaretskii
2020-11-24 20:31         ` Juri Linkov
2020-11-24 20:51           ` Drew Adams
2020-11-24 21:07           ` Eli Zaretskii
2020-11-25  7:28             ` Juri Linkov
2020-11-25 15:48               ` Eli Zaretskii
2020-11-25 20:18                 ` Juri Linkov
2020-11-25 20:30                   ` Eli Zaretskii
2020-11-29  2:30                     ` Dmitry Gutov
2020-11-29 15:22                       ` Eli Zaretskii
2020-11-23 21:28   ` Dmitry Gutov
2020-11-23 23:49     ` Andreas Abel
2020-11-24  0:13       ` Dmitry Gutov
2020-11-24  1:19         ` Dmitry Gutov
2020-11-24 15:16       ` Eli Zaretskii
2020-11-24 15:43         ` Dmitry Gutov
2020-11-24 16:35           ` Eli Zaretskii
2020-11-24 19:43             ` Dmitry Gutov
2020-11-24 20:16               ` Eli Zaretskii
2020-11-30  2:25                 ` Dmitry Gutov
2020-11-30  8:49                   ` Juri Linkov
2020-12-01  2:21                     ` Dmitry Gutov
2020-12-01  8:39                       ` Juri Linkov
2020-12-03  2:46                         ` Dmitry Gutov
2020-12-06 21:00                           ` Juri Linkov
2020-12-16  3:00                             ` Dmitry Gutov [this message]
2020-12-16 20:32                               ` Juri Linkov
2020-12-17  0:40                                 ` Dmitry Gutov
2020-11-30 15:30                   ` Eli Zaretskii
2020-11-30 15:39                     ` Jean Louis
2020-11-30 16:36                       ` Eli Zaretskii
2020-11-30 15:42                     ` Jean Louis
2020-12-01  1:23                       ` Dmitry Gutov
2020-12-01  8:36                         ` Juri Linkov
2020-12-01 15:20                           ` Dmitry Gutov
2020-12-01  1:24                     ` Dmitry Gutov
2020-12-01  5:20                   ` Richard Stallman
2020-12-01 15:46                     ` Eli Zaretskii
2020-12-02  4:26                       ` Richard Stallman
2020-12-02 14:56                         ` Eli Zaretskii
2020-12-02 17:17                           ` Dmitry Gutov
2020-12-02 17:39                             ` Eli Zaretskii
2020-12-02 17:43                               ` Dmitry Gutov
2020-12-02 17:47                                 ` Eli Zaretskii
2020-12-03  5:26                                   ` Richard Stallman
2020-12-03  2:23                     ` Dmitry Gutov
2020-11-24 19:29     ` Juri Linkov
2020-11-24 19:39       ` Dmitry Gutov
     [not found] <<CADy8Bt=f=LOE6ODLhhW7ZS6qXRQCzd15Hd0eFKVO8qok98ni8w@mail.gmail.com>
     [not found] ` <<10120030-8b8d-b702-add4-8f099f934ed5@chalmers.se>
     [not found]   ` <<91c98791-9df2-43ee-9aac-205c5b0de9c2@default>
     [not found]     ` <<87blfm6922.fsf@mail.linkov.net>
     [not found]       ` <<838saqtsm9.fsf@gnu.org>
2020-11-24 20:32         ` Drew Adams
     [not found]         ` <<87mtz64htw.fsf@mail.linkov.net>
     [not found]           ` <<831rgitqe2.fsf@gnu.org>
2020-11-24 21:35             ` Drew Adams

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://www.gnu.org/software/emacs/

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=cc448aff-dc72-8424-390b-f3c6151c9e84@yandex.ru \
    --to=dgutov@yandex.ru \
    --cc=31796@debbugs.gnu.org \
    --cc=abela@chalmers.se \
    --cc=juri@linkov.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.

Code repositories for project(s) associated with this public inbox

	https://git.savannah.gnu.org/cgit/emacs.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).