unofficial mirror of bug-gnu-emacs@gnu.org 
 help / color / mirror / code / Atom feed
From: Juri Linkov <juri@jurta.org>
To: Stefan Monnier <monnier@IRO.UMontreal.CA>
Cc: 10145@debbugs.gnu.org
Subject: bug#10145: 24.0.91; Word Isearch backward
Date: Thu, 01 Dec 2011 09:27:14 +0200	[thread overview]
Message-ID: <878vmwu55q.fsf@mail.jurta.org> (raw)
In-Reply-To: <jwvmxbdpgt0.fsf-monnier+emacs@gnu.org> (Stefan Monnier's message of "Wed, 30 Nov 2011 14:11:18 -0500")

> `word-search-regexp' is good, thank you.
>
>> I'll provide an Elisp version once the function name is agreed upon.
>
> Thanks.  Of course, that would/will have to wait for 24.2.

This is intended to fix a bug reported by Dani for 24.1.

Below is a complete patch.  Please decide what to do.

The Elisp version passes all regression tests and its output
is identical to the output of the C version:

(word-search-regexp "")        ""
(word-search-regexp " ")       ""
(word-search-regexp "w")       "\\bw\\b"
(word-search-regexp " w")      "\\bw\\b"
(word-search-regexp "w ")      "\\bw\\b"
(word-search-regexp " w ")     "\\bw\\b"
(word-search-regexp "w w")     "\\bw\\W\\W*w\\b"
(word-search-regexp " w w")    "\\bw\\W\\W*w\\b"
(word-search-regexp "w w ")    "\\bw\\W\\W*w\\b"
(word-search-regexp " w w ")   "\\bw\\W\\W*w\\b"
(word-search-regexp "" t)      ""
(word-search-regexp " " t)     ""
(word-search-regexp "w" t)     "\\bw"
(word-search-regexp " w" t)    "\\bw"
(word-search-regexp "w " t)    "\\bw\\b"
(word-search-regexp " w " t)   "\\bw\\b"
(word-search-regexp "w w" t)   "\\bw\\W\\W*w"
(word-search-regexp " w w" t)  "\\bw\\W\\W*w"
(word-search-regexp "w w " t)  "\\bw\\W\\W*w\\b"
(word-search-regexp " w w " t) "\\bw\\W\\W*w\\b"

=== modified file 'lisp/isearch.el'
--- lisp/isearch.el	2011-11-29 18:39:16 +0000
+++ lisp/isearch.el	2011-12-01 07:27:07 +0000
@@ -1380,6 +1389,20 @@ (defun isearch-toggle-case-fold ()
   (sit-for 1)
   (isearch-update))
 
+(defun word-search-regexp (string &optional lax)
+  "Return a regexp which matches words, ignoring punctuation.
+Given STRING, a string of words separated by word delimiters,
+compute a regexp that matches those exact words separated by
+arbitrary punctuation.  If LAX is non-nil, the end of the string
+need not match a word boundary unless it ends in whitespace.
+Used in `word-search-forward' and `word-search-backward'."
+  (if (string-match-p "\\`\\W*\\'" string) ; whole STRING, not just one line
+      ""
+    (concat
+     "\\b"
+     (mapconcat 'identity (split-string string "\\W\\W*" t) "\\W\\W*")
+     (if (or (not lax) (string-match-p "\\W\\'" string)) "\\b")))) ; true end of STRING
+
 (defun isearch-query-replace (&optional delimited regexp-flag)
   "Start `query-replace' with string to replace from last search string.
 The arg DELIMITED (prefix arg if interactive), if non-nil, means replace
@@ -1642,8 +1660,10 @@ (defun isearch-search-and-update ()
 		   (if (and (eq case-fold-search t) search-upper-case)
 		       (setq case-fold-search
 			     (isearch-no-upper-case-p isearch-string isearch-regexp)))
-		   (looking-at (if isearch-regexp isearch-string
-				 (regexp-quote isearch-string))))
+		   (looking-at (cond
+				(isearch-regexp isearch-string)
+				(isearch-word (word-search-regexp isearch-string t))
+				(t (regexp-quote isearch-string)))))
 	       (error nil))
 	     (or isearch-yank-flag
 		 (<= (match-end 0)

=== modified file 'src/search.c'
--- src/search.c	2011-11-27 18:17:40 +0000
+++ src/search.c	2011-12-01 07:27:07 +0000
@@ -2078,99 +2077,6 @@ (at your option) any later version.
   XSETBUFFER (last_thing_searched, current_buffer);
 }
 \f
-/* Given STRING, a string of words separated by word delimiters,
-   compute a regexp that matches those exact words separated by
-   arbitrary punctuation.  If LAX is nonzero, the end of the string
-   need not match a word boundary unless it ends in whitespace.  */
-
-static Lisp_Object
-wordify (Lisp_Object string, int lax)
-{
-  register unsigned char *o;
-  register EMACS_INT i, i_byte, len, punct_count = 0, word_count = 0;
-  Lisp_Object val;
-  int prev_c = 0;
-  EMACS_INT adjust;
-  int whitespace_at_end;
-
-  CHECK_STRING (string);
-  len = SCHARS (string);
-
-  for (i = 0, i_byte = 0; i < len; )
-    {
-      int c;
-
-      FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte);
-
-      if (SYNTAX (c) != Sword)
-	{
-	  punct_count++;
-	  if (SYNTAX (prev_c) == Sword)
-	    word_count++;
-	}
-
-      prev_c = c;
-    }
-
-  if (SYNTAX (prev_c) == Sword)
-    {
-      word_count++;
-      whitespace_at_end = 0;
-    }
-  else
-    {
-      whitespace_at_end = 1;
-      if (!word_count)
-	return empty_unibyte_string;
-    }
-
-  adjust = - punct_count + 5 * (word_count - 1)
-    + ((lax && !whitespace_at_end) ? 2 : 4);
-  if (STRING_MULTIBYTE (string))
-    val = make_uninit_multibyte_string (len + adjust,
-					SBYTES (string)
-					+ adjust);
-  else
-    val = make_uninit_string (len + adjust);
-
-  o = SDATA (val);
-  *o++ = '\\';
-  *o++ = 'b';
-  prev_c = 0;
-
-  for (i = 0, i_byte = 0; i < len; )
-    {
-      int c;
-      EMACS_INT i_byte_orig = i_byte;
-
-      FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte);
-
-      if (SYNTAX (c) == Sword)
-	{
-	  memcpy (o, SDATA (string) + i_byte_orig, i_byte - i_byte_orig);
-	  o += i_byte - i_byte_orig;
-	}
-      else if (SYNTAX (prev_c) == Sword && --word_count)
-	{
-	  *o++ = '\\';
-	  *o++ = 'W';
-	  *o++ = '\\';
-	  *o++ = 'W';
-	  *o++ = '*';
-	}
-
-      prev_c = c;
-    }
-
-  if (!lax || whitespace_at_end)
-    {
-      *o++ = '\\';
-      *o++ = 'b';
-    }
-
-  return val;
-}
-\f
 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
        "MSearch backward: ",
        doc: /* Search backward from point for STRING.
@@ -2209,6 +2115,9 @@ (at your option) any later version.
   return search_command (string, bound, noerror, count, 1, 0, 0);
 }
 
+/* Function that returns a regexp which matches words, ignoring punctuation.  */
+static Lisp_Object Qword_search_regexp;
+
 DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
        "sWord search backward: ",
        doc: /* Search backward from point for STRING, ignoring differences in punctuation.
@@ -2217,10 +2126,15 @@ (at your option) any later version.
 The match found must not extend before that position.
 Optional third argument, if t, means if fail just return nil (no error).
   If not nil and not t, move to limit of search and return nil.
-Optional fourth argument is repeat count--search for successive occurrences.  */)
+Optional fourth argument is repeat count--search for successive occurrences.
+
+Relies on the function `word-search-regexp' to convert a sequence
+of words in STRING to a regexp used to search words without regard
+to punctuation.  */)
   (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
 {
-  return search_command (wordify (string, 0), bound, noerror, count, -1, 1, 0);
+  return search_command (call2 (Qword_search_regexp, string, Qnil),
+			 bound, noerror, count, -1, 1, 0);
 }
 
 DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
@@ -2231,10 +2145,15 @@ (at your option) any later version.
 The match found must not extend after that position.
 Optional third argument, if t, means if fail just return nil (no error).
   If not nil and not t, move to limit of search and return nil.
-Optional fourth argument is repeat count--search for successive occurrences.  */)
+Optional fourth argument is repeat count--search for successive occurrences.
+
+Relies on the function `word-search-regexp' to convert a sequence
+of words in STRING to a regexp used to search words without regard
+to punctuation.  */)
   (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
 {
-  return search_command (wordify (string, 0), bound, noerror, count, 1, 1, 0);
+  return search_command (call2 (Qword_search_regexp, string, Qnil),
+			 bound, noerror, count, 1, 1, 0);
 }
 
 DEFUN ("word-search-backward-lax", Fword_search_backward_lax, Sword_search_backward_lax, 1, 4,
@@ -2249,10 +2168,15 @@ (at your option) any later version.
 The match found must not extend before that position.
 Optional third argument, if t, means if fail just return nil (no error).
   If not nil and not t, move to limit of search and return nil.
-Optional fourth argument is repeat count--search for successive occurrences.  */)
+Optional fourth argument is repeat count--search for successive occurrences.
+
+Relies on the function `word-search-regexp' to convert a sequence
+of words in STRING to a regexp used to search words without regard
+to punctuation.  */)
   (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
 {
-  return search_command (wordify (string, 1), bound, noerror, count, -1, 1, 0);
+  return search_command (call2 (Qword_search_regexp, string, Qt),
+			 bound, noerror, count, -1, 1, 0);
 }
 
 DEFUN ("word-search-forward-lax", Fword_search_forward_lax, Sword_search_forward_lax, 1, 4,
@@ -2267,10 +2191,15 @@ (at your option) any later version.
 The match found must not extend after that position.
 Optional third argument, if t, means if fail just return nil (no error).
   If not nil and not t, move to limit of search and return nil.
-Optional fourth argument is repeat count--search for successive occurrences.  */)
+Optional fourth argument is repeat count--search for successive occurrences.
+
+Relies on the function `word-search-regexp' to convert a sequence
+of words in STRING to a regexp used to search words without regard
+to punctuation.  */)
   (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
 {
-  return search_command (wordify (string, 1), bound, noerror, count, 1, 1, 0);
+  return search_command (call2 (Qword_search_regexp, string, Qt),
+			 bound, noerror, count, 1, 1, 0);
 }
 
 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
@@ -3243,4 +3172,5 @@ (at your option) any later version.
   defsubr (&Smatch_data);
   defsubr (&Sset_match_data);
   defsubr (&Sregexp_quote);
+  DEFSYM (Qword_search_regexp, "word-search-regexp");
 }






  reply	other threads:[~2011-12-01  7:27 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-11-26 20:26 bug#10145: 24.0.91; Word Isearch backward Dani Moncayo
2011-11-29  0:33 ` Juri Linkov
2011-11-29  8:40   ` Andreas Schwab
2011-11-30  9:46     ` Juri Linkov
2011-11-30 14:02       ` Stefan Monnier
2011-11-30 15:32         ` Juri Linkov
2011-11-30 19:11           ` Stefan Monnier
2011-12-01  7:27             ` Juri Linkov [this message]
2011-12-01 16:00               ` Stefan Monnier
2011-12-02 10:22                 ` Juri Linkov
2011-12-02 10:48                   ` Eli Zaretskii
2011-12-02 14:38                   ` Stefan Monnier
2011-12-02 17:11                     ` Juri Linkov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://www.gnu.org/software/emacs/

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=878vmwu55q.fsf@mail.jurta.org \
    --to=juri@jurta.org \
    --cc=10145@debbugs.gnu.org \
    --cc=monnier@IRO.UMontreal.CA \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
Code repositories for project(s) associated with this public inbox

	https://git.savannah.gnu.org/cgit/emacs.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).