From mboxrd@z Thu Jan 1 00:00:00 1970 Path: news.gmane.org!not-for-mail From: Artur Malabarba Newsgroups: gmane.emacs.devel Subject: Re: ASCII-folded search [was: Re: Upcoming loss of usability ...] Date: Mon, 22 Jun 2015 22:03:56 +0100 Message-ID: References: <20150615142237.GA3517@acm.fritz.box> <87y4jkhqh5.fsf@uwakimon.sk.tsukuba.ac.jp> <557F3C22.4060909@cs.ucla.edu> <5580D356.4050708@cs.ucla.edu> <87si9qonxb.fsf@gnu.org> <87ioamz8if.fsf@petton.fr> <32013464-2300-46c6-ba46-4a3c36bfee5d@default> <87twu62nnt.fsf@mbork.pl> <87oakdfwim.fsf@uwakimon.sk.tsukuba.ac.jp> <83wpz1lh7c.fsf@gnu.org> <83oakdl7yj.fsf@gnu.org> <83ioall3x5.fsf@gnu.org> Reply-To: bruce.connor.am@gmail.com NNTP-Posting-Host: plane.gmane.org Mime-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: quoted-printable X-Trace: ger.gmane.org 1435007058 31731 80.91.229.3 (22 Jun 2015 21:04:18 GMT) X-Complaints-To: usenet@ger.gmane.org NNTP-Posting-Date: Mon, 22 Jun 2015 21:04:18 +0000 (UTC) Cc: Stephen Turnbull , Stefan Monnier , emacs-devel To: Eli Zaretskii Original-X-From: emacs-devel-bounces+ged-emacs-devel=m.gmane.org@gnu.org Mon Jun 22 23:04:17 2015 Return-path: Envelope-to: ged-emacs-devel@m.gmane.org Original-Received: from lists.gnu.org ([208.118.235.17]) by plane.gmane.org with esmtp (Exim 4.69) (envelope-from ) id 1Z78t7-0007am-1k for ged-emacs-devel@m.gmane.org; Mon, 22 Jun 2015 23:04:17 +0200 Original-Received: from localhost ([::1]:41999 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1Z78t6-0007Av-CF for ged-emacs-devel@m.gmane.org; Mon, 22 Jun 2015 17:04:16 -0400 Original-Received: from eggs.gnu.org ([2001:4830:134:3::10]:48620) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1Z78sq-0007Am-Oi for emacs-devel@gnu.org; Mon, 22 Jun 2015 17:04:02 -0400 Original-Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1Z78sp-00030m-Ac for emacs-devel@gnu.org; Mon, 22 Jun 2015 17:04:00 
-0400 Original-Received: from mail-lb0-x231.google.com ([2a00:1450:4010:c04::231]:34686) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1Z78sn-0002z6-5p; Mon, 22 Jun 2015 17:03:57 -0400 Original-Received: by lbnk3 with SMTP id k3so10464656lbn.1; Mon, 22 Jun 2015 14:03:56 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20120113; h=mime-version:reply-to:sender:in-reply-to:references:date:message-id :subject:from:to:cc:content-type:content-transfer-encoding; bh=McvhC55PH9Jx+aqwrytRHaocUq7Jx+FktA6tCPnnr/Q=; b=fK7ZdYc3PttexLXn4LZaT/NYdq3aXyEwr2yZmhzIqJbjvKZ4hRvQ2umUjYMySbxmuc KSxiETechowjBCVqkUIVA3h+ZLdhOinlxbSANSTD75EKzw/uuNdWRlbvc4ZHvOo7mVvd 0vyuJhNsH6lLWSlzGk88i8z64Ke72A19e8M8RfRm4V8Gdg0TRP3rPsZDERHIGxnv8YAU GV9nX2SvemU6P4NwSyJBiu4crcudob2muO1Dz5s3mOydcGXTQLz6B6V2WnL2VBZzJkNk GdxgOvkwP4j6HaQxZbV3zlGM1QSTBNAJLV0zDyArXyZE1GzCaFopErMeRYcyEpTsfOeA 4m4Q== X-Received: by 10.112.126.101 with SMTP id mx5mr27074817lbb.35.1435007036455; Mon, 22 Jun 2015 14:03:56 -0700 (PDT) Original-Received: by 10.25.214.133 with HTTP; Mon, 22 Jun 2015 14:03:56 -0700 (PDT) In-Reply-To: X-Google-Sender-Auth: mNdJ1R7Z3BklpcNrnY-SbSYUtcs X-detected-operating-system: by eggs.gnu.org: Error: Malformed IPv6 address (bad octet value). X-Received-From: 2a00:1450:4010:c04::231 X-BeenThere: emacs-devel@gnu.org X-Mailman-Version: 2.1.14 Precedence: list List-Id: "Emacs development discussions." List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: emacs-devel-bounces+ged-emacs-devel=m.gmane.org@gnu.org Original-Sender: emacs-devel-bounces+ged-emacs-devel=m.gmane.org@gnu.org Xref: news.gmane.org gmane.emacs.devel:187396 Archived-At: > Shall I merge? (It adds about 5 seconds of compile time in my laptop) Inlining the patch attached above (sorry, force of habit). 
From: Artur Malabarba Date: Tue, 27 Jan 2015 14:08:01 -0200 Subject: [PATCH] * lisp/isearch.el: Fold many unicode characters to ASCII (isearch-character-fold-search, isearch--character-fold-extras) (isearch--character-fold-table): New variable. (isearch--character-folded-regexp): New function. (isearch-search-fun-default): Use them. * lisp/replace.el (replace-character-fold): New variable. (replace-search): Use it. --- lisp/isearch.el | 75 +++++++++++++++++++++++++++++++++++++++++++++++++++++= ++++ lisp/replace.el | 9 +++++++ 2 files changed, 84 insertions(+) diff --git a/lisp/isearch.el b/lisp/isearch.el index d1b92bd..eb0f965 100644 --- a/lisp/isearch.el +++ b/lisp/isearch.el @@ -272,6 +272,74 @@ Default value, nil, means edit the string instead." :version "23.1" :group 'isearch) +(defvar isearch-character-fold-search t + "Whether regular isearch should fold similar characters. +This means some characters will match entire groups of characters, +such as \" matching all variants of double quotes, for instance.") + +(defconst isearch--character-fold-extras + '((?\" "=EF=BC=82" "=E2=80=9C" "=E2=80=9D" "=E2=80=9D" "=E2=80=9E" "=E2= =B9=82" "=E3=80=9E" "=E2=80=9F" "=E2=80=9F" "=E2=9D=9E" "=E2=9D=9D" "=E2=9D= =A0" "=E2=80=9C" "=E2=80=9E" "=E3=80=9D" "=E3=80=9F" "=F0=9F=99=B7" "=F0=9F=99=B6" "=F0=9F=99=B8" "=C2=AB" "=C2=BB") + (?' "`" "=E2=9D=9F" "=E2=9D=9B" "=E2=9D=9C" "=E2=80=98" "=E2=80=99" "= =E2=80=9A" "=E2=80=9B" "=E2=80=9A" "=F3=A0=80=A2" "=E2=9D=AE" "=E2=9D=AF" "= =E2=80=B9" "=E2=80=BA") + (?` "=E2=9D=9B" "=E2=80=98" "=E2=80=9B" "=F3=A0=80=A2" "=E2=9D=AE" "= =E2=80=B9") + ;; `isearch-character-fold-search' doesn't interact with + ;; `isearch-lax-whitespace' yet. So we need to add this here. + (?\s " " "\r" "\n")) + "Extra entries to add to `isearch--character-fold-table'. +Used to specify character folding not covered by unicode +decomposition. 
Each car is a character and each cdr is a list of +strings that it should match (itself excluded).") + +(defvar isearch--character-fold-table + (eval-when-compile (funcall (byte-compile (lambda () + (require 'subr-x) + (let ((equiv (make-char-table 'character-fold-table))) + ;; Compile a list of all complex characters that each simple + ;; character should match. + (dotimes (i (length equiv)) + (let ((dd (get-char-code-property i 'decomposition)) + d k found) + ;; Skip trivial cases (?a decomposes to (?a)). + (unless (and (eq i (car dd))) + ;; Discard a possible formatting tag. + (when (symbolp (car-safe dd)) + (setq dd (cdr dd))) + ;; Is k a number or letter, per unicode standard? + (setq d dd) + (while (and d (not found)) + (setq k (pop d)) + (setq found (and (characterp k) + (memq (get-char-code-property k 'general-category) + '(Lu Ll Lt Lm Lo Nd Nl No))))) + ;; If there's no number or letter on the + ;; decomposition, find the first character in it. + (setq d dd) + (while (and d (not found)) + (setq k (pop d)) + (setq found (characterp k))) + ;; Add i to the list of characters that k can + ;; represent. Also add its decomposition, so we can + ;; match multi-char representations like (format "a%c" 769) + (when (and found (not (eq i k))) + (aset equiv k (cons (apply #'string dd) + (cons (string i) + (aref equiv k)))))))) + (dotimes (i (length equiv)) + (when-let ((chars (append (cdr (assq i isearch--character-fold-extr= as)) + (aref equiv i)))) + (aset equiv i (regexp-opt (cons (string i) chars))))) + equiv))))) + "Used for folding characters of the same group during search.") + +(defun isearch--character-folded-regexp (string) + "Return a regexp matching anything that character-folds into STRING. +That is, any character in STRING that has an entry in +`isearch--character-fold-table' is replaced with that entry (which is a +regexp). Other characters are `regexp-quote'd." 
+ (apply #'concat + (mapcar (lambda (c) (or (aref isearch--character-fold-table c) + (regexp-quote (string c)))) + string))) + (defcustom isearch-lazy-highlight t "Controls the lazy-highlighting during incremental search. When non-nil, all text in the buffer matching the current search @@ -2607,6 +2675,13 @@ Can be changed via `isearch-search-fun-function' for special needs." 're-search-backward-lax-whitespace)) (isearch-regexp (if isearch-forward 're-search-forward 're-search-backward)) + ;; `isearch-regexp' is essentially a superset of + ;; `isearch-character-fold-search'. So character folding comes after it. + (isearch-character-fold-search + (lambda (string &optional bound noerror count) + (funcall (if isearch-forward #'re-search-forward #'re-search-backwar= d) + (isearch--character-folded-regexp string) + bound noerror count))) ((and isearch-lax-whitespace search-whitespace-regexp) (if isearch-forward 'search-forward-lax-whitespace diff --git a/lisp/replace.el b/lisp/replace.el index 1bf1343..96bbd61 100644 --- a/lisp/replace.el +++ b/lisp/replace.el @@ -33,6 +33,14 @@ :type 'boolean :group 'matching) +(defcustom replace-character-fold t + "Non-nil means `query-replace' should do character folding in matches. +This means, for instance, that ' will match a large variety of +unicode quotes." + :type 'boolean + :group 'matching + :version "25.1") + (defcustom replace-lax-whitespace nil "Non-nil means `query-replace' matches a sequence of whitespace chars. When you enter a space or spaces in the strings to be replaced, @@ -2003,6 +2011,7 @@ It is called with three arguments, as if it were ;; used after `recursive-edit' might override them. (let* ((isearch-regexp regexp-flag) (isearch-word delimited-flag) + (isearch-character-fold-search replace-character-fold) (isearch-lax-whitespace replace-lax-whitespace) (isearch-regexp-lax-whitespace --=20 2.4.4