From 3628379cf461805008b34e01dba751183c0b857c Mon Sep 17 00:00:00 2001 From: Robert Pluim Date: Mon, 22 Jul 2019 20:27:59 +0200 Subject: [PATCH] Follow decomposition chains when constructing char-fold-table To: emacs-devel@gnu.org * lisp/char-fold.el (char-fold-make-table): Decompose the decomposition of each character, adding equivalences to the original character, until no more decompositions are left. --- etc/NEWS | 8 ++++++++ lisp/char-fold.el | 21 +++++++++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/etc/NEWS b/etc/NEWS index e9ec21bb4c..33fe7075ec 100644 --- a/etc/NEWS +++ b/etc/NEWS @@ -1169,6 +1169,14 @@ and case-sensitivity together with search strings in the search ring. +++ *** 'flush-lines' prints and returns the number of deleted matching lines. +--- +*** 'char-fold-to-regexp' now matches more variants of a base character. +The table used to check for equivalence of characters is now built +using the complete chain of Unicode decompositions of a character, +rather than stopping after one level, such that searching for +e.g. GREEK SMALL LETTER IOTA will now also find GREEK SMALL LETTER +IOTA WITH OXIA. + ** Debugger +++ diff --git a/lisp/char-fold.el b/lisp/char-fold.el index 9d3ea17b41..6842d38a62 100644 --- a/lisp/char-fold.el +++ b/lisp/char-fold.el @@ -78,6 +78,27 @@ (cons (char-to-string char) (aref equiv (car decomp)))))))) (funcall make-decomp-match-char decomp char) + ;; Check to see if the first char of the decomposition + ;; has a further decomposition. If so, add a mapping + ;; back from that second decomposition to the original + ;; character. This allows e.g. 'ι' (GREEK SMALL LETTER + ;; IOTA) to match both the Basic Greek block and + ;; Extended Greek block variants of IOTA + + ;; diacritical(s). Repeat until there are no more + ;; decompositions. 
+ (let ((dec decomp) + next-decomp) + (catch 'done + (while dec + (setq next-decomp (char-table-range table (car dec))) + (when (consp next-decomp) + (when (symbolp (car next-decomp)) + (setq next-decomp (cdr next-decomp))) + (if (not (eq (car dec) + (car next-decomp))) + (funcall make-decomp-match-char (list (car next-decomp)) char) + (throw 'done t))) + (setq dec next-decomp)))) ;; Do it again, without the non-spacing characters. ;; This allows 'a' to match 'ä'. (let ((simpler-decomp nil) -- 2.21.0.419.gffac537e6c