unofficial mirror of bug-gnu-emacs@gnu.org 
 help / color / mirror / code / Atom feed
blob f0c50a9b00dce95f864bf78398222c4fcf6c64a7 5787 bytes (raw)
name: lisp/character-fold.el 	 # note: path name is non-authoritative(*)

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
 
;;; character-fold.el --- match unicode to similar ASCII -*- lexical-binding: t; -*-

;; Copyright (C) 2015 Free Software Foundation, Inc.

;; Maintainer: emacs-devel@gnu.org
;; Keywords: matching

;; This file is part of GNU Emacs.

;; GNU Emacs is free software: you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.

;; GNU Emacs is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.

;;; Code:

\f
;;;###autoload
(defvar character-fold-search t
  "Non-nil if searches should fold similar characters.
This means some characters will match entire groups of characters.
For instance, \" will match all variants of double quotes, and
the letter a will match all of its accented versions (and then
some).
When this is nil, `character-fold-to-regexp' simply returns its
argument passed through `regexp-quote'.")

(defconst character-fold-table
  (eval-when-compile
    (let* ((equiv (make-char-table 'character-fold-table))
           (table (unicode-property-table-internal 'decomposition))
           (func (char-table-extra-slot table 1))
           ;; Unicode general categories treated as "letter or number".
           (alnum-categories '(Lu Ll Lt Lm Lo Nd Nl No)))
      ;; Ensure the table is populated: for every range entry, call the
      ;; table's own function from extra slot 1 on the start of the range.
      (map-char-table
       (lambda (i v) (when (consp i) (funcall func (car i) v table)))
       table)

      ;; Compile a list of all complex characters that each simple
      ;; character should match.
      (map-char-table
       (lambda (i dec)
         (when (consp dec)
           ;; Discard a possible formatting tag.
           (when (symbolp (car dec))
             (setq dec (cdr dec)))
           ;; Skip trivial cases like ?a decomposing to (?a).
           (unless (and (eq i (car dec))
                        (not (cdr dec)))
             (let ((d dec)
                   (fold-decomp t)
                   k found)
               ;; Walk the decomposition until the first letter or
               ;; number; K is the last element examined and D holds
               ;; whatever follows it.
               (while (and d (not found))
                 (setq k (pop d))
                 ;; Is k a number or letter, per unicode standard?
                 (setq found (memq (get-char-code-property k 'general-category)
                                   alnum-categories)))
               (if found
                   ;; Check if the decomposition has more than one letter,
                   ;; because then we don't want the first letter to match
                   ;; the decomposition.
                   (dolist (k d)
                     (when (and fold-decomp
                                (memq (get-char-code-property k 'general-category)
                                      alnum-categories))
                       (setq fold-decomp nil)))
                 ;; If there's no number or letter on the
                 ;; decomposition, take the first character in it.
                 ;; NOTE(review): only FOUND is updated here; K still
                 ;; holds the last element of DEC, and K is what the
                 ;; entry below is keyed on.  Confirm that is intended.
                 (setq found (car-safe dec)))
               ;; Finally, we only fold multi-char decomposition if at
               ;; least one of the chars is non-spacing (combining).
               (when fold-decomp
                 (setq fold-decomp nil)
                 (dolist (k dec)
                   (when (and (not fold-decomp)
                              (> (get-char-code-property k 'canonical-combining-class) 0))
                     (setq fold-decomp t))))
               ;; Add i to the list of characters that k can
               ;; represent.  Also possibly add its decomposition, so we can
               ;; match multi-char representations like (format "a%c" 769)
               (when (and found (not (eq i k)))
                 (let ((chars (cons (char-to-string i) (aref equiv k))))
                   (aset equiv k
                         (if fold-decomp
                             (cons (apply #'string dec) chars)
                           chars))))))))
       table)

      ;; Add some manual entries.  Each element is (CHAR . STRINGS):
      ;; STRINGS are prepended to whatever CHAR already matches.
      ;; The first string for ?\" is U+FF02 FULLWIDTH QUOTATION MARK.
      (dolist (it '((?\" "＂" "“" "”" "”" "„" "⹂" "〞" "‟" "‟" "❞" "❝" "❠" "“" "„" "〝" "〟" "🙷" "🙶" "🙸" "«" "»")
                    (?' "❟" "❛" "❜" "‘" "’" "‚" "‛" "‚" "󠀢" "❮" "❯" "‹" "›")
                    (?` "❛" "‘" "‛" "󠀢" "❮" "‹")
                    (?\s "\t" "\r" "\n")))
        (let ((idx (car it))
              (chars (cdr it)))
          (aset equiv idx (append chars (aref equiv idx)))))

      ;; Convert the lists of characters we compiled into regexps.
      ;; Each regexp also matches the key character itself.
      ;; NOTE(review): `char-to-string' is applied to I before the
      ;; `consp' check, so a range key would signal an error rather
      ;; than reach the `set-char-table-range' branch.
      (map-char-table
       (lambda (i v) (let ((re (regexp-opt (cons (char-to-string i) v))))
                  (if (consp i)
                      (set-char-table-range equiv i re)
                    (aset equiv i re))))
       equiv)
      equiv))
  "Char-table used for folding characters of the same group during search.
A character with an entry maps to a regexp (a string) that
matches the character itself as well as every string collected
for it: characters whose Unicode decomposition leads back to it,
possibly the decomposed sequences themselves, and a few manually
listed quote and whitespace variants.  Characters without an
entry map to nil.")

;;;###autoload
(defun character-fold-to-regexp (string &optional lax)
  "Return a regexp matching anything that character-folds into STRING.
If `character-fold-search' is nil, just `regexp-quote' STRING.
Otherwise, any character in STRING that has an entry in
`character-fold-table' is replaced with that entry (which is a
regexp) and other characters are `regexp-quote'd.
If LAX is non-nil, any single whitespace character (space, tab,
carriage return or newline) in STRING is allowed to match one or
more times; if LAX is nil, whitespace gets no repetition operator."
  (if character-fold-search
      (mapconcat
       (lambda (c)
         (let ((out (or (aref character-fold-table c)
                        (regexp-quote (string c)))))
           ;; Only loosen whitespace when the caller asked for it;
           ;; previously LAX was ignored and "+" was always appended.
           (if (and lax (memq c '(?\s ?\t ?\r ?\n)))
               (concat out "+")
             out)))
       string "")
    (regexp-quote string)))

;;; character-fold.el ends here

debug log:

solving db77845 ...
found db77845 in https://yhetil.org/emacs-bugs/87twshqv9s.fsf@mail.linkov.net/
found bf5ae59 in https://git.savannah.gnu.org/cgit/emacs.git
preparing index
index prepared:
100644 bf5ae59f41ac49f98ab48c165e36d9dc99e1e043	lisp/character-fold.el

applying [1/1] https://yhetil.org/emacs-bugs/87twshqv9s.fsf@mail.linkov.net/
diff --git a/lisp/character-fold.el b/lisp/character-fold.el
index bf5ae59..db77845 100644

Checking patch lisp/character-fold.el...
Applied patch lisp/character-fold.el cleanly.

index at:
100644 f0c50a9b00dce95f864bf78398222c4fcf6c64a7	lisp/character-fold.el

(*) Git path names are given by the tree(s) the blob belongs to.
    Blobs themselves have no identifier aside from the hash of their contents.

Code repositories for project(s) associated with this public inbox

	https://git.savannah.gnu.org/cgit/emacs.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).