all messages for Emacs-related lists mirrored at yhetil.org
 help / color / mirror / code / Atom feed
From: Alan Mackenzie <acm@muc.de>
To: Juri Linkov <juri@linkov.net>
Cc: 22097@debbugs.gnu.org
Subject: bug#22097: Ispell: lazy highlighting doesn't work properly.
Date: Wed, 9 Dec 2015 21:59:52 +0000	[thread overview]
Message-ID: <20151209215952.GD1896@acm.fritz.box> (raw)
In-Reply-To: <87fuzgebv4.fsf@mail.linkov.net>

Hello, Juri.

On Sun, Dec 06, 2015 at 01:04:15AM +0200, Juri Linkov wrote:
[quoting removed for clarity]
\(   \|  \|                                                                         \)
  --+  _+  \(   \|                      \)\(  \|    \)*\(                        \)+
             /\w  \(                  \)    \w  [-_]     [.:/@]+\(  \|        \)+
                    \(  \|    \)+[.:@]                            \w  [-_~=?&]
                      \w  [-_]

rrrrrrrrrrr         111111111111           22222222222          3333333333333333    rr

> This is a nice ASCII-art in itself :-), but the problem is in another place -

There are problems in that regexp, too:
(i) As noted in the comment in the source, when - or _ has word syntax,
the regexp is ill-formed - there is ambiguity in \(\w\|[-_]\) because the
- or _ can match either side of the regexp fragment, leading to bad
performance.  This happens in the fragments maked 1, 2, and 3.
(ii) The matching on --+ or _+ damage the lazy highlighting, often
preventing it happening.  But these clauses are there solely to protect
against the poor performance noted in (i).

I have solved these problems by generating this regexp at runtime,
properly taking into account the syntax table.  The fragments 1, 2, and 3
are generated at runtime, and fragments r are now redundant and have been
removed.  This regexp is no longer an element of
ispell-skip-region-alist, instead being dynamically generated wherever
ispell-skip-region-alist is used (3 places).

I haven't yet tried the two patches you sent me.  The one binding
isearch-regexp-function to nil looks obvious and straightforward, the
other one I'll need to look at more carefully.

Here is the current version of my patch for this change.  The comments
and doc strings in it probably are of too low quality to be committed.

[ .... ]



diff --git a/lisp/textmodes/ispell.el b/lisp/textmodes/ispell.el
index 7d5bb6d..9695a6b 100644
--- a/lisp/textmodes/ispell.el
+++ b/lisp/textmodes/ispell.el
@@ -1782,6 +1782,49 @@ ispell-parsing-keyword
 a `~' followed by an extended-character mode -- such as `~.tex'.
 The last occurring definition in the buffer will be used.")
 
+(defun ispell--\\w-filter (char)
+  "Return CHAR in a string when CHAR doesn't have \"word\" syntax,
+nil otherwise.  CHAR must be a character."
+  (let ((str (string char)))
+    (and
+     (not (string-match "\\w" str))
+     str)))
+
+(defun ispell--make-\\w-expression (chars)
+  "Make an expression like \"\\(\\w\\|[-_]\\)\".
+This (parenthesized) expression matches either a character of
+\"word\" syntax or one in CHARS.
+
+CHARS is a string of characters.  A member of CHARS is omitted
+from the expression if it already has word syntax.  (Be sensible
+about special characters such as ?\\, ?^, ?], and ?- in CHARS.)
+If after this filtering there are no chars left, or only one, a
+special form of the expression is generated."
+  (let ((filtered
+	 (mapconcat #'ispell--\\w-filter chars "")))
+    (concat
+     "\\(\\w"
+     (cond
+      ((equal filtered "")
+       "\\)")
+      ((eq (length filtered) 1)
+       (concat "\\|" filtered "\\)"))
+      (t
+       (concat "\\|[" filtered "]\\)"))))))
+
+(defun ispell--make-filename-or-URL-re ()
+  "Construct a regexp to match some file names or URLs or email addresses."
+  (concat ;"\\(--+\\|_+\\|"
+          "\\(/\\w\\|\\("
+          (ispell--make-\\w-expression "-_")
+          "+[.:@]\\)\\)"
+          (ispell--make-\\w-expression "-_")
+          "*\\([.:/@]+"
+          (ispell--make-\\w-expression "-_~=?&")
+          "+\\)+"
+          ;"\\)"
+          ))
+
 ;;;###autoload
 (defvar ispell-skip-region-alist
   `((ispell-words-keyword	   forward-line)
@@ -1798,7 +1841,7 @@ ispell-skip-region-alist
     ;; Matches e-mail addresses, file names, http addresses, etc.  The
     ;; `-+' `_+' patterns are necessary for performance reasons when
     ;; `-' or `_' part of word syntax.
-    (,(purecopy "\\(--+\\|_+\\|\\(/\\w\\|\\(\\(\\w\\|[-_]\\)+[.:@]\\)\\)\\(\\w\\|[-_]\\)*\\([.:/@]+\\(\\w\\|[-_~=?&]\\)+\\)+\\)"))
+;    (,(purecopy "\\(--+\\|_+\\|\\(/\\w\\|\\(\\(\\w\\|[-_]\\)+[.:@]\\)\\)\\(\\w\\|[-_]\\)*\\([.:/@]+\\(\\w\\|[-_~=?&]\\)+\\)+\\)"))
     ;; above checks /.\w sequences
     ;;("\\(--+\\|\\(/\\|\\(\\(\\w\\|[-_]\\)+[.:@]\\)\\)\\(\\w\\|[-_]\\)*\\([.:/@]+\\(\\w\\|[-_~=?&]\\)+\\)+\\)")
     ;; This is a pretty complex regexp.  It can be simplified to the following:
@@ -3387,7 +3430,8 @@ ispell-begin-skip-region-regexp
               (if (string= "" comment-end) "^" (regexp-quote comment-end)))
           (if (and (null ispell-check-comments) comment-start)
               (regexp-quote comment-start))
-          (ispell-begin-skip-region ispell-skip-region-alist)))
+          (ispell-begin-skip-region ispell-skip-region-alist)
+          (ispell--make-filename-or-URL-re)))
    "\\|"))
 
 
@@ -3426,6 +3470,8 @@ ispell-skip-region-list
 The list is of the form described by variable `ispell-skip-region-alist'.
 Must be called after `ispell-buffer-local-parsing' due to dependence on mode."
   (let ((skip-alist ispell-skip-region-alist))
+    (setq skip-alist (append (list (list (ispell--make-filename-or-URL-re)))
+                             skip-alist))
     ;; only additional explicit region definition is tex.
     (if (eq ispell-parser 'tex)
 	(setq case-fold-search nil
@@ -4119,9 +4165,10 @@ ispell-message
 		      (ispell-non-empty-string vm-included-text-prefix)))
 	     (t default-prefix)))
 	   (ispell-skip-region-alist
-	    (cons (list (concat "^\\(" cite-regexp "\\)")
-			(function forward-line))
-		  ispell-skip-region-alist))
+	    (cons (list (ispell--make-filename-or-URL-re))
+                  (cons (list (concat "^\\(" cite-regexp "\\)")
+                              (function forward-line))
+                        ispell-skip-region-alist)))
 	   (old-case-fold-search case-fold-search)
 	   (dictionary-alist ispell-message-dictionary-alist)
 	   (ispell-checking-message t))



-- 
Alan Mackenzie (Nuremberg, Germany).





  parent reply	other threads:[~2015-12-09 21:59 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-12-05 11:42 bug#22097: Ispell: lazy highlighting doesn't work properly Alan Mackenzie
2015-12-05 13:23 ` Eli Zaretskii
2015-12-05 14:06   ` Alan Mackenzie
2015-12-05 14:20     ` Eli Zaretskii
2015-12-05 16:04       ` Alan Mackenzie
2015-12-05 16:47         ` Eli Zaretskii
2015-12-05 23:04         ` Juri Linkov
2015-12-08  0:47           ` Juri Linkov
2015-12-09 21:59           ` Alan Mackenzie [this message]
2020-09-07 16:34             ` Lars Ingebrigtsen
2015-12-10  0:04   ` Juri Linkov
2015-12-10 16:08     ` Alan Mackenzie
2015-12-11 22:45       ` Juri Linkov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20151209215952.GD1896@acm.fritz.box \
    --to=acm@muc.de \
    --cc=22097@debbugs.gnu.org \
    --cc=juri@linkov.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this external index

	https://git.savannah.gnu.org/cgit/emacs.git
	https://git.savannah.gnu.org/cgit/emacs/org-mode.git

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.