unofficial mirror of bug-gnu-emacs@gnu.org 
 help / color / mirror / code / Atom feed
* bug#12611: Bugfix for broken hunspell choices
@ 2012-10-09 12:32 Bastian Ballmann
  2012-10-10 10:48 ` Agustin Martin
  0 siblings, 1 reply; 5+ messages in thread
From: Bastian Ballmann @ 2012-10-09 12:32 UTC (permalink / raw)
  To: 12611

** Description

When using hunspell as the spell checker and setting LANG=de_CH.utf-8, I
often get an empty choices buffer after running M-x ispell. The problem
is the use of the -a parameter: Hunspell outputs only half of the
corrections when run with -a, compared to a run without the -a parameter.

Here's the configuration I tested with

;; Minimal configuration used to reproduce the empty-choices problem
;; with hunspell as the spell checker.
(require 'ispell)

;; Dictionary definitions.  Each entry follows the layout documented for
;; `ispell-dictionary-alist':
;;   (NAME CASECHARS NOT-CASECHARS OTHERCHARS MANY-OTHERCHARS-P
;;    ISPELL-ARGS EXTENDED-CHARACTER-MODE CHARACTER-SET)
(setq ispell-dictionary-base-alist
      '(("de_DE"
         "[a-zäöüßA-ZÄÖÜ]" "[^a-zäöüßA-ZÄÖÜ]" "[']" nil
         ("-d" "de_DE") nil utf-8)

        ("de_CH"
         "[a-zäöüA-ZÄÖÜ]" "[^a-zäöüA-ZÄÖÜ]" "[']" nil
         ("-d" "de_CH") nil utf-8)

        ("en_US"
         "[a-zA-Z]" "[^a-zA-Z]" "[']" nil
         ("-d" "en_US") nil utf-8)

        ("en_GB"
         "[a-zA-Z]" "[^a-zA-Z]" "[']" nil
         ("-d" "en_GB") nil utf-8)))

;; `eval-after-load' is a function, so FORM must be quoted; an unquoted
;; `progn' would be evaluated right here and only its value would be
;; registered as the form to run after loading.
(eval-after-load "ispell"
  '(progn
     (setq ispell-dictionary "de_CH")
     ;; The input file is in TeX or LaTeX format.
     (setq ispell-extra-args '("-t"))
     ;; Save the personal dictionary without confirmation.
     (setq ispell-silently-savep t)))

;; Use hunspell as the spell-checking program.
(setq-default ispell-program-name "hunspell")
(setq ispell-really-hunspell t)
;; Enter the debugger on errors so failures in ispell.el are visible.
(setq debug-on-error t)

The only way I was able to generate choices for all misspelled words
is by skipping the -a parameter.


** Patch

diff --git a/lisp/textmodes/ispell.el b/lisp/textmodes/ispell.el
index 51a4800..2a73926 100644
--- a/lisp/textmodes/ispell.el
+++ b/lisp/textmodes/ispell.el
@@ -1632,7 +1632,8 @@ This allows it to improve the suggestion list 
based on actual misspellings."
                            (point-min) (point-max)
                            ispell-program-name nil
                            output-buf nil
-                          "-a"
+                           ;; -a makes problems with choices when using 
hunspell and utf-8
+                           (if ispell-really-hunspell "" "-a")
                            ;; hunspell -m option means something different
                            (if ispell-really-hunspell "" "-m")
                            ispell-args))
@@ -2577,8 +2578,12 @@ Optional third arg SHIFT is an offset to apply 
based on previous corrections."
     ((eq (aref output 0) ?+)            ; found because of root word
      (substring output 2))              ; return root word
     ((equal 0 (string-match "[\ra-zA-Z]" output))
-    (ding)                             ; error message from ispell!
-    (message "Ispell error: %s" output)
+    (if (not (equal "hunspell" ispell-program-name))
+        (progn
+          (ding)                               ; error message from ispell!
+          (message "Ispell error: %s" output)
+          )
+    )
      (sit-for 5)
      nil)
     (t                                  ; need to process &, ?, and #'s
@@ -2664,7 +2669,8 @@ Keeps argument list for future Ispell invocations 
for no async support."
         (let ((process-connection-type ispell-use-ptys-p))
           (apply 'start-process
                  "ispell" nil ispell-program-name
-                "-a"                   ; Accept single input lines.
+                 ;; -a makes problems with choices when using hunspell 
and utf-8
+                 (if ispell-really-hunspell "" "-a")
                   ;; Make root/affix combos not in dict.
                   ;; hunspell -m option means different.
                  (if ispell-really-hunspell "" "-m")
@@ -2752,30 +2758,34 @@ Keeps argument list for future Ispell 
invocations for no async support."
           (set-process-coding-system ispell-process 
(ispell-get-coding-system)
                                      (ispell-get-coding-system)))
        ;; Get version ID line
-      (ispell-accept-output 3)
-      ;; get more output if filter empty?
-      (if (null ispell-filter) (ispell-accept-output 3))
-      (cond ((null ispell-filter)
-            (error "%s did not output version line" ispell-program-name))
-           ((and
-             (stringp (car ispell-filter))
-             (if (string-match "warning: " (car ispell-filter))
-                 (progn
-                   (ispell-accept-output 3) ; was warn msg.
-                   (stringp (car ispell-filter)))
-               (null (cdr ispell-filter)))
-             (string-match "^@(#) " (car ispell-filter)))
-            ;; got the version line as expected (we already know it's 
the right
-            ;; version, so don't bother checking again.)
-            nil)
-           (t
-            ;; Otherwise, it must be an error message.  Show the user.
-            ;; But first wait to see if some more output is going to 
arrive.
-            ;; Otherwise we get cool errors like "Can't open ".
-            (sleep-for 1)
-            (ispell-accept-output 3)
-            (error "%s" (mapconcat 'identity ispell-filter "\n"))))
-      (setq ispell-filter nil)         ; Discard version ID line
+      (if (not (eq ispell-program-name "hunspell"))
+          (progn
+            (ispell-accept-output 3)
+            ;; get more output if filter empty?
+            (if (null ispell-filter) (ispell-accept-output 3))
+            (cond ((null ispell-filter)
+                   (error "%s did not output version line" 
ispell-program-name))
+                  ((and
+                    (stringp (car ispell-filter))
+                    (if (string-match "warning: " (car ispell-filter))
+                        (progn
+                          (ispell-accept-output 3) ; was warn msg.
+                          (stringp (car ispell-filter)))
+                      (null (cdr ispell-filter)))
+                    (string-match "^@(#) " (car ispell-filter)))
+                   ;; got the version line as expected (we already know 
it's the right
+                   ;; version, so don't bother checking again.)
+                   nil)
+                  (t
+                   ;; Otherwise, it must be an error message.  Show the 
user.
+                   ;; But first wait to see if some more output is 
going to arrive.
+                   ;; Otherwise we get cool errors like "Can't open ".
+                   (sleep-for 1)
+                   (ispell-accept-output 3)
+                   (error "%s" (mapconcat 'identity ispell-filter "\n"))))
+            (setq ispell-filter nil)           ; Discard version ID line
+            )
+      )
        (let ((extended-char-mode (ispell-get-extended-character-mode)))
         (if extended-char-mode          ; ~ extended character mode
             (ispell-send-string (concat extended-char-mode "\n"))))

Have a nice day!

Basti

-- 
ETH Zürich, Bastian Ballmann, IT Service Group
CAB E 44.1, Universitätsstrasse 6, CH-8092 Zürich
Tel +41 44 632 72 04






^ permalink raw reply related	[flat|nested] 5+ messages in thread

* bug#12611: Bugfix for broken hunspell choices
  2012-10-09 12:32 bug#12611: Bugfix for broken hunspell choices Bastian Ballmann
@ 2012-10-10 10:48 ` Agustin Martin
  2012-10-10 15:57   ` Eli Zaretskii
  0 siblings, 1 reply; 5+ messages in thread
From: Agustin Martin @ 2012-10-10 10:48 UTC (permalink / raw)
  To: 12611

On Tue, Oct 09, 2012 at 02:32:11PM +0200, Bastian Ballmann wrote:
> ** Description
> 
> When using hunspell as spell checker and setting LANG=de_CH.utf-8 i
> often get an empty choices buffer after running M-x ispell. The
> problem is the use of the -a parameter. Hunspell only outputs half
> of the corrections when using -a in contrast to a run without -a
> param.
[...]
> The only way I was able to generate choices for all wrong written
> word is by skiping the -a parameter.

All the ispell.el communication with the spellchecker is done in pipe mode
(-a mode). If you disable it you will completely disable normal
communication.

As a matter of fact, I get no communication with your changes applied, just an
ispell process that is started and not killed on exit. Not to mention that
this also breaks flyspell.el.

You are probably hit by some of the oddities in hunspell pipe mode
regarding UTF-8 characters.

-- 
Regards,





^ permalink raw reply	[flat|nested] 5+ messages in thread

* bug#12611: Bugfix for broken hunspell choices
  2012-10-10 10:48 ` Agustin Martin
@ 2012-10-10 15:57   ` Eli Zaretskii
  2012-10-11 10:53     ` Agustin Martin
  0 siblings, 1 reply; 5+ messages in thread
From: Eli Zaretskii @ 2012-10-10 15:57 UTC (permalink / raw)
  To: Agustin Martin; +Cc: 12611

> Date: Wed, 10 Oct 2012 12:48:26 +0200
> From: Agustin Martin <agustin.martin@hispalinux.es>
> 
> > The only way I was able to generate choices for all wrong written
> > word is by skiping the -a parameter.
> 
> All the ispell.el communication with the spellchecker is done in pipe mode
> (-a mode). If you disable it you will completely disable normal
> communication.

Indeed.

> You are probably hit by some of the oddities in hunspell pipe mode
> regarding UTF-8 characters.

You mean "bugs".  Yes, quite probably.  The solution is to fix
Hunspell (I think I can give the OP patches, if he can build his own
Hunspell).

Failing that, the work-around would be not to use UTF-8 for
communicating with Hunspell.  This should be possible for the de_CH
locale.  I suggest to customize ispell-local-dictionary-alist, and add
to it an entry for de_CH (and any other languages you care about) that
uses a unibyte encoding, such as Latin-1 or Latin-9.  You will also
have to change the CASECHARS and NOT-CASECHARS to use the
corresponding Latin code points, see the value of
ispell-dictionary-base-alist for an example.  This should cause
Hunspell to use the unibyte encoding, which should work around the
problems with UTF-8.





^ permalink raw reply	[flat|nested] 5+ messages in thread

* bug#12611: Bugfix for broken hunspell choices
  2012-10-10 15:57   ` Eli Zaretskii
@ 2012-10-11 10:53     ` Agustin Martin
  2018-08-28 23:00       ` Noam Postavsky
  0 siblings, 1 reply; 5+ messages in thread
From: Agustin Martin @ 2012-10-11 10:53 UTC (permalink / raw)
  To: 12611

On Wed, Oct 10, 2012 at 05:57:44PM +0200, Eli Zaretskii wrote:
> > Date: Wed, 10 Oct 2012 12:48:26 +0200
> > From: Agustin Martin <agustin.martin@hispalinux.es>
> > 
> > > The only way I was able to generate choices for all wrong written
> > > word is by skiping the -a parameter.
> > 
> > All the ispell.el communication with the spellchecker is done in pipe mode
> > (-a mode). If you disable it you will completely disable normal
> > communication.
> 
> Indeed.
> 
> > You are probably hit by some of the oddities in hunspell pipe mode
> > regarding UTF-8 characters.
> 
> You mean "bugs".  Yes, quite probably.  The solution is to fix
> Hunspell (I think I can give the OP patches, if he can build his own
> Hunspell).

Indeed. Browsing into hunspell bug tracker I noticed another report (with
proposed change) that may be related to this problem,

  http://sourceforge.net/tracker/?func=detail&aid=3468022&group_id=143754&atid=756395
  No Encoding of Word for Suggestions in Piped Mode - ID: 3468022

A consequence of this bug seems to be that, when Hunspell is communicated with
in UTF-8, useful suggestions are not generated from an 8-bit dictionary.

This is in addition to the report and changes proposed in

  http://sourceforge.net/tracker/?func=detail&aid=3178449&group_id=143754&atid=756395
 Bad UTF-8 char count in pipe mode - ID: 3178449

Hope they will soon be fixed

-- 
Agustin





^ permalink raw reply	[flat|nested] 5+ messages in thread

* bug#12611: Bugfix for broken hunspell choices
  2012-10-11 10:53     ` Agustin Martin
@ 2018-08-28 23:00       ` Noam Postavsky
  0 siblings, 0 replies; 5+ messages in thread
From: Noam Postavsky @ 2018-08-28 23:00 UTC (permalink / raw)
  To: Agustin Martin; +Cc: 12611

forwarded 12611 forwarded 12611 https://sourceforge.net/p/hunspell/bugs/211/
close 12611
quit

>> You mean "bugs".  Yes, quite probably.  The solution is to fix
>> Hunspell (I think I can give the OP patches, if he can build his own
>> Hunspell).
>
> Indeed. Browsing into hunspell bug tracker I noticed another report (with
> proposed change) that may be related to this problem,
>
>   http://sourceforge.net/tracker/?func=detail&aid=3468022&group_id=143754&atid=756395
>   No Encoding of Word for Suggestions in Piped Mode - ID: 3468022

It now redirects to https://sourceforge.net/p/hunspell/bugs/211/, the
maintainer says it's probably fixed in hunspell 1.3.4.

> A consequence of this bug seems that when communicated with in UTF-8, useful
> suggestions are not generated from an 8-bit dictionary.
>
> This in adition to report and changes proposed in
>
>   http://sourceforge.net/tracker/?func=detail&aid=3178449&group_id=143754&atid=756395
>  Bad UTF-8 char count in pipe mode - ID: 3178449

This one is marked fixed (https://sourceforge.net/p/hunspell/bugs/185/).





^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2018-08-28 23:00 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-10-09 12:32 bug#12611: Bugfix for broken hunspell choices Bastian Ballmann
2012-10-10 10:48 ` Agustin Martin
2012-10-10 15:57   ` Eli Zaretskii
2012-10-11 10:53     ` Agustin Martin
2018-08-28 23:00       ` Noam Postavsky

Code repositories for project(s) associated with this public inbox

	https://git.savannah.gnu.org/cgit/emacs.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).