unofficial mirror of emacs-devel@gnu.org 
 help / color / mirror / code / Atom feed
* auto-detect multiple languages -- ispell-detect.el
@ 2024-07-31 11:42 Emanuel Berg
  2024-08-02  2:53 ` Richard Stallman
  0 siblings, 1 reply; 34+ messages in thread
From: Emanuel Berg @ 2024-07-31 11:42 UTC (permalink / raw)
  To: emacs-devel

Automatic on-the-fly language detection and spelling with the
appropriate dictionary.

It works on arbitrary text; no markup or other annotation
is required.

Also a future release for ELPA, I hope.

;;; -*- lexical-binding: t -*-
;;
;; this file:
;;   https://dataswamp.org/~incal/emacs-init/ispell-detect.el
;;
;; Installation on Debian:
;;
;; 1. For detection, install the /usr/share/dict files from
;;    the 'w-' packages, for example 'wamerican-insane',
;;    'wfrench' and 'wswedish'.
;;
;; 2. For correction, install the ispell files from the 'i-'
;;    packages, for example 'iamerican-insane', 'ifrench-gut'
;;    and 'iswedish'.
;;
;; 3. Set `ispell-detect--langs' as below.
;;
;; Test detection:
;;
;;   (ispell-detect (point) (pos-eol)) ; l'oiseau aimait le beau
;;   (ispell-detect (point) (pos-eol)) ; detta är en mening på svenska
;;   (ispell-detect (point) (pos-eol)) ; this isn't just another program
;;
;; Test multiple language spelling:
;;
;;   https://dataswamp.org/~incal/test-spell/3lang.txt

(require 'cl-lib)
(require 'ispell)

;; Candidate languages for detection.  Each entry is a two-element list
;; (WORDLIST DICTIONARY):
;;
;;   WORDLIST   - plain word-list file; `ispell-detect' passes it to
;;                `ispell-count' to score a piece of text.
;;   DICTIONARY - the name `ispell-detect' returns for the best-scoring
;;                entry; `ispell-detect-spell' hands that name to
;;                `ispell-change-dictionary'.
;;
;; Entries are tried in list order.
(defvar ispell-detect--langs
  '(("/usr/share/dict/american-english-insane" "american-insane")
    ("/usr/share/dict/french"                  "francais")
    ("/usr/share/dict/swedish"                 "svenska")))

(defun ids--region ()
  "Return a list (BEG END) suitable for an `interactive' spec.
BEG and END are the bounds of the region when `use-region-p' says
the region should be used; otherwise both elements are nil."
  (let ((active (use-region-p)))
    (list (and active (region-beginning))
          (and active (region-end)))))

(defun ispell-detect-spell (&optional beg end probe-forward spell-forward)
  "Spell-check from BEG to END, choosing a dictionary for each chunk.
The text is processed chunk by chunk.  For every chunk the language
is detected with `ispell-detect', the Ispell dictionary is switched
with `ispell-change-dictionary' if it differs from
`ispell-current-dictionary', and the chunk is checked with
`ispell-region'.

BEG and END default to `point-min' and `point-max'; interactively
they are the bounds of the active region, if any.

PROBE-FORWARD is a function of no arguments that moves point from
the start of a chunk to the end of the sample used for language
detection; it defaults to `forward-sentence'.  SPELL-FORWARD is a
function of no arguments that moves point from the start of a chunk
to its end; it defaults to `forward-paragraph'.

The loop stops early when `ispell-region' returns nil, which is how
it reports that the spelling session was terminated."
  (interactive (ids--region))
  (or beg (setq beg (point-min)))
  (or end (setq end (point-max)))
  (or probe-forward (setq probe-forward #'forward-sentence))
  (or spell-forward (setq spell-forward #'forward-paragraph))
  ;; Corrections change the length of the text, so the region limit and
  ;; the resume position are kept as markers rather than integers.
  ;; Both advance when text is inserted exactly at their position, so a
  ;; replacement at the very end of a chunk stays inside that chunk.
  (let ((limit (copy-marker end t))
        (resume (make-marker))
        (continue t))
    (set-marker-insertion-type resume t)
    (unwind-protect
        (progn
          (goto-char beg)
          (while continue
            (let* ((chunk-beg (point))
                   (probe-end (progn (funcall probe-forward) (point)))
                   (spell-end (progn (goto-char chunk-beg)
                                     (funcall spell-forward)
                                     (min (point) (marker-position limit)))))
              (if (not (< chunk-beg spell-end))
                  ;; Nothing left inside the region.
                  (setq continue nil)
                (let ((lang (ispell-detect chunk-beg probe-end)))
                  (unless (string= lang ispell-current-dictionary)
                    (ispell-change-dictionary lang)))
                (set-marker resume spell-end)
                (if (ispell-region chunk-beg spell-end)
                    ;; Do not rely on where `ispell-region' leaves point.
                    (goto-char resume)
                  ;; Session was quit: leave point alone and stop asking.
                  (setq continue nil))))))
      (set-marker limit nil)
      (set-marker resume nil))))

(defun ispell-detect (&optional beg end)
  "Return the dictionary name of the language that best fits the text.
The candidates are the entries of `ispell-detect--langs'.  Each
candidate word list is scored with `ispell-count' on the text from
BEG to END, and the dictionary name of the entry with the lowest
score is returned.

When BEG or END is nil, the missing bound is found with
`start-of-paragraph-text' or `end-of-paragraph-text' respectively.
Interactively, BEG and END are the bounds of the active region, if any.
Point and mark are preserved."
  (interactive (ids--region))
  (save-mark-and-excursion
    (let* ((from (or beg (progn (start-of-paragraph-text) (point))))
           (to   (or end (progn (end-of-paragraph-text) (point))))
           ;; One (DICTIONARY SCORE) pair per candidate, in list order.
           (scores (mapcar (lambda (entry)
                             (list (cadr entry)
                                   (ispell-count from to (car entry))))
                           ispell-detect--langs)))
      (caar (cl-sort scores #'< :key #'cadr)))))

(defun ispell-count (&optional beg end wordlist)
  "Return the ratio of words from BEG to END not found via WORDLIST.
BEG and END default to `point-min' and `point-max'; interactively
they are the bounds of the active region, if any.  WORDLIST is
passed unchanged to `ispell-lookup-words' for every word; a word
counts as an error when that function returns nil for it.

The value is a float from 0.0 to 1.0.  It is 0.0 when no word is
found in the region.  Point and mark are preserved."
  (interactive (ids--region))
  (or beg (setq beg (point-min)))
  (or end (setq end (point-max)))
  (save-mark-and-excursion
    (goto-char beg)
    ;; Align point with the start of the first word.  `forward-word'
    ;; returns nil when there is no word ahead; stepping back in that
    ;; case would land on a word before BEG, outside the region.
    (when (forward-word)
      (backward-word))
    (cl-loop
      with words  = 0
      with errors = 0
      ;; The `eobp' guard ends the loop when END is past the buffer end.
      while (and (< (point) end) (not (eobp)))
      do (let ((word (thing-at-point 'word t)))
           ;; Skip positions where no word could be extracted.
           (when word
             (unless (ispell-lookup-words word wordlist)
               (cl-incf errors))
             (cl-incf words))
           (forward-to-word))
      ;; Avoid 0/0 when the region holds no words.
      finally return (if (zerop words)
                         0.0
                       (/ (float errors) words)))))

(provide 'ispell-detect)

-- 
underground experts united
https://dataswamp.org/~incal




^ permalink raw reply	[flat|nested] 34+ messages in thread

end of thread, other threads:[~2024-08-12  1:47 UTC | newest]

Thread overview: 34+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-07-31 11:42 auto-detect multiple languages -- ispell-detect.el Emanuel Berg
2024-08-02  2:53 ` Richard Stallman
2024-08-02  8:06   ` Gregor Zattler
2024-08-02 22:32     ` Emanuel Berg
2024-08-03  2:02     ` Emanuel Berg
2024-08-04  6:52       ` Uwe Brauer via Emacs development discussions.
2024-08-03 13:25     ` Emanuel Berg
2024-08-03 17:14       ` Emanuel Berg
2024-08-03 19:58         ` Emanuel Berg
2024-08-04  5:09           ` Eli Zaretskii
2024-08-04  5:31             ` Emanuel Berg
2024-08-04  5:50               ` Eli Zaretskii
2024-08-04  6:14                 ` Emanuel Berg
2024-08-04  6:27                   ` Eli Zaretskii
2024-08-04  6:40                     ` Emanuel Berg
2024-08-04  7:08                       ` Eli Zaretskii
2024-08-04 10:30                         ` Stefan Kangas
2024-08-04 15:25                           ` Uwe Brauer via Emacs development discussions.
2024-08-04 15:56                             ` Emanuel Berg
2024-08-04 17:45                               ` tomas
2024-08-04  7:13                     ` Emanuel Berg
2024-08-07 15:41                     ` Björn Bidar
     [not found]                     ` <87wmkspcgn.fsf@>
2024-08-08  2:01                       ` guess-language package Richard Stallman
2024-08-08  6:42                         ` Joost Kremers
2024-08-12  1:47                           ` Richard Stallman
2024-08-08  6:55                       ` auto-detect multiple languages -- ispell-detect.el Joost Kremers
2024-08-12  1:47                       ` Richard Stallman
2024-08-04 11:03                   ` Emanuel Berg
2024-08-04  5:42             ` Emanuel Berg
2024-08-04  5:52               ` Eli Zaretskii
2024-08-04  7:17           ` Uwe Brauer via Emacs development discussions.
2024-08-04  9:30             ` Emanuel Berg
2024-08-04  2:05     ` Richard Stallman
2024-08-07 15:36       ` Björn Bidar

Code repositories for project(s) associated with this public inbox

	https://git.savannah.gnu.org/cgit/emacs.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).