From mboxrd@z Thu Jan 1 00:00:00 1970 Path: news.gmane.io!.POSTED.blaine.gmane.org!not-for-mail From: Emanuel Berg Newsgroups: gmane.emacs.devel Subject: auto-detect multiple languages -- ispell-detect.el Date: Wed, 31 Jul 2024 13:42:16 +0200 Message-ID: <87y15h7ppj.fsf@dataswamp.org> Mime-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: 8bit Injection-Info: ciao.gmane.io; posting-host="blaine.gmane.org:116.202.254.214"; logging-data="6394"; mail-complaints-to="usenet@ciao.gmane.io" User-Agent: Gnus/5.13 (Gnus v5.13) To: emacs-devel@gnu.org Cancel-Lock: sha1:af0FRxT4Yvn+1YQkSaCA0E+nc2o= Original-X-From: emacs-devel-bounces+ged-emacs-devel=m.gmane-mx.org@gnu.org Wed Jul 31 15:37:01 2024 Return-path: Envelope-to: ged-emacs-devel@m.gmane-mx.org Original-Received: from lists.gnu.org ([209.51.188.17]) by ciao.gmane.io with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.92) (envelope-from ) id 1sZ9Vj-0001Sl-Dr for ged-emacs-devel@m.gmane-mx.org; Wed, 31 Jul 2024 15:37:00 +0200 Original-Received: from localhost ([::1] helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1sZ9VE-0006DK-Te; Wed, 31 Jul 2024 09:36:29 -0400 Original-Received: from eggs.gnu.org ([2001:470:142:3::10]) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1sZ7j0-0006f7-SP for emacs-devel@gnu.org; Wed, 31 Jul 2024 07:42:34 -0400 Original-Received: from ciao.gmane.io ([116.202.254.214]) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1sZ7iy-0002su-BG for emacs-devel@gnu.org; Wed, 31 Jul 2024 07:42:34 -0400 Original-Received: from list by ciao.gmane.io with local (Exim 4.92) (envelope-from ) id 1sZ7iw-0002Ds-9w for emacs-devel@gnu.org; Wed, 31 Jul 2024 13:42:30 +0200 X-Injected-Via-Gmane: http://gmane.org/ Mail-Followup-To: emacs-devel@gnu.org Mail-Copies-To: never Received-SPF: pass client-ip=116.202.254.214; 
envelope-from=ged-emacs-devel@m.gmane-mx.org; helo=ciao.gmane.io X-Spam_score_int: -18 X-Spam_score: -1.9 X-Spam_bar: - X-Spam_report: (-1.9 / 5.0 requ) BAYES_00=-1.9, HEADER_FROM_DIFFERENT_DOMAINS=0.001, SPF_HELO_NONE=0.001, SPF_PASS=-0.001 autolearn=ham autolearn_force=no X-Spam_action: no action X-Mailman-Approved-At: Wed, 31 Jul 2024 09:36:27 -0400 X-BeenThere: emacs-devel@gnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: "Emacs development discussions." List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: emacs-devel-bounces+ged-emacs-devel=m.gmane-mx.org@gnu.org Original-Sender: emacs-devel-bounces+ged-emacs-devel=m.gmane-mx.org@gnu.org Xref: news.gmane.io gmane.emacs.devel:322226 Archived-At: Automatic on-the-fly language detection and spelling with the appropriate dictionary. It works on arbitrary texts with no markup or anything required. Also a future release for ELPA, I hope. ;;; -*- lexical-binding: t -*- ;; ;; this file: ;; https://dataswamp.org/~incal/emacs-init/ispell-detect.el ;; ;; Installation on Debian: ;; ;; 1. For detection, install the /usr/share/dict files from ;; the 'w-' packages, for example 'wamerican-insane', ;; 'wfrench' and 'wswedish'. ;; ;; 2. For correction, install the ispell files from the 'i-' ;; packages, for example 'iamerican-insane', 'ifrench-gut' ;; and 'iswedish'. ;; ;; 3. Set `ispell-detect--langs' as below. 
;;
;; Test detection:
;;
;;   (ispell-detect (point) (pos-eol)) ; l'oiseau aimait le beau
;;   (ispell-detect (point) (pos-eol)) ; detta är en mening på svenska
;;   (ispell-detect (point) (pos-eol)) ; this isn't just another program
;;
;; Test multiple language spelling:
;;
;;   https://dataswamp.org/~incal/test-spell/3lang.txt

(require 'cl-lib)
(require 'ispell)

(defvar ispell-detect--langs
  '(("/usr/share/dict/american-english-insane" "american-insane")
    ("/usr/share/dict/french"                  "francais")
    ("/usr/share/dict/swedish"                 "svenska"))
  "Languages to choose between when detecting.
Each element is a list (WORDLIST DICTIONARY).  WORDLIST is handed
to `ispell-count' (and from there to `ispell-lookup-words') to
score a text; DICTIONARY is the name `ispell-detect' returns and
that `ispell-detect-spell' passes to `ispell-change-dictionary'.")

(defun ids--region ()
  "Return (BEG END) for the active region, or (nil nil) without one.
Used as the `interactive' spec of the commands in this file, so
that each command falls back on its own defaults when no region
is active."
  (if (use-region-p)
      (list (region-beginning) (region-end))
    (list nil nil)))

(defun ispell-detect-spell (&optional beg end probe-forward spell-forward)
  "Spell-check from BEG to END, detecting the language chunk by chunk.
BEG and END default to `point-min' and `point-max'; interactively
the active region is used if there is one.

PROBE-FORWARD and SPELL-FORWARD are functions called with no
arguments that move point forward.  Starting at the beginning of
each chunk, the text up to where PROBE-FORWARD stops (default
`forward-sentence') is given to `ispell-detect', and the text up
to where SPELL-FORWARD stops (default `forward-paragraph'), but
never past END, is checked with `ispell-region' using the
detected dictionary."
  (interactive (ids--region))
  (or beg (setq beg (point-min)))
  (or end (setq end (point-max)))
  (or probe-forward (setq probe-forward #'forward-sentence))
  (or spell-forward (setq spell-forward #'forward-paragraph))
  ;; Corrections made during `ispell-region' change the length of the
  ;; text, so END is tracked with a marker instead of a fixed integer.
  (let ((end-marker (copy-marker end)))
    (unwind-protect
        (progn
          (goto-char beg)
          (cl-loop
           ;; NOTE(review): the next chunk starts wherever
           ;; `ispell-region' leaves point -- presumably the end of
           ;; the region it just checked; confirm.
           for chunk-beg = (point)
           for probe-end = (progn (funcall probe-forward)
                                  (point))
           for spell-end = (progn (goto-char chunk-beg)
                                  (funcall spell-forward)
                                  (min (point)
                                       (marker-position end-marker)))
           while (< chunk-beg spell-end)
           for lang = (ispell-detect chunk-beg probe-end)
           do
           ;; Only switch dictionary when the language changes.
           (unless (string= lang ispell-current-dictionary)
             (ispell-change-dictionary lang))
           (ispell-region chunk-beg spell-end)))
      ;; Detach the marker so it does not slow down later edits.
      (set-marker end-marker nil))))

(defun ispell-detect (&optional beg end)
  "Return the dictionary that best matches the text from BEG to END.
BEG defaults to the start, and END to the end, of the paragraph
text around point; interactively the active region is used if
there is one.

Every (WORDLIST DICTIONARY) pair in `ispell-detect--langs' is
scored with `ispell-count', and the DICTIONARY with the lowest
ratio of unknown words is returned.  On a tie, the pair listed
first wins.  When called interactively, the result is also shown
in the echo area."
  (interactive (ids--region))
  (let ((best
         (save-mark-and-excursion
           (let* ((beg (or beg (progn (start-of-paragraph-text) (point))))
                  (end (or end (progn (end-of-paragraph-text)   (point)))))
             (cl-loop
              for (wordlist dict) in ispell-detect--langs
              collect (list dict (ispell-count beg end wordlist)) into scores
              ;; Lowest error ratio first; `caar' picks its dictionary.
              finally return (caar (cl-sort scores #'< :key #'cadr)))))))
    (when (called-interactively-p 'interactive)
      (message "%s" best))
    best))

(defun ispell-count (&optional beg end wordlist)
  "Return the ratio of words from BEG to END that are not in WORDLIST.
BEG and END default to `point-min' and `point-max'; interactively
the active region is used if there is one.  Each word is looked
up with `ispell-lookup-words', to which WORDLIST is passed on.

The result is a float between 0.0 and 1.0.  If there are no words
to check, return 0.0.  When called interactively, the result is
also shown in the echo area."
  (interactive (ids--region))
  (or beg (setq beg (point-min)))
  (or end (setq end (point-max)))
  (let ((ratio
         (save-mark-and-excursion
           (goto-char beg)
           ;; Move to the beginning of the first word.
           (forward-word)
           (backward-word)
           (let ((words  0)
                 (errors 0)
                 (last   nil))
             ;; Stop at END, or when point no longer advances, which
             ;; happens if END lies beyond the accessible buffer.
             (while (and (< (point) end)
                         (not (eql last (point))))
               (setq last (point))
               (let ((word (thing-at-point 'word t)))
                 ;; No word at point: nothing to look up or count.
                 (when word
                   (unless (ispell-lookup-words word wordlist)
                     (cl-incf errors))
                   (cl-incf words)))
               (forward-to-word 1))
             ;; Avoid 0/0, which is not a number that can be sorted.
             (if (zerop words)
                 0.0
               (/ (float errors) words))))))
    (when (called-interactively-p 'interactive)
      (message "%s" ratio))
    ratio))

(provide 'ispell-detect)

-- 
underground experts united
https://dataswamp.org/~incal