unofficial mirror of emacs-devel@gnu.org 
 help / color / mirror / code / Atom feed
* [PATCH] Improve Malayalam language transliteration
@ 2020-04-26 13:49 James Thomas
  2020-04-26 15:43 ` Eli Zaretskii
  0 siblings, 1 reply; 17+ messages in thread
From: James Thomas @ 2020-04-26 13:49 UTC (permalink / raw)
  To: emacs-devel

[-- Attachment #1: Type: text/plain, Size: 632 bytes --]

The existing Quail ITRANS scheme is incomplete and poorly suited to
Malayalam: it does not support some common characters and does not
handle quirks of the language such as 'chillu's.  Also, the Inscript
method has errors and has not been updated to the latest standard.

This patch implements a sufficient basic subset of the Mozhi scheme
(the complete scheme is unnecessarily complicated, IMO) and updates
Inscript.  References:
https://malayalam.kerala.gov.in/index.php/InputMethods
https://sites.google.com/site/cibu/

Note: when testing on Ubuntu (and possibly Debian), set the font to
Noto Sans Mono to avoid the problems with the default font.

--
Jim

[-- Attachment #2: 0001-Improve-Malayalam-language-transliteration.patch --]
[-- Type: text/x-diff, Size: 9808 bytes --]

From 7261783271799b0d9cbd5c49afb119f1b8d9e9d6 Mon Sep 17 00:00:00 2001
From: James Thomas <jimjoe@gmx.net>
Date: Sun, 26 Apr 2020 18:59:56 +0530
Subject: [PATCH] Improve Malayalam language transliteration

The current ITRANS scheme does not support some characters and
language quirks like 'chillu's.  The Inscript method is not complete.
* lisp/language/ind-util.el
  (indian-mlm-base-table): Add archaic chars & combos; cleanup.
  (indian-mlm-mozhi-table): For new scheme Mozhi.
* lisp/leim/quail/indian.el
  (inscript-mlm-keytable): Correct errors.
  Add Inscript chillus & zero-width chars, Mozhi scheme.
* etc/NEWS: Mention the change.
Replace ITRANS with a sufficient implementation of the Mozhi scheme.
Complete the Inscript implementation.  Reference:
https://malayalam.kerala.gov.in/index.php/InputMethods
---
 etc/NEWS                  |   7 +++
 lisp/language/ind-util.el |  40 +++++++++++---
 lisp/leim/quail/indian.el | 106 ++++++++++++++++++++++++++++++++------
 3 files changed, 129 insertions(+), 24 deletions(-)

diff --git a/etc/NEWS b/etc/NEWS
index 025d5c14a7..e701cfef41 100644
--- a/etc/NEWS
+++ b/etc/NEWS
@@ -288,6 +288,13 @@ prefix on the Subject line in various languages.
 These new navigation commands are bound to 'n' and 'p' in
 'apropos-mode'.

+** Quail
+
+---
+*** Improved Malayalam language transliteration
+A sufficient implementation of the Mozhi scheme replaces the incomplete
+ITRANS scheme.  The Inscript method is updated to the latest standard.
+
 \f
 * New Modes and Packages in Emacs 28.1

diff --git a/lisp/language/ind-util.el b/lisp/language/ind-util.el
index 4319e5537e..fd21f3a6a6 100644
--- a/lisp/language/ind-util.el
+++ b/lisp/language/ind-util.el
@@ -232,8 +232,8 @@ indian-mlm-base-table
   '(
     (;; VOWELS
      (?അ nil) (?ആ ?ാ) (?ഇ ?ി) (?ഈ ?ീ) (?ഉ ?ു) (?ഊ ?ൂ)
-     (?ഋ ?ൃ) (?ഌ nil) nil (?ഏ ?േ) (?എ ?െ) (?ഐ ?ൈ)
-     nil (?ഓ ?ോ) (?ഒ ?ൊ) (?ഔ ?ൌ) nil nil)
+     (?ഋ ?ൃ) (?ഌ ?ൢ) (?ൡ ?ൣ) (?ഏ ?േ) (?എ ?െ) (?ഐ ?ൈ)
+     nil (?ഒ ?ൊ) (?ഓ ?ോ) (?ഔ ?ൗ) (?് ?്) (?ൠ ?ൄ))
     (;; CONSONANTS
      ?ക ?ഖ ?ഗ ?ഘ ?ങ                  ;; GUTTRULS
      ?ച ?ഛ ?ജ ?ഝ ?ഞ                  ;; PALATALS
@@ -243,13 +243,14 @@ indian-mlm-base-table
      ?യ ?ര ?റ ?ല ?ള ?ഴ ?വ          ;; SEMIVOWELS
      ?ശ ?ഷ ?സ ?ഹ                    ;; SIBILANTS
      nil nil nil nil nil nil nil nil      ;; NUKTAS
-     "ജ്ഞ" "ക്ഷ")
+     "ക്ഷ"
+     "റ്റ" "ന്റ" "ത്ത" "ത്ഥ" "ഞ്ഞ" "ങ്ങ" "ന്ന"
+     "ഞ്ച" "ന്ക" "ങ്ക" "ച്ച" "ച്ഛ" "ക്ക"
+     "ബ്ബ" "ക്ക" "ഗ്ഗ" "ജ്ജ" "മ്മ" "പ്പ" "വ്വ" "ക്സ" "ശ്ശ")
     (;; Misc Symbols
      nil ?ം ?ഃ nil ?് nil nil)
     (;; Digits
-     ?൦ ?൧ ?൨ ?൩ ?൪ ?൫ ?൬ ?൭ ?൮ ?൯)
-    (;; Inscript-extra (4)  (#, $, ^, *, ])
-     "്ര" "ര്" "ത്ര" "ശ്ര" nil)))
+     ?൦ ?൧ ?൨ ?൩ ?൪ ?൫ ?൬ ?൭ ?൮ ?൯)))

 (defvar indian-tml-base-table
   '(
@@ -323,6 +324,29 @@ indian-itrans-v5-table-for-tamil
     (;; misc -- 7
      ".N" (".n" "M") "H" ".a" ".h" ("AUM" "OM") "..")))

+(defvar indian-mlm-mozhi-table
+  '(;; for encode/decode; combined with `indian-mlm-base-table' by `indian-make-hash'
+    (;; vowels -- 18
+     "a" ("aa" "A") "i" ("ii" "I") "u" ("uu" "U")
+     "R" "Ll" "Lll" ("E" "ae") "e" "ai"
+     nil  "o"   "O"   "au"  "~" "RR")   ; "~" sits in the slot where the base table has the virama
+    (;; consonants -- 68 slots: 37 basic, 8 nukta (all nil), 23 clusters
+     ("k" "c")   "kh"  "g"   "gh"  "ng"
+     "ch" ("Ch" "chh") "j" "jh" "nj"
+     "T"   "Th"  "D"   "Dh"  "N"
+     "th"  "thh" "d"   "dh"  "n"   nil
+     "p"   ("ph" "f")  "b"   "bh"  "m"
+     "y"   "r"   "rr"  "l"  "L" "zh" ("v" "w")
+     ("S" "z") "sh" "s" "h"
+     nil nil nil nil nil nil nil nil    ; nukta slots, unused here
+     "X"
+     ;; some of these are extra to Mozhi; a list such as ("C" "K" "q") gives alternative keys for one slot
+     ("t" "tt") "nt" "tth" "tthh" "nnj" "nng" "nn"
+     "nch" "nc" "nk" "cch" "cchh" "cc"
+     "B" ("C" "K" "q") "G" "J" "M" "P" "V" "x" "Z")
+    (;; misc -- only the first 3 of the base table's 7 slots are listed
+     nil nil "H")))
+
 (defvar indian-kyoto-harvard-table
   '(;; for encode/decode
     (;; vowel
@@ -520,9 +544,9 @@ indian-knd-itrans-v5-hash
   (indian-make-hash indian-knd-base-table
 			  indian-itrans-v5-table))

-(defvar indian-mlm-itrans-v5-hash
+(defvar indian-mlm-mozhi-hash
   (indian-make-hash indian-mlm-base-table
-			  indian-itrans-v5-table))
+			  indian-mlm-mozhi-table))

 (defvar indian-tml-itrans-v5-hash
   (indian-make-hash indian-tml-base-table
diff --git a/lisp/leim/quail/indian.el b/lisp/leim/quail/indian.el
index 2681eab0e5..7fd2b8ed65 100644
--- a/lisp/leim/quail/indian.el
+++ b/lisp/leim/quail/indian.el
@@ -117,12 +117,6 @@ "\\''"
  indian-knd-itrans-v5-hash "kannada-itrans" "Kannada" "KndIT"
  "Kannada transliteration by ITRANS method.")

-(if nil
-    (quail-define-package "malayalam-itrans" "Malayalam" "MlmIT" t "Malayalam ITRANS"))
-(quail-define-indian-trans-package
- indian-mlm-itrans-v5-hash "malayalam-itrans" "Malayalam" "MlmIT"
- "Malayalam transliteration by ITRANS method.")
-
 (defvar quail-tamil-itrans-syllable-table
   (let ((vowels
 	 '(("அ" nil "a")
@@ -358,24 +352,21 @@ inscript-mlm-keytable
   '(
     (;; VOWELS  (18)
      (?D nil) (?E ?e) (?F ?f) (?R ?r) (?G ?g) (?T ?t)
-     (?+ ?=) ("F]" "f]") (?! ?@) (?S ?s) (?Z ?z) (?W ?w)
-     (?| ?\\) (?~ ?`) (?A ?a) (?Q ?q) ("+]" "=]") ("R]" "r]"))
+     (?= ?+) nil nil (?S ?s) (?Z ?z) (?W ?w)
+     nil (?~ ?`) (?A ?a) (?Q ?q))
     (;; CONSONANTS (42)
      ?k ?K ?i ?I ?U                ;; GRUTTALS
      ?\; ?: ?p ?P ?}               ;; PALATALS
      ?' ?\" ?\[ ?{ ?C              ;; CEREBRALS
-     ?l ?L ?o ?O ?v ?V             ;; DENTALS
+     ?l ?L ?o ?O ?v nil            ;; DENTALS
      ?h ?H ?y ?Y ?c                ;; LABIALS
-     ?/ ?j ?J ?n ?N "N]" ?b        ;; SEMIVOWELS
+     ?/ ?j ?J ?n ?N ?B ?b          ;; SEMIVOWELS
      ?M ?< ?m ?u                   ;; SIBILANTS
-     "k]" "K]" "i]" "p]" "[]" "{]" "H]" "/]" ;; NUKTAS
-     ?% ?&)
+     nil nil nil nil nil nil nil nil nil) ;; NUKTAS
     (;; Misc Symbols (7)
-     ?X ?x ?_ ">]" ?d "X]" ?>)
+     nil ?x ?_ nil ?d)
     (;; Digits
-     ?0 ?1 ?2 ?3 ?4 ?5 ?6 ?7 ?8 ?9)
-    (;; Inscripts
-     ?# ?$ ?^ ?* ?\])))
+     ?0 ?1 ?2 ?3 ?4 ?5 ?6 ?7 ?8 ?9)))

 (defvar inscript-tml-keytable
   '(
@@ -463,6 +454,21 @@ inscript-tml-keytable
  "malayalam-inscript" "Malayalam" "MlmIS"
  "Malayalam keyboard Inscript.")

+;; Chillus: KEY+"d" keeps consonant + virama; KEY+"d]" gives the chillu letter.
+(quail-defrule "Cd" ["ണ്"])     ; vector: the string is a single translation (see `quail-defrule')
+(quail-defrule "Cd]" ?ൺ)        ; no NAME argument -- presumably added to the package defined just above; confirm
+(quail-defrule "vd" ["ന്"])
+(quail-defrule "vd]" ?ൻ)
+(quail-defrule "jd" ["ര്"])
+(quail-defrule "jd]" ?ർ)
+(quail-defrule "nd" ["ല്"])
+(quail-defrule "nd]" ?ൽ)
+(quail-defrule "Nd" ["ള്"])
+(quail-defrule "Nd]" ?ൾ)
+
+(quail-defrule "\\" ?‌)
+(quail-defrule "X" ?​)
+
 (if nil
     (quail-define-package "tamil-inscript" "Tamil" "TmlIS" t "Tamil keyboard Inscript"))
 (quail-define-inscript-package
@@ -571,4 +577,72 @@ inscript-tml-keytable
   ("?" ?\?)
   ("/" ?্))

+(defun indian-mlm-mozhi-update-translation (control-flag) ; update-translation function of "malayalam-mozhi"
+  (let ((len (length quail-current-key)) chillu ; CHILLU: index found by `cl-position' below
+	(vowels '(?a ?e ?i ?o ?u ?A ?E ?I ?O ?U ?R))) ; keys after which a chillu is considered
+    (cond ((numberp control-flag)
+	   (progn (if (= control-flag 0)
+		      (setq quail-current-str quail-current-key) ; use the raw keys as the string
+		    (cond (input-method-exit-on-first-char)
+			  ((and (memq (aref quail-current-key
+					    (1- control-flag))
+				      vowels)
+				(setq chillu (cl-position ; NOTE(review): needs cl-lib loaded -- confirm the file requires it
+					      (aref quail-current-key
+						    control-flag)
+					      '(?m ?N ?n ?r ?l ?L)))) ; same order as the sign list below
+			   ;; conditions for putting chillu
+			   (and (or (and (= control-flag (1- len)) ; that key is the last one typed
+					 (not (setq control-flag nil))) ; always true; nil skips the re-queue below
+				    (and (= control-flag (- len 2)) ; or exactly one key follows it
+					 (let ((temp (aref quail-current-key
+							   (1- len))))
+                                           ;; is it last char of word?  (next key is neither a letter nor ~)
+					   (not
+					    (or (and (>= temp ?a) (<= temp ?z))
+						(and (>= temp ?A) (<= temp ?Z))
+						(eq temp ?~))))
+					 (setq control-flag (1+ control-flag)))) ; so only the last key is re-queued
+				(setq quail-current-str     ;; put chillu
+				      (concat (if (not (stringp
+							quail-current-str))
+						  (string quail-current-str)
+						quail-current-str)
+					      (string
+					       (nth chillu '(?ം ?ൺ ?ൻ ?ർ ?ൽ ?ൾ)))))))))
+		  (and (not input-method-exit-on-first-char) control-flag ; re-queue keys from index CONTROL-FLAG on
+		       (while (> len control-flag)
+			 (setq len (1- len))
+			 (setq unread-command-events
+			       (cons (aref quail-current-key len)
+				     unread-command-events))))
+		  )) ; end of the numeric CONTROL-FLAG case
+	  ((null control-flag)
+	   (unless quail-current-str ; nothing set yet: fall back to the raw keys
+	     (setq quail-current-str quail-current-key)
+	     ))
+	  ((equal control-flag t)
+	   (if (memq (aref quail-current-key (1- len))  ;; If vowel ending,
+		     vowels)                            ;; may have to put
+	       (setq control-flag nil)))))              ;; chillu. So don't
+  control-flag)                                         ;; end translation
+
+(quail-define-package "malayalam-mozhi" "Malayalam" "MlmMI" t
+                      "Malayalam transliteration by Mozhi method."
+                      nil nil t nil nil nil t nil ; the two t's: DETERMINISTIC, MAXIMUM-SHORTEST (per the arglist; confirm)
+                      'indian-mlm-mozhi-update-translation) ; UPDATE-TRANSLATION-FUNCTION
+
+;; Define a rule for every entry in the cdr table of `indian-mlm-mozhi-hash':
+;; one-character values become a character, all others a one-string vector.
+(maphash (lambda (keys text)
+           (quail-defrule keys (cond ((/= 1 (length text)) (vector text))
+                                     (t (string-to-char text)))))
+         (cdr indian-mlm-mozhi-hash))
+
+(defun indian-mlm-mozhi-underscore (key len) (throw 'quail-tag nil))
+;; Typing "_" calls the function above, which throws nil to `quail-tag' (presumably ending the translation; confirm).
+(quail-defrule "_" 'indian-mlm-mozhi-underscore)
+(quail-defrule "|" ?‌)
+(quail-defrule "||" ?​)
+
 ;;; indian.el ends here
--
2.20.1


^ permalink raw reply related	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2020-07-16 15:07 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-04-26 13:49 [PATCH] Improve Malayalam language transliteration James Thomas
2020-04-26 15:43 ` Eli Zaretskii
     [not found]   ` <87tv161aml.fsf@gmx.net>
2020-04-26 17:12     ` Eli Zaretskii
2020-04-26 22:11       ` James Thomas
2020-04-27  1:33       ` James Thomas
2020-04-27  2:42       ` James Thomas
2020-05-28 17:33         ` Eli Zaretskii
2020-05-29 14:57           ` James Thomas
2020-06-01  8:49           ` James Thomas
2020-06-01 15:02             ` Eli Zaretskii
2020-06-02  2:27               ` James Thomas
2020-07-15 12:11           ` James Thomas
2020-07-15 14:29             ` Eli Zaretskii
2020-07-16  3:37               ` James Thomas
2020-07-16  5:20                 ` James Thomas
2020-07-16  5:44                   ` James Thomas
2020-07-16 15:07                     ` Eli Zaretskii

Code repositories for project(s) associated with this public inbox

	https://git.savannah.gnu.org/cgit/emacs.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).