unofficial mirror of emacs-devel@gnu.org 
 help / color / mirror / code / Atom feed
From: James Thomas <jimjoe@gmx.net>
To: emacs-devel@gnu.org
Subject: [PATCH] Improve Malayalam language transliteration
Date: Sun, 26 Apr 2020 19:19:10 +0530	[thread overview]
Message-ID: <87d07ul5m1.fsf@gmx.net> (raw)

[-- Attachment #1: Type: text/plain, Size: 632 bytes --]

The existing Quail ITRANS scheme is incomplete and ill-suited to
Malayalam: it does not support some common characters and does not
handle quirks of the language such as chillus.  Also, the Inscript
method has errors and has not been updated to the latest standard.

This patch implements a basic but sufficient subset of the Mozhi
scheme (the complete scheme is unnecessarily complicated, IMO) and
updates Inscript.  See:
https://malayalam.kerala.gov.in/index.php/InputMethods
https://sites.google.com/site/cibu/

Note: When testing on Ubuntu (and maybe Debian), set the font to Noto
Sans Mono to avoid the problems with the default font.

--
Jim

[-- Attachment #2: 0001-Improve-Malayalam-language-transliteration.patch --]
[-- Type: text/x-diff, Size: 9808 bytes --]

From 7261783271799b0d9cbd5c49afb119f1b8d9e9d6 Mon Sep 17 00:00:00 2001
From: James Thomas <jimjoe@gmx.net>
Date: Sun, 26 Apr 2020 18:59:56 +0530
Subject: [PATCH] Improve Malayalam language transliteration

The current ITRANS scheme does not support some characters and
language quirks like 'chillu's.  The Inscript method is not complete.
* lisp/language/ind-util.el
  (indian-mlm-base-table): Add archaic chars & combos; cleanup.
  (indian-mlm-mozhi-table): For new scheme Mozhi.
* lisp/leim/quail/indian.el
  (inscript-mlm-keytable): Correct errors.
  Add Inscript chillus & zero-width chars, Mozhi scheme.
* etc/NEWS: Mention the change.
Replace ITRANS with a sufficient implementation of the Mozhi scheme.
Complete Inscript implementation.  Reference:
https://malayalam.kerala.gov.in/index.php/InputMethods
---
 etc/NEWS                  |   7 +++
 lisp/language/ind-util.el |  40 +++++++++++---
 lisp/leim/quail/indian.el | 106 ++++++++++++++++++++++++++++++++------
 3 files changed, 129 insertions(+), 24 deletions(-)

diff --git a/etc/NEWS b/etc/NEWS
index 025d5c14a7..e701cfef41 100644
--- a/etc/NEWS
+++ b/etc/NEWS
@@ -288,6 +288,13 @@ prefix on the Subject line in various languages.
 These new navigation commands are bound to 'n' and 'p' in
 'apropos-mode'.

+** Quail
+
+---
+*** Improved Malayalam language transliteration
+A sufficient implementation of the Mozhi scheme replaces the incomplete
+ITRANS scheme.  The Inscript method is updated to the latest standard.
+
 \f
 * New Modes and Packages in Emacs 28.1

diff --git a/lisp/language/ind-util.el b/lisp/language/ind-util.el
index 4319e5537e..fd21f3a6a6 100644
--- a/lisp/language/ind-util.el
+++ b/lisp/language/ind-util.el
@@ -232,8 +232,8 @@ indian-mlm-base-table
   '(
     (;; VOWELS
      (?അ nil) (?ആ ?ാ) (?ഇ ?ി) (?ഈ ?ീ) (?ഉ ?ു) (?ഊ ?ൂ)
-     (?ഋ ?ൃ) (?ഌ nil) nil (?ഏ ?േ) (?എ ?െ) (?ഐ ?ൈ)
-     nil (?ഓ ?ോ) (?ഒ ?ൊ) (?ഔ ?ൌ) nil nil)
+     (?ഋ ?ൃ) (?ഌ ?ൢ) (?ൡ ?ൣ) (?ഏ ?േ) (?എ ?െ) (?ഐ ?ൈ)
+     nil (?ഒ ?ൊ) (?ഓ ?ോ) (?ഔ ?ൗ) (?് ?്) (?ൠ ?ൄ))
     (;; CONSONANTS
      ?ക ?ഖ ?ഗ ?ഘ ?ങ                  ;; GUTTRULS
      ?ച ?ഛ ?ജ ?ഝ ?ഞ                  ;; PALATALS
@@ -243,13 +243,14 @@ indian-mlm-base-table
      ?യ ?ര ?റ ?ല ?ള ?ഴ ?വ          ;; SEMIVOWELS
      ?ശ ?ഷ ?സ ?ഹ                    ;; SIBILANTS
      nil nil nil nil nil nil nil nil      ;; NUKTAS
-     "ജ്ഞ" "ക്ഷ")
+     "ക്ഷ"
+     "റ്റ" "ന്റ" "ത്ത" "ത്ഥ" "ഞ്ഞ" "ങ്ങ" "ന്ന"
+     "ഞ്ച" "ന്ക" "ങ്ക" "ച്ച" "ച്ഛ" "ക്ക"
+     "ബ്ബ" "ക്ക" "ഗ്ഗ" "ജ്ജ" "മ്മ" "പ്പ" "വ്വ" "ക്സ" "ശ്ശ")
     (;; Misc Symbols
      nil ?ം ?ഃ nil ?് nil nil)
     (;; Digits
-     ?൦ ?൧ ?൨ ?൩ ?൪ ?൫ ?൬ ?൭ ?൮ ?൯)
-    (;; Inscript-extra (4)  (#, $, ^, *, ])
-     "്ര" "ര്" "ത്ര" "ശ്ര" nil)))
+     ?൦ ?൧ ?൨ ?൩ ?൪ ?൫ ?൬ ?൭ ?൮ ?൯)))

 (defvar indian-tml-base-table
   '(
@@ -323,6 +324,29 @@ indian-itrans-v5-table-for-tamil
     (;; misc -- 7
      ".N" (".n" "M") "H" ".a" ".h" ("AUM" "OM") "..")))

+(defvar indian-mlm-mozhi-table
+  '(;; for encode/decode
+    (;; vowels -- 18
+     "a" ("aa" "A") "i" ("ii" "I") "u" ("uu" "U")
+     "R" "Ll" "Lll" ("E" "ae") "e" "ai"
+     nil  "o"   "O"   "au"  "~" "RR")
+    (;; consonants -- 40
+     ("k" "c")   "kh"  "g"   "gh"  "ng"
+     "ch" ("Ch" "chh") "j" "jh" "nj"
+     "T"   "Th"  "D"   "Dh"  "N"
+     "th"  "thh" "d"   "dh"  "n"   nil
+     "p"   ("ph" "f")  "b"   "bh"  "m"
+     "y"   "r"   "rr"  "l"  "L" "zh" ("v" "w")
+     ("S" "z") "sh" "s" "h"
+     nil nil nil nil nil nil nil nil
+     "X"
+     ;; some of these are extra to Mozhi
+     ("t" "tt") "nt" "tth" "tthh" "nnj" "nng" "nn"
+     "nch" "nc" "nk" "cch" "cchh" "cc"
+     "B" ("C" "K" "q") "G" "J" "M" "P" "V" "x" "Z")
+    (;; misc -- 7
+     nil nil "H")))
+
 (defvar indian-kyoto-harvard-table
   '(;; for encode/decode
     (;; vowel
@@ -520,9 +544,9 @@ indian-knd-itrans-v5-hash
   (indian-make-hash indian-knd-base-table
 			  indian-itrans-v5-table))

-(defvar indian-mlm-itrans-v5-hash
+(defvar indian-mlm-mozhi-hash
   (indian-make-hash indian-mlm-base-table
-			  indian-itrans-v5-table))
+			  indian-mlm-mozhi-table))

 (defvar indian-tml-itrans-v5-hash
   (indian-make-hash indian-tml-base-table
diff --git a/lisp/leim/quail/indian.el b/lisp/leim/quail/indian.el
index 2681eab0e5..7fd2b8ed65 100644
--- a/lisp/leim/quail/indian.el
+++ b/lisp/leim/quail/indian.el
@@ -117,12 +117,6 @@ "\\''"
  indian-knd-itrans-v5-hash "kannada-itrans" "Kannada" "KndIT"
  "Kannada transliteration by ITRANS method.")

-(if nil
-    (quail-define-package "malayalam-itrans" "Malayalam" "MlmIT" t "Malayalam ITRANS"))
-(quail-define-indian-trans-package
- indian-mlm-itrans-v5-hash "malayalam-itrans" "Malayalam" "MlmIT"
- "Malayalam transliteration by ITRANS method.")
-
 (defvar quail-tamil-itrans-syllable-table
   (let ((vowels
 	 '(("அ" nil "a")
@@ -358,24 +352,21 @@ inscript-mlm-keytable
   '(
     (;; VOWELS  (18)
      (?D nil) (?E ?e) (?F ?f) (?R ?r) (?G ?g) (?T ?t)
-     (?+ ?=) ("F]" "f]") (?! ?@) (?S ?s) (?Z ?z) (?W ?w)
-     (?| ?\\) (?~ ?`) (?A ?a) (?Q ?q) ("+]" "=]") ("R]" "r]"))
+     (?= ?+) nil nil (?S ?s) (?Z ?z) (?W ?w)
+     nil (?~ ?`) (?A ?a) (?Q ?q))
     (;; CONSONANTS (42)
      ?k ?K ?i ?I ?U                ;; GRUTTALS
      ?\; ?: ?p ?P ?}               ;; PALATALS
      ?' ?\" ?\[ ?{ ?C              ;; CEREBRALS
-     ?l ?L ?o ?O ?v ?V             ;; DENTALS
+     ?l ?L ?o ?O ?v nil            ;; DENTALS
      ?h ?H ?y ?Y ?c                ;; LABIALS
-     ?/ ?j ?J ?n ?N "N]" ?b        ;; SEMIVOWELS
+     ?/ ?j ?J ?n ?N ?B ?b          ;; SEMIVOWELS
      ?M ?< ?m ?u                   ;; SIBILANTS
-     "k]" "K]" "i]" "p]" "[]" "{]" "H]" "/]" ;; NUKTAS
-     ?% ?&)
+     nil nil nil nil nil nil nil nil nil) ;; NUKTAS
     (;; Misc Symbols (7)
-     ?X ?x ?_ ">]" ?d "X]" ?>)
+     nil ?x ?_ nil ?d)
     (;; Digits
-     ?0 ?1 ?2 ?3 ?4 ?5 ?6 ?7 ?8 ?9)
-    (;; Inscripts
-     ?# ?$ ?^ ?* ?\])))
+     ?0 ?1 ?2 ?3 ?4 ?5 ?6 ?7 ?8 ?9)))

 (defvar inscript-tml-keytable
   '(
@@ -463,6 +454,21 @@ inscript-tml-keytable
  "malayalam-inscript" "Malayalam" "MlmIS"
  "Malayalam keyboard Inscript.")

+;; Chillus
+(quail-defrule "Cd" ["ണ്"])
+(quail-defrule "Cd]" ?ൺ)
+(quail-defrule "vd" ["ന്"])
+(quail-defrule "vd]" ?ൻ)
+(quail-defrule "jd" ["ര്"])
+(quail-defrule "jd]" ?ർ)
+(quail-defrule "nd" ["ല്"])
+(quail-defrule "nd]" ?ൽ)
+(quail-defrule "Nd" ["ള്"])
+(quail-defrule "Nd]" ?ൾ)
+
+(quail-defrule "\\" ?‌)
+(quail-defrule "X" ?​)
+
 (if nil
     (quail-define-package "tamil-inscript" "Tamil" "TmlIS" t "Tamil keyboard Inscript"))
 (quail-define-inscript-package
@@ -571,4 +577,72 @@ inscript-tml-keytable
   ("?" ?\?)
   ("/" ?্))

+(defun indian-mlm-mozhi-update-translation (control-flag)
+  (let ((len (length quail-current-key)) chillu
+	(vowels '(?a ?e ?i ?o ?u ?A ?E ?I ?O ?U ?R)))
+    (cond ((numberp control-flag)
+	   (progn (if (= control-flag 0)
+		      (setq quail-current-str quail-current-key)
+		    (cond (input-method-exit-on-first-char)
+			  ((and (memq (aref quail-current-key
+					    (1- control-flag))
+				      vowels)
+				(setq chillu (cl-position
+					      (aref quail-current-key
+						    control-flag)
+					      '(?m ?N ?n ?r ?l ?L))))
+			   ;; conditions for putting chillu
+			   (and (or (and (= control-flag (1- len))
+					 (not (setq control-flag nil)))
+				    (and (= control-flag (- len 2))
+					 (let ((temp (aref quail-current-key
+							   (1- len))))
+                                           ;; is it last char of word?
+					   (not
+					    (or (and (>= temp ?a) (<= temp ?z))
+						(and (>= temp ?A) (<= temp ?Z))
+						(eq temp ?~))))
+					 (setq control-flag (1+ control-flag))))
+				(setq quail-current-str     ;; put chillu
+				      (concat (if (not (stringp
+							quail-current-str))
+						  (string quail-current-str)
+						quail-current-str)
+					      (string
+					       (nth chillu '(?ം ?ൺ ?ൻ ?ർ ?ൽ ?ൾ)))))))))
+		  (and (not input-method-exit-on-first-char) control-flag
+		       (while (> len control-flag)
+			 (setq len (1- len))
+			 (setq unread-command-events
+			       (cons (aref quail-current-key len)
+				     unread-command-events))))
+		  ))
+	  ((null control-flag)
+	   (unless quail-current-str
+	     (setq quail-current-str quail-current-key)
+	     ))
+	  ((equal control-flag t)
+	   (if (memq (aref quail-current-key (1- len))  ;; If vowel ending,
+		     vowels)                            ;; may have to put
+	       (setq control-flag nil)))))              ;; chillu. So don't
+  control-flag)                                         ;; end translation
+
+(quail-define-package "malayalam-mozhi" "Malayalam" "MlmMI" t
+                      "Malayalam transliteration by Mozhi method."
+                      nil nil t nil nil nil t nil
+                      'indian-mlm-mozhi-update-translation)
+
+(maphash
+ (lambda (key val)
+   (quail-defrule key (if (= (length val) 1)
+			  (string-to-char val)
+			(vector val))))
+ (cdr indian-mlm-mozhi-hash))
+
+(defun indian-mlm-mozhi-underscore (key len) (throw 'quail-tag nil))
+
+(quail-defrule "_" 'indian-mlm-mozhi-underscore)
+(quail-defrule "|" ?‌)
+(quail-defrule "||" ?​)
+
 ;;; indian.el ends here
--
2.20.1


             reply	other threads:[~2020-04-26 13:49 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-04-26 13:49 James Thomas [this message]
2020-04-26 15:43 ` [PATCH] Improve Malayalam language transliteration Eli Zaretskii
     [not found]   ` <87tv161aml.fsf@gmx.net>
2020-04-26 17:12     ` Eli Zaretskii
2020-04-26 22:11       ` James Thomas
2020-04-27  1:33       ` James Thomas
2020-04-27  2:42       ` James Thomas
2020-05-28 17:33         ` Eli Zaretskii
2020-05-29 14:57           ` James Thomas
2020-06-01  8:49           ` James Thomas
2020-06-01 15:02             ` Eli Zaretskii
2020-06-02  2:27               ` James Thomas
2020-07-15 12:11           ` James Thomas
2020-07-15 14:29             ` Eli Zaretskii
2020-07-16  3:37               ` James Thomas
2020-07-16  5:20                 ` James Thomas
2020-07-16  5:44                   ` James Thomas
2020-07-16 15:07                     ` Eli Zaretskii

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://www.gnu.org/software/emacs/

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=87d07ul5m1.fsf@gmx.net \
    --to=jimjoe@gmx.net \
    --cc=emacs-devel@gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://git.savannah.gnu.org/cgit/emacs.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).