unofficial mirror of emacs-devel@gnu.org 
 help / color / mirror / code / Atom feed
* [PATCH] Improve Malayalam language transliteration
@ 2020-04-26 13:49 James Thomas
  2020-04-26 15:43 ` Eli Zaretskii
  0 siblings, 1 reply; 17+ messages in thread
From: James Thomas @ 2020-04-26 13:49 UTC (permalink / raw)
  To: emacs-devel

[-- Attachment #1: Type: text/plain, Size: 632 bytes --]

The existing Quail ITRANS scheme is incomplete and ill-suited to the
language: it does not support some common characters and does not
handle quirks such as 'chillu's.  Also, the Inscript method has errors
and has not been updated to the latest standard.

This patch implements a basic but sufficient subset of the Mozhi
scheme (the complete scheme is unnecessarily complicated IMO) and
updates Inscript.  References:
https://malayalam.kerala.gov.in/index.php/InputMethods
https://sites.google.com/site/cibu/

Note: If checking on Ubuntu (and maybe Debian), set the font to Noto
Sans Mono to avoid the problems with the default one.

--
Jim

[-- Attachment #2: 0001-Improve-Malayalam-language-transliteration.patch --]
[-- Type: text/x-diff, Size: 9808 bytes --]

From 7261783271799b0d9cbd5c49afb119f1b8d9e9d6 Mon Sep 17 00:00:00 2001
From: James Thomas <jimjoe@gmx.net>
Date: Sun, 26 Apr 2020 18:59:56 +0530
Subject: [PATCH] Improve Malayalam language transliteration

The current ITRANS scheme does not support some characters and
language quirks like 'chillu's.  The Inscript method is not complete.
* lisp/language/ind-util.el
  (indian-mlm-base-table): Add archaic chars & combos; cleanup.
  (indian-mlm-mozhi-table): For new scheme Mozhi.
* lisp/leim/quail/indian.el
  (inscript-mlm-keytable): Correct errors.
  Add Inscript chillus & zero-width chars, Mozhi scheme.
* etc/NEWS: Mention change.
Replace ITRANS with a sufficient implementation of the Mozhi scheme.
Complete Inscript implementation.  Reference:
https://malayalam.kerala.gov.in/index.php/InputMethods
---
 etc/NEWS                  |   7 +++
 lisp/language/ind-util.el |  40 +++++++++++---
 lisp/leim/quail/indian.el | 106 ++++++++++++++++++++++++++++++++------
 3 files changed, 129 insertions(+), 24 deletions(-)

diff --git a/etc/NEWS b/etc/NEWS
index 025d5c14a7..e701cfef41 100644
--- a/etc/NEWS
+++ b/etc/NEWS
@@ -288,6 +288,13 @@ prefix on the Subject line in various languages.
 These new navigation commands are bound to 'n' and 'p' in
 'apropos-mode'.

+** Quail
+
+---
+*** Improved Malayalam language transliteration
+A sufficient implementation of the Mozhi scheme replaces the
+incomplete ITRANS scheme.  Inscript method updated to latest standard.
+
 \f
 * New Modes and Packages in Emacs 28.1

diff --git a/lisp/language/ind-util.el b/lisp/language/ind-util.el
index 4319e5537e..fd21f3a6a6 100644
--- a/lisp/language/ind-util.el
+++ b/lisp/language/ind-util.el
@@ -232,8 +232,8 @@ indian-mlm-base-table
   '(
     (;; VOWELS
      (?അ nil) (?ആ ?ാ) (?ഇ ?ി) (?ഈ ?ീ) (?ഉ ?ു) (?ഊ ?ൂ)
-     (?ഋ ?ൃ) (?ഌ nil) nil (?ഏ ?േ) (?എ ?െ) (?ഐ ?ൈ)
-     nil (?ഓ ?ോ) (?ഒ ?ൊ) (?ഔ ?ൌ) nil nil)
+     (?ഋ ?ൃ) (?ഌ ?ൢ) (?ൡ ?ൣ) (?ഏ ?േ) (?എ ?െ) (?ഐ ?ൈ)
+     nil (?ഒ ?ൊ) (?ഓ ?ോ) (?ഔ ?ൗ) (?് ?്) (?ൠ ?ൄ))
     (;; CONSONANTS
      ?ക ?ഖ ?ഗ ?ഘ ?ങ                  ;; GUTTRULS
      ?ച ?ഛ ?ജ ?ഝ ?ഞ                  ;; PALATALS
@@ -243,13 +243,14 @@ indian-mlm-base-table
      ?യ ?ര ?റ ?ല ?ള ?ഴ ?വ          ;; SEMIVOWELS
      ?ശ ?ഷ ?സ ?ഹ                    ;; SIBILANTS
      nil nil nil nil nil nil nil nil      ;; NUKTAS
-     "ജ്ഞ" "ക്ഷ")
+     "ക്ഷ"
+     "റ്റ" "ന്റ" "ത്ത" "ത്ഥ" "ഞ്ഞ" "ങ്ങ" "ന്ന"
+     "ഞ്ച" "ന്ക" "ങ്ക" "ച്ച" "ച്ഛ" "ക്ക"
+     "ബ്ബ" "ക്ക" "ഗ്ഗ" "ജ്ജ" "മ്മ" "പ്പ" "വ്വ" "ക്സ" "ശ്ശ")
     (;; Misc Symbols
      nil ?ം ?ഃ nil ?് nil nil)
     (;; Digits
-     ?൦ ?൧ ?൨ ?൩ ?൪ ?൫ ?൬ ?൭ ?൮ ?൯)
-    (;; Inscript-extra (4)  (#, $, ^, *, ])
-     "്ര" "ര്" "ത്ര" "ശ്ര" nil)))
+     ?൦ ?൧ ?൨ ?൩ ?൪ ?൫ ?൬ ?൭ ?൮ ?൯)))

 (defvar indian-tml-base-table
   '(
@@ -323,6 +324,29 @@ indian-itrans-v5-table-for-tamil
     (;; misc -- 7
      ".N" (".n" "M") "H" ".a" ".h" ("AUM" "OM") "..")))

+(defvar indian-mlm-mozhi-table
+  '(;; for encode/decode
+    (;; vowels -- 18
+     "a" ("aa" "A") "i" ("ii" "I") "u" ("uu" "U")
+     "R" "Ll" "Lll" ("E" "ae") "e" "ai"
+     nil  "o"   "O"   "au"  "~" "RR")
+    (;; consonants -- 40
+     ("k" "c")   "kh"  "g"   "gh"  "ng"
+     "ch" ("Ch" "chh") "j" "jh" "nj"
+     "T"   "Th"  "D"   "Dh"  "N"
+     "th"  "thh" "d"   "dh"  "n"   nil
+     "p"   ("ph" "f")  "b"   "bh"  "m"
+     "y"   "r"   "rr"  "l"  "L" "zh" ("v" "w")
+     ("S" "z") "sh" "s" "h"
+     nil nil nil nil nil nil nil nil
+     "X"
+     ;; some of these are extra to Mozhi
+     ("t" "tt") "nt" "tth" "tthh" "nnj" "nng" "nn"
+     "nch" "nc" "nk" "cch" "cchh" "cc"
+     "B" ("C" "K" "q") "G" "J" "M" "P" "V" "x" "Z")
+    (;; misc -- 7
+     nil nil "H")))
+
 (defvar indian-kyoto-harvard-table
   '(;; for encode/decode
     (;; vowel
@@ -520,9 +544,9 @@ indian-knd-itrans-v5-hash
   (indian-make-hash indian-knd-base-table
 			  indian-itrans-v5-table))

-(defvar indian-mlm-itrans-v5-hash
+(defvar indian-mlm-mozhi-hash
   (indian-make-hash indian-mlm-base-table
-			  indian-itrans-v5-table))
+			  indian-mlm-mozhi-table))

 (defvar indian-tml-itrans-v5-hash
   (indian-make-hash indian-tml-base-table
diff --git a/lisp/leim/quail/indian.el b/lisp/leim/quail/indian.el
index 2681eab0e5..7fd2b8ed65 100644
--- a/lisp/leim/quail/indian.el
+++ b/lisp/leim/quail/indian.el
@@ -117,12 +117,6 @@ "\\''"
  indian-knd-itrans-v5-hash "kannada-itrans" "Kannada" "KndIT"
  "Kannada transliteration by ITRANS method.")

-(if nil
-    (quail-define-package "malayalam-itrans" "Malayalam" "MlmIT" t "Malayalam ITRANS"))
-(quail-define-indian-trans-package
- indian-mlm-itrans-v5-hash "malayalam-itrans" "Malayalam" "MlmIT"
- "Malayalam transliteration by ITRANS method.")
-
 (defvar quail-tamil-itrans-syllable-table
   (let ((vowels
 	 '(("அ" nil "a")
@@ -358,24 +352,21 @@ inscript-mlm-keytable
   '(
     (;; VOWELS  (18)
      (?D nil) (?E ?e) (?F ?f) (?R ?r) (?G ?g) (?T ?t)
-     (?+ ?=) ("F]" "f]") (?! ?@) (?S ?s) (?Z ?z) (?W ?w)
-     (?| ?\\) (?~ ?`) (?A ?a) (?Q ?q) ("+]" "=]") ("R]" "r]"))
+     (?= ?+) nil nil (?S ?s) (?Z ?z) (?W ?w)
+     nil (?~ ?`) (?A ?a) (?Q ?q))
     (;; CONSONANTS (42)
      ?k ?K ?i ?I ?U                ;; GRUTTALS
      ?\; ?: ?p ?P ?}               ;; PALATALS
      ?' ?\" ?\[ ?{ ?C              ;; CEREBRALS
-     ?l ?L ?o ?O ?v ?V             ;; DENTALS
+     ?l ?L ?o ?O ?v nil            ;; DENTALS
      ?h ?H ?y ?Y ?c                ;; LABIALS
-     ?/ ?j ?J ?n ?N "N]" ?b        ;; SEMIVOWELS
+     ?/ ?j ?J ?n ?N ?B ?b          ;; SEMIVOWELS
      ?M ?< ?m ?u                   ;; SIBILANTS
-     "k]" "K]" "i]" "p]" "[]" "{]" "H]" "/]" ;; NUKTAS
-     ?% ?&)
+     nil nil nil nil nil nil nil nil nil) ;; NUKTAS
     (;; Misc Symbols (7)
-     ?X ?x ?_ ">]" ?d "X]" ?>)
+     nil ?x ?_ nil ?d)
     (;; Digits
-     ?0 ?1 ?2 ?3 ?4 ?5 ?6 ?7 ?8 ?9)
-    (;; Inscripts
-     ?# ?$ ?^ ?* ?\])))
+     ?0 ?1 ?2 ?3 ?4 ?5 ?6 ?7 ?8 ?9)))

 (defvar inscript-tml-keytable
   '(
@@ -463,6 +454,21 @@ inscript-tml-keytable
  "malayalam-inscript" "Malayalam" "MlmIS"
  "Malayalam keyboard Inscript.")

+;; Chillus
+(quail-defrule "Cd" ["ണ്"])
+(quail-defrule "Cd]" ?ൺ)
+(quail-defrule "vd" ["ന്"])
+(quail-defrule "vd]" ?ൻ)
+(quail-defrule "jd" ["ര്"])
+(quail-defrule "jd]" ?ർ)
+(quail-defrule "nd" ["ല്"])
+(quail-defrule "nd]" ?ൽ)
+(quail-defrule "Nd" ["ള്"])
+(quail-defrule "Nd]" ?ൾ)
+
+(quail-defrule "\\" ?‌)
+(quail-defrule "X" ?​)
+
 (if nil
     (quail-define-package "tamil-inscript" "Tamil" "TmlIS" t "Tamil keyboard Inscript"))
 (quail-define-inscript-package
@@ -571,4 +577,72 @@ inscript-tml-keytable
   ("?" ?\?)
   ("/" ?্))

+(defun indian-mlm-mozhi-update-translation (control-flag)
+  (let ((len (length quail-current-key)) chillu
+	(vowels '(?a ?e ?i ?o ?u ?A ?E ?I ?O ?U ?R)))
+    (cond ((numberp control-flag)
+	   (progn (if (= control-flag 0)
+		      (setq quail-current-str quail-current-key)
+		    (cond (input-method-exit-on-first-char)
+			  ((and (memq (aref quail-current-key
+					    (1- control-flag))
+				      vowels)
+				(setq chillu (cl-position
+					      (aref quail-current-key
+						    control-flag)
+					      '(?m ?N ?n ?r ?l ?L))))
+			   ;; conditions for putting chillu
+			   (and (or (and (= control-flag (1- len))
+					 (not (setq control-flag nil)))
+				    (and (= control-flag (- len 2))
+					 (let ((temp (aref quail-current-key
+							   (1- len))))
+                                           ;; is it last char of word?
+					   (not
+					    (or (and (>= temp ?a) (<= temp ?z))
+						(and (>= temp ?A) (<= temp ?Z))
+						(eq temp ?~))))
+					 (setq control-flag (1+ control-flag))))
+				(setq quail-current-str     ;; put chillu
+				      (concat (if (not (stringp
+							quail-current-str))
+						  (string quail-current-str)
+						quail-current-str)
+					      (string
+					       (nth chillu '(?ം ?ൺ ?ൻ ?ർ ?ൽ ?ൾ)))))))))
+		  (and (not input-method-exit-on-first-char) control-flag
+		       (while (> len control-flag)
+			 (setq len (1- len))
+			 (setq unread-command-events
+			       (cons (aref quail-current-key len)
+				     unread-command-events))))
+		  ))
+	  ((null control-flag)
+	   (unless quail-current-str
+	     (setq quail-current-str quail-current-key)
+	     ))
+	  ((equal control-flag t)
+	   (if (memq (aref quail-current-key (1- len))  ;; If vowel ending,
+		     vowels)                            ;; may have to put
+	       (setq control-flag nil)))))              ;; chillu. So don't
+  control-flag)                                         ;; end translation
+
+(quail-define-package "malayalam-mozhi" "Malayalam" "MlmMI" t
+                      "Malayalam transliteration by Mozhi method."
+                      nil nil t nil nil nil t nil
+                      'indian-mlm-mozhi-update-translation)
+
+(maphash
+ (lambda (key val)
+   (quail-defrule key (if (= (length val) 1)
+			  (string-to-char val)
+			(vector val))))
+ (cdr indian-mlm-mozhi-hash))
+
+(defun indian-mlm-mozhi-underscore (key len) (throw 'quail-tag nil))
+
+(quail-defrule "_" 'indian-mlm-mozhi-underscore)
+(quail-defrule "|" ?‌)
+(quail-defrule "||" ?​)
+
 ;;; indian.el ends here
--
2.20.1


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [PATCH] Improve Malayalam language transliteration
  2020-04-26 13:49 [PATCH] Improve Malayalam language transliteration James Thomas
@ 2020-04-26 15:43 ` Eli Zaretskii
       [not found]   ` <87tv161aml.fsf@gmx.net>
  0 siblings, 1 reply; 17+ messages in thread
From: Eli Zaretskii @ 2020-04-26 15:43 UTC (permalink / raw)
  To: James Thomas; +Cc: emacs-devel

> From: James Thomas <jimjoe@gmx.net>
> Date: Sun, 26 Apr 2020 19:19:10 +0530
> 
> The existing Quail ITRANS scheme is incomplete and not applicable to the
> language - does not support some common characters and does not handle
> its quirks like 'chillu's.  Also, the Inscript method has errors and is
> not updated to the latest standard.
> 
> This patch implements the basic sufficient features of the Mozhi
> scheme (the complete scheme is unnecessarily complicated IMO) and
> updates Inscript.  Refer:
> https://malayalam.kerala.gov.in/index.php/InputMethods
> https://sites.google.com/site/cibu/
> 
> Note: If checking on Ubuntu (and maybe Debian) set the font to Noto
> Sans Mono to avoid the problems with the default one.

Thanks.

This is a contribution of a significant size, so we would need a
copyright assignment from you before we could accept it.  Would you be
willing to get the legal paperwork rolling, so that we could accept
this and your future contributions?

> -(if nil
> -    (quail-define-package "malayalam-itrans" "Malayalam" "MlmIT" t "Malayalam ITRANS"))
> -(quail-define-indian-trans-package
> - indian-mlm-itrans-v5-hash "malayalam-itrans" "Malayalam" "MlmIT"
> - "Malayalam transliteration by ITRANS method.")

I'm not sure I understand: does this delete the ITRANS method?  Would
it make sense to leave it, and just add the new one?  And please also
note that lisp/language/indian.el references the malayalam-itrans
method; should that be changed as well?



^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] Improve Malayalam language transliteration
       [not found]   ` <87tv161aml.fsf@gmx.net>
@ 2020-04-26 17:12     ` Eli Zaretskii
  2020-04-26 22:11       ` James Thomas
                         ` (2 more replies)
  0 siblings, 3 replies; 17+ messages in thread
From: Eli Zaretskii @ 2020-04-26 17:12 UTC (permalink / raw)
  To: James Thomas; +Cc: emacs-devel

> From: James Thomas <jimjoe@gmx.net>
> Date: Sun, 26 Apr 2020 21:51:06 +0530
> 
> >> -(if nil
> >> -    (quail-define-package "malayalam-itrans" "Malayalam" "MlmIT" t "Malayalam ITRANS"))
> >> -(quail-define-indian-trans-package
> >> - indian-mlm-itrans-v5-hash "malayalam-itrans" "Malayalam" "MlmIT"
> >> - "Malayalam transliteration by ITRANS method.")
> >
> > I'm not sure I understand: does this delete the ITRANS method?  Would
> > it make sense to leave it, and just add the new one?  And please also
> > note that lisp/language/indian.el references the malayalam-itrans
> > method; should that be changed as well?
> 
> Yes this does delete and replace it.  I guess we could keep the ITRANS
> method also, but it's practically unusable for Malayalam (given its
> uniqueness and quirks) and not even defined for all its characters - e.g.,
> the common characters ള (La), ഴ (zha), റ (rra) are missing. As I
> understand it, it was developed for Devanagari and was easily adapted
> to similar Indian languages. The Mozhi scheme is a way of expanding it
> to support Malayalam.

That's okay, but we don't like deleting existing capabilities without
some period during which it's deprecated.  So maybe say in NEWS that
ITRANS is deprecated, and add a comment in quail/indian.el that it
should be removed in some future release.

> Sorry I missed the reference in lisp/language/indian.el. It seems to be
> just defining the default input method for the language. Can be changed
> easily if decided.

If you think this new method is much better, it's okay to make it the
default, I think.  Just mention that in NEWS.

Thanks.

P.S. And please keep the list address on the CC, so that this
discussion is recorded.



^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] Improve Malayalam language transliteration
  2020-04-26 17:12     ` Eli Zaretskii
@ 2020-04-26 22:11       ` James Thomas
  2020-04-27  1:33       ` James Thomas
  2020-04-27  2:42       ` James Thomas
  2 siblings, 0 replies; 17+ messages in thread
From: James Thomas @ 2020-04-26 22:11 UTC (permalink / raw)
  To: Eli Zaretskii; +Cc: emacs-devel

Eli Zaretskii <eliz@gnu.org> writes:

>> From: James Thomas <jimjoe@gmx.net>
>> Date: Sun, 26 Apr 2020 21:51:06 +0530
>> 
>> >> -(if nil
>> >> -    (quail-define-package "malayalam-itrans" "Malayalam" "MlmIT" t "Malayalam ITRANS"))
>> >> -(quail-define-indian-trans-package
>> >> - indian-mlm-itrans-v5-hash "malayalam-itrans" "Malayalam" "MlmIT"
>> >> - "Malayalam transliteration by ITRANS method.")
>> >
>> > I'm not sure I understand: does this delete the ITRANS method?  Would
>> > it make sense to leave it, and just add the new one?  And please also
>> > note that lisp/language/indian.el references the malayalam-itrans
>> > method; should that be changed as well?
>> 
>> Yes this does delete and replace it.  I guess we could keep the ITRANS
>> method also, but it's practically unusable for Malayalam (given its
>> uniqueness and quirks) and not even defined for all its characters - e.g.,
>> the common characters ള (La), ഴ (zha), റ (rra) are missing. As I
>> understand it, it was developed for Devanagari and was easily adapted
>> to similar Indian languages. The Mozhi scheme is a way of expanding it
>> to support Malayalam.
>
> That's okay, but we don't like deleting existing capabilities without
> some period during which it's deprecated.  So maybe say in NEWS that
> ITRANS is deprecated, and add a comment in quail/indian.el that it
> should be removed in some future release.

Okay, it's settled then. I'll send in a patch with such changes.

>
>> Sorry I missed the reference in lisp/language/indian.el. It seems to be
>> just defining the default input method for the language. Can be changed
>> easily if decided.
>
> If you think this new method is much better, it's okay to make it the
> default, I think.  Just mention that in NEWS.

Will do that too in the new patch.

>
> Thanks.
>
> P.S. And please keep the list address on the CC, so that this
> discussion is recorded.

--
Jim



^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] Improve Malayalam language transliteration
  2020-04-26 17:12     ` Eli Zaretskii
  2020-04-26 22:11       ` James Thomas
@ 2020-04-27  1:33       ` James Thomas
  2020-04-27  2:42       ` James Thomas
  2 siblings, 0 replies; 17+ messages in thread
From: James Thomas @ 2020-04-27  1:33 UTC (permalink / raw)
  To: Eli Zaretskii; +Cc: emacs-devel

[-- Attachment #1: Type: text/plain, Size: 818 bytes --]

Eli Zaretskii <eliz@gnu.org> writes:

>> From: James Thomas <jimjoe@gmx.net>
>> Date: Sun, 26 Apr 2020 21:51:06 +0530
>>
> That's okay, but we don't like deleting existing capabilities without
> some period during which it's deprecated.  So maybe say in NEWS that
> ITRANS is deprecated, and add a comment in quail/indian.el that it
> should be removed in some future release.
>
>> Sorry I missed the reference in lisp/language/indian.el. It seems to be
>> just defining the default input method for the language. Can be changed
>> easily if decided.
>
> If you think this new method is much better, it's okay to make it the
> default, I think.  Just mention that in NEWS.
>

Here's a modified patch without ITRANS removal. I have refrained from
making the new Mozhi method the default.

--
Jim

[-- Attachment #2: 0002-Improve-Malayalam-language-transliteration.patch --]
[-- Type: text/x-diff, Size: 9555 bytes --]

From 3e9bf54134bea2ccf30d1e174696a971d9f7de5a Mon Sep 17 00:00:00 2001
From: James Thomas <jimjoe@gmx.net>
Date: Mon, 27 Apr 2020 06:53:50 +0530
Subject: [PATCH] Improve Malayalam language transliteration

The existing ITRANS scheme does not support some characters and
language quirks like 'chillu's.  The Inscript method has errors.
* lisp/language/ind-util.el
  (indian-mlm-base-table): + archaic chars, Mozhi combos; cleanup.
  (indian-mlm-mozhi-table): For new scheme Mozhi.
* lisp/leim/quail/indian.el
  (inscript-mlm-keytable): Correct errors.
  Add Inscript chillus & zero-width chars, Mozhi scheme.
* etc/NEWS: Mention change.
Add a sufficient implementation of the Mozhi scheme and complete
Inscript scheme.
---
 etc/NEWS                  |   7 +++
 lisp/language/ind-util.el |  40 ++++++++++++---
 lisp/leim/quail/indian.el | 101 ++++++++++++++++++++++++++++++++++----
 3 files changed, 132 insertions(+), 16 deletions(-)

diff --git a/etc/NEWS b/etc/NEWS
index 025d5c14a7..aa551177d1 100644
--- a/etc/NEWS
+++ b/etc/NEWS
@@ -288,6 +288,13 @@ prefix on the Subject line in various languages.
 These new navigation commands are bound to 'n' and 'p' in
 'apropos-mode'.

+** Quail
+
+---
+*** Improved Malayalam language transliteration
+Added new Mozhi scheme. The inapplicable ITRANS scheme is now
+deprecated. Errors in Inscript method corrected.
+
 \f
 * New Modes and Packages in Emacs 28.1

diff --git a/lisp/language/ind-util.el b/lisp/language/ind-util.el
index 4319e5537e..0c1f09c9b6 100644
--- a/lisp/language/ind-util.el
+++ b/lisp/language/ind-util.el
@@ -232,8 +232,8 @@ indian-mlm-base-table
   '(
     (;; VOWELS
      (?അ nil) (?ആ ?ാ) (?ഇ ?ി) (?ഈ ?ീ) (?ഉ ?ു) (?ഊ ?ൂ)
-     (?ഋ ?ൃ) (?ഌ nil) nil (?ഏ ?േ) (?എ ?െ) (?ഐ ?ൈ)
-     nil (?ഓ ?ോ) (?ഒ ?ൊ) (?ഔ ?ൌ) nil nil)
+     (?ഋ ?ൃ) (?ഌ ?ൢ) (?ൡ ?ൣ) (?ഏ ?േ) (?എ ?െ) (?ഐ ?ൈ)
+     nil (?ഒ ?ൊ) (?ഓ ?ോ) (?ഔ ?ൗ) (?് ?്) (?ൠ ?ൄ))
     (;; CONSONANTS
      ?ക ?ഖ ?ഗ ?ഘ ?ങ                  ;; GUTTRULS
      ?ച ?ഛ ?ജ ?ഝ ?ഞ                  ;; PALATALS
@@ -243,13 +243,14 @@ indian-mlm-base-table
      ?യ ?ര ?റ ?ല ?ള ?ഴ ?വ          ;; SEMIVOWELS
      ?ശ ?ഷ ?സ ?ഹ                    ;; SIBILANTS
      nil nil nil nil nil nil nil nil      ;; NUKTAS
-     "ജ്ഞ" "ക്ഷ")
+     "ജ്ഞ" "ക്ഷ"
+     "റ്റ" "ന്റ" "ത്ത" "ത്ഥ" "ഞ്ഞ" "ങ്ങ" "ന്ന"
+     "ഞ്ച" "ന്ക" "ങ്ക" "ച്ച" "ച്ഛ" "ക്ക"
+     "ബ്ബ" "ക്ക" "ഗ്ഗ" "ജ്ജ" "മ്മ" "പ്പ" "വ്വ" "ക്സ" "ശ്ശ")
     (;; Misc Symbols
      nil ?ം ?ഃ nil ?് nil nil)
     (;; Digits
-     ?൦ ?൧ ?൨ ?൩ ?൪ ?൫ ?൬ ?൭ ?൮ ?൯)
-    (;; Inscript-extra (4)  (#, $, ^, *, ])
-     "്ര" "ര്" "ത്ര" "ശ്ര" nil)))
+     ?൦ ?൧ ?൨ ?൩ ?൪ ?൫ ?൬ ?൭ ?൮ ?൯)))

 (defvar indian-tml-base-table
   '(
@@ -323,6 +324,29 @@ indian-itrans-v5-table-for-tamil
     (;; misc -- 7
      ".N" (".n" "M") "H" ".a" ".h" ("AUM" "OM") "..")))

+(defvar indian-mlm-mozhi-table
+  '(;; for encode/decode
+    (;; vowels -- 18
+     "a" ("aa" "A") "i" ("ii" "I") "u" ("uu" "U")
+     "R" "Ll" "Lll" ("E" "ae") "e" "ai"
+     nil  "o"   "O"   "au"  "~" "RR")
+    (;; consonants -- 40
+     ("k" "c")   "kh"  "g"   "gh"  "ng"
+     "ch" ("Ch" "chh") "j" "jh" "nj"
+     "T"   "Th"  "D"   "Dh"  "N"
+     "th"  "thh" "d"   "dh"  "n"   nil
+     "p"   ("ph" "f")  "b"   "bh"  "m"
+     "y"   "r"   "rr"  "l"  "L" "zh" ("v" "w")
+     ("S" "z") "sh" "s" "h"
+     nil nil nil nil nil nil nil nil
+     nil "X"
+     ;; some of these are extra to Mozhi
+     ("t" "tt") "nt" "tth" "tthh" "nnj" "nng" "nn"
+     "nch" "nc" "nk" "cch" "cchh" "cc"
+     "B" ("C" "K" "q") "G" "J" "M" "P" "V" "x" "Z")
+    (;; misc -- 7
+     nil nil "H")))
+
 (defvar indian-kyoto-harvard-table
   '(;; for encode/decode
     (;; vowel
@@ -524,6 +548,10 @@ indian-mlm-itrans-v5-hash
   (indian-make-hash indian-mlm-base-table
 			  indian-itrans-v5-table))

+(defvar indian-mlm-mozhi-hash
+  (indian-make-hash indian-mlm-base-table
+			  indian-mlm-mozhi-table))
+
 (defvar indian-tml-itrans-v5-hash
   (indian-make-hash indian-tml-base-table
 			  indian-itrans-v5-table-for-tamil))
diff --git a/lisp/leim/quail/indian.el b/lisp/leim/quail/indian.el
index 2681eab0e5..9724d2d4a6 100644
--- a/lisp/leim/quail/indian.el
+++ b/lisp/leim/quail/indian.el
@@ -117,6 +117,7 @@ "\\''"
  indian-knd-itrans-v5-hash "kannada-itrans" "Kannada" "KndIT"
  "Kannada transliteration by ITRANS method.")

+;; ITRANS not applicable to Malayalam & could be removed eventually
 (if nil
     (quail-define-package "malayalam-itrans" "Malayalam" "MlmIT" t "Malayalam ITRANS"))
 (quail-define-indian-trans-package
@@ -358,24 +359,21 @@ inscript-mlm-keytable
   '(
     (;; VOWELS  (18)
      (?D nil) (?E ?e) (?F ?f) (?R ?r) (?G ?g) (?T ?t)
-     (?+ ?=) ("F]" "f]") (?! ?@) (?S ?s) (?Z ?z) (?W ?w)
-     (?| ?\\) (?~ ?`) (?A ?a) (?Q ?q) ("+]" "=]") ("R]" "r]"))
+     (?= ?+) nil nil (?S ?s) (?Z ?z) (?W ?w)
+     nil (?~ ?`) (?A ?a) (?Q ?q))
     (;; CONSONANTS (42)
      ?k ?K ?i ?I ?U                ;; GRUTTALS
      ?\; ?: ?p ?P ?}               ;; PALATALS
      ?' ?\" ?\[ ?{ ?C              ;; CEREBRALS
-     ?l ?L ?o ?O ?v ?V             ;; DENTALS
+     ?l ?L ?o ?O ?v nil            ;; DENTALS
      ?h ?H ?y ?Y ?c                ;; LABIALS
-     ?/ ?j ?J ?n ?N "N]" ?b        ;; SEMIVOWELS
+     ?/ ?j ?J ?n ?N ?B ?b          ;; SEMIVOWELS
      ?M ?< ?m ?u                   ;; SIBILANTS
-     "k]" "K]" "i]" "p]" "[]" "{]" "H]" "/]" ;; NUKTAS
-     ?% ?&)
+     nil nil nil nil nil nil nil nil nil) ;; NUKTAS
     (;; Misc Symbols (7)
-     ?X ?x ?_ ">]" ?d "X]" ?>)
+     nil ?x ?_ nil ?d)
     (;; Digits
-     ?0 ?1 ?2 ?3 ?4 ?5 ?6 ?7 ?8 ?9)
-    (;; Inscripts
-     ?# ?$ ?^ ?* ?\])))
+     ?0 ?1 ?2 ?3 ?4 ?5 ?6 ?7 ?8 ?9)))

 (defvar inscript-tml-keytable
   '(
@@ -463,6 +461,21 @@ inscript-tml-keytable
  "malayalam-inscript" "Malayalam" "MlmIS"
  "Malayalam keyboard Inscript.")

+;; Chillus
+(quail-defrule "Cd" ["ണ്"])
+(quail-defrule "Cd]" ?ൺ)
+(quail-defrule "vd" ["ന്"])
+(quail-defrule "vd]" ?ൻ)
+(quail-defrule "jd" ["ര്"])
+(quail-defrule "jd]" ?ർ)
+(quail-defrule "nd" ["ല്"])
+(quail-defrule "nd]" ?ൽ)
+(quail-defrule "Nd" ["ള്"])
+(quail-defrule "Nd]" ?ൾ)
+
+(quail-defrule "\\" ?‌)
+(quail-defrule "X" ?​)
+
 (if nil
     (quail-define-package "tamil-inscript" "Tamil" "TmlIS" t "Tamil keyboard Inscript"))
 (quail-define-inscript-package
@@ -571,4 +584,72 @@ inscript-tml-keytable
   ("?" ?\?)
   ("/" ?্))

+(defun indian-mlm-mozhi-update-translation (control-flag)
+  (let ((len (length quail-current-key)) chillu
+	(vowels '(?a ?e ?i ?o ?u ?A ?E ?I ?O ?U ?R)))
+    (cond ((numberp control-flag)
+	   (progn (if (= control-flag 0)
+		      (setq quail-current-str quail-current-key)
+		    (cond (input-method-exit-on-first-char)
+			  ((and (memq (aref quail-current-key
+					    (1- control-flag))
+				      vowels)
+				(setq chillu (cl-position
+					      (aref quail-current-key
+						    control-flag)
+					      '(?m ?N ?n ?r ?l ?L))))
+			   ;; conditions for putting chillu
+			   (and (or (and (= control-flag (1- len))
+					 (not (setq control-flag nil)))
+				    (and (= control-flag (- len 2))
+					 (let ((temp (aref quail-current-key
+							   (1- len))))
+                                           ;; is it last char of word?
+					   (not
+					    (or (and (>= temp ?a) (<= temp ?z))
+						(and (>= temp ?A) (<= temp ?Z))
+						(eq temp ?~))))
+					 (setq control-flag (1+ control-flag))))
+				(setq quail-current-str     ;; put chillu
+				      (concat (if (not (stringp
+							quail-current-str))
+						  (string quail-current-str)
+						quail-current-str)
+					      (string
+					       (nth chillu '(?ം ?ൺ ?ൻ ?ർ ?ൽ ?ൾ)))))))))
+		  (and (not input-method-exit-on-first-char) control-flag
+		       (while (> len control-flag)
+			 (setq len (1- len))
+			 (setq unread-command-events
+			       (cons (aref quail-current-key len)
+				     unread-command-events))))
+		  ))
+	  ((null control-flag)
+	   (unless quail-current-str
+	     (setq quail-current-str quail-current-key)
+	     ))
+	  ((equal control-flag t)
+	   (if (memq (aref quail-current-key (1- len))  ;; If vowel ending,
+		     vowels)                            ;; may have to put
+	       (setq control-flag nil)))))              ;; chillu. So don't
+  control-flag)                                         ;; end translation
+
+(quail-define-package "malayalam-mozhi" "Malayalam" "MlmMI" t
+                      "Malayalam transliteration by Mozhi method."
+                      nil nil t nil nil nil t nil
+                      'indian-mlm-mozhi-update-translation)
+
+(maphash
+ (lambda (key val)
+   (quail-defrule key (if (= (length val) 1)
+			  (string-to-char val)
+			(vector val))))
+ (cdr indian-mlm-mozhi-hash))
+
+(defun indian-mlm-mozhi-underscore (key len) (throw 'quail-tag nil))
+
+(quail-defrule "_" 'indian-mlm-mozhi-underscore)
+(quail-defrule "|" ?‌)
+(quail-defrule "||" ?​)
+
 ;;; indian.el ends here
--
2.20.1


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [PATCH] Improve Malayalam language transliteration
  2020-04-26 17:12     ` Eli Zaretskii
  2020-04-26 22:11       ` James Thomas
  2020-04-27  1:33       ` James Thomas
@ 2020-04-27  2:42       ` James Thomas
  2020-05-28 17:33         ` Eli Zaretskii
  2 siblings, 1 reply; 17+ messages in thread
From: James Thomas @ 2020-04-27  2:42 UTC (permalink / raw)
  To: Eli Zaretskii; +Cc: emacs-devel

[-- Attachment #1: Type: text/plain, Size: 796 bytes --]

Eli Zaretskii <eliz@gnu.org> writes:

>
> That's okay, but we don't like deleting existing capabilities without
> some period during which it's deprecated.  So maybe say in NEWS that
> ITRANS is deprecated, and add a comment in quail/indian.el that it
> should be removed in some future release.
>
>> Sorry I missed the reference in lisp/language/indian.el. It seems to be
>> just defining the default input method for the language. Can be changed
>> easily if decided.
>
> If you think this new method is much better, it's okay to make it the
> default, I think.  Just mention that in NEWS.
>
> Thanks.
>
> P.S. And please keep the list address on the CC, so that this
> discussion is recorded.

Sorry for the bother, but this newer patch implements the Inscript stuff
in an easier way.

--
Jim

[-- Attachment #2: 0003-Improve-Malayalam-language-transliteration.patch --]
[-- Type: text/x-diff, Size: 9310 bytes --]

From 0b655da7d98d6ff5c6211d1a56e879ac291f9c34 Mon Sep 17 00:00:00 2001
From: James Thomas <jimjoe@gmx.net>
Date: Mon, 27 Apr 2020 08:06:48 +0530
Subject: [PATCH] Improve Malayalam language transliteration

The existing ITRANS scheme does not support some characters and
language quirks like 'chillu's.  The Inscript method has errors.
* lisp/language/ind-util.el
  (indian-mlm-base-table): + archaic chars, Mozhi combos; cleanup.
  (indian-mlm-mozhi-table): For new scheme Mozhi.
* lisp/leim/quail/indian.el
  (inscript-mlm-keytable): Correct errors.
  Add Inscript chillus & zero-width chars, Mozhi scheme.
* etc/NEWS: Mention change.
Add a sufficient implementation of the Mozhi scheme.  Complete Inscript
implementation.
---
 etc/NEWS                  |  7 +++
 lisp/language/ind-util.el | 40 +++++++++++++++---
 lisp/leim/quail/indian.el | 89 +++++++++++++++++++++++++++++++++++----
 3 files changed, 122 insertions(+), 14 deletions(-)

diff --git a/etc/NEWS b/etc/NEWS
index 025d5c14a7..aa551177d1 100644
--- a/etc/NEWS
+++ b/etc/NEWS
@@ -288,6 +288,13 @@ prefix on the Subject line in various languages.
 These new navigation commands are bound to 'n' and 'p' in
 'apropos-mode'.

+** Quail
+
+---
+*** Improved Malayalam language transliteration
+Added new Mozhi scheme. The inapplicable ITRANS scheme is now
+deprecated. Errors in Inscript method corrected.
+
 \f
 * New Modes and Packages in Emacs 28.1

diff --git a/lisp/language/ind-util.el b/lisp/language/ind-util.el
index 4319e5537e..62885227f1 100644
--- a/lisp/language/ind-util.el
+++ b/lisp/language/ind-util.el
@@ -232,8 +232,8 @@ indian-mlm-base-table
   '(
     (;; VOWELS
      (?അ nil) (?ആ ?ാ) (?ഇ ?ി) (?ഈ ?ീ) (?ഉ ?ു) (?ഊ ?ൂ)
-     (?ഋ ?ൃ) (?ഌ nil) nil (?ഏ ?േ) (?എ ?െ) (?ഐ ?ൈ)
-     nil (?ഓ ?ോ) (?ഒ ?ൊ) (?ഔ ?ൌ) nil nil)
+     (?ഋ ?ൃ) (?ഌ ?ൢ) (?ൡ ?ൣ) (?ഏ ?േ) (?എ ?െ) (?ഐ ?ൈ)
+     nil (?ഒ ?ൊ) (?ഓ ?ോ) (?ഔ ?ൗ) (?് ?്) (?ൠ ?ൄ))
     (;; CONSONANTS
      ?ക ?ഖ ?ഗ ?ഘ ?ങ                  ;; GUTTRULS
      ?ച ?ഛ ?ജ ?ഝ ?ഞ                  ;; PALATALS
@@ -243,13 +243,16 @@ indian-mlm-base-table
      ?യ ?ര ?റ ?ല ?ള ?ഴ ?വ          ;; SEMIVOWELS
      ?ശ ?ഷ ?സ ?ഹ                    ;; SIBILANTS
      nil nil nil nil nil nil nil nil      ;; NUKTAS
-     "ജ്ഞ" "ക്ഷ")
+     "ജ്ഞ" "ക്ഷ"
+     "റ്റ" "ന്റ" "ത്ത" "ത്ഥ" "ഞ്ഞ" "ങ്ങ" "ന്ന"
+     "ഞ്ച" "ന്ക" "ങ്ക" "ച്ച" "ച്ഛ" "ക്ക"
+     "ബ്ബ" "ക്ക" "ഗ്ഗ" "ജ്ജ" "മ്മ" "പ്പ" "വ്വ" "ക്സ" "ശ്ശ")
     (;; Misc Symbols
      nil ?ം ?ഃ nil ?് nil nil)
     (;; Digits
      ?൦ ?൧ ?൨ ?൩ ?൪ ?൫ ?൬ ?൭ ?൮ ?൯)
-    (;; Inscript-extra (4)  (#, $, ^, *, ])
-     "്ര" "ര്" "ത്ര" "ശ്ര" nil)))
+    (;; Chillus
+     "ണ്" ?ൺ "ന്" ?ൻ "ര്" ?ർ "ല്" ?ൽ "ള്" ?ൾ)))

 (defvar indian-tml-base-table
   '(
@@ -323,6 +326,29 @@ indian-itrans-v5-table-for-tamil
     (;; misc -- 7
      ".N" (".n" "M") "H" ".a" ".h" ("AUM" "OM") "..")))

+(defvar indian-mlm-mozhi-table
+  '(;; for encode/decode
+    (;; vowels -- 18
+     "a" ("aa" "A") "i" ("ii" "I") "u" ("uu" "U")
+     "R" "Ll" "Lll" ("E" "ae") "e" "ai"
+     nil  "o"   "O"   "au"  "~" "RR")
+    (;; consonants -- 40
+     ("k" "c")   "kh"  "g"   "gh"  "ng"
+     "ch" ("Ch" "chh") "j" "jh" "nj"
+     "T"   "Th"  "D"   "Dh"  "N"
+     "th"  "thh" "d"   "dh"  "n"   nil
+     "p"   ("ph" "f")  "b"   "bh"  "m"
+     "y"   "r"   "rr"  "l"  "L" "zh" ("v" "w")
+     ("S" "z") "sh" "s" "h"
+     nil nil nil nil nil nil nil nil
+     nil "X"
+     ;; some of these are extra to Mozhi
+     ("t" "tt") "nt" "tth" "tthh" "nnj" "nng" "nn"
+     "nch" "nc" "nk" "cch" "cchh" "cc"
+     "B" ("C" "K" "q") "G" "J" "M" "P" "V" "x" "Z")
+    (;; misc -- 7
+     nil nil "H")))
+
 (defvar indian-kyoto-harvard-table
   '(;; for encode/decode
     (;; vowel
@@ -524,6 +550,10 @@ indian-mlm-itrans-v5-hash
   (indian-make-hash indian-mlm-base-table
 			  indian-itrans-v5-table))

+(defvar indian-mlm-mozhi-hash
+  (indian-make-hash indian-mlm-base-table
+			  indian-mlm-mozhi-table))
+
 (defvar indian-tml-itrans-v5-hash
   (indian-make-hash indian-tml-base-table
 			  indian-itrans-v5-table-for-tamil))
diff --git a/lisp/leim/quail/indian.el b/lisp/leim/quail/indian.el
index 2681eab0e5..100ae63f6a 100644
--- a/lisp/leim/quail/indian.el
+++ b/lisp/leim/quail/indian.el
@@ -117,6 +117,7 @@ "\\''"
  indian-knd-itrans-v5-hash "kannada-itrans" "Kannada" "KndIT"
  "Kannada transliteration by ITRANS method.")

+;; ITRANS not applicable to Malayalam & could be removed eventually
 (if nil
     (quail-define-package "malayalam-itrans" "Malayalam" "MlmIT" t "Malayalam ITRANS"))
 (quail-define-indian-trans-package
@@ -358,24 +359,23 @@ inscript-mlm-keytable
   '(
     (;; VOWELS  (18)
      (?D nil) (?E ?e) (?F ?f) (?R ?r) (?G ?g) (?T ?t)
-     (?+ ?=) ("F]" "f]") (?! ?@) (?S ?s) (?Z ?z) (?W ?w)
-     (?| ?\\) (?~ ?`) (?A ?a) (?Q ?q) ("+]" "=]") ("R]" "r]"))
+     (?= ?+) nil nil (?S ?s) (?Z ?z) (?W ?w)
+     nil (?~ ?`) (?A ?a) (?Q ?q))
     (;; CONSONANTS (42)
      ?k ?K ?i ?I ?U                ;; GRUTTALS
      ?\; ?: ?p ?P ?}               ;; PALATALS
      ?' ?\" ?\[ ?{ ?C              ;; CEREBRALS
-     ?l ?L ?o ?O ?v ?V             ;; DENTALS
+     ?l ?L ?o ?O ?v nil            ;; DENTALS
      ?h ?H ?y ?Y ?c                ;; LABIALS
-     ?/ ?j ?J ?n ?N "N]" ?b        ;; SEMIVOWELS
+     ?/ ?j ?J ?n ?N ?B ?b          ;; SEMIVOWELS
      ?M ?< ?m ?u                   ;; SIBILANTS
-     "k]" "K]" "i]" "p]" "[]" "{]" "H]" "/]" ;; NUKTAS
-     ?% ?&)
+     nil nil nil nil nil nil nil nil nil) ;; NUKTAS
     (;; Misc Symbols (7)
-     ?X ?x ?_ ">]" ?d "X]" ?>)
+     nil ?x ?_ nil ?d)
     (;; Digits
      ?0 ?1 ?2 ?3 ?4 ?5 ?6 ?7 ?8 ?9)
-    (;; Inscripts
-     ?# ?$ ?^ ?* ?\])))
+    (;; Chillus
+     "Cd" "Cd]" "vd" "vd]" "jd" "jd]" "nd" "nd]" "Nd" "Nd]")))

 (defvar inscript-tml-keytable
   '(
@@ -463,6 +463,9 @@ inscript-tml-keytable
  "malayalam-inscript" "Malayalam" "MlmIS"
  "Malayalam keyboard Inscript.")

+(quail-defrule "\\" ?‌)
+(quail-defrule "X" ?​)
+
 (if nil
     (quail-define-package "tamil-inscript" "Tamil" "TmlIS" t "Tamil keyboard Inscript"))
 (quail-define-inscript-package
@@ -571,4 +574,72 @@ inscript-tml-keytable
   ("?" ?\?)
   ("/" ?্))

+(defun indian-mlm-mozhi-update-translation (control-flag)
+  (let ((len (length quail-current-key)) chillu
+	(vowels '(?a ?e ?i ?o ?u ?A ?E ?I ?O ?U ?R)))
+    (cond ((numberp control-flag)
+	   (progn (if (= control-flag 0)
+		      (setq quail-current-str quail-current-key)
+		    (cond (input-method-exit-on-first-char)
+			  ((and (memq (aref quail-current-key
+					    (1- control-flag))
+				      vowels)
+				(setq chillu (cl-position
+					      (aref quail-current-key
+						    control-flag)
+					      '(?m ?N ?n ?r ?l ?L))))
+			   ;; conditions for putting chillu
+			   (and (or (and (= control-flag (1- len))
+					 (not (setq control-flag nil)))
+				    (and (= control-flag (- len 2))
+					 (let ((temp (aref quail-current-key
+							   (1- len))))
+                                           ;; is it last char of word?
+					   (not
+					    (or (and (>= temp ?a) (<= temp ?z))
+						(and (>= temp ?A) (<= temp ?Z))
+						(eq temp ?~))))
+					 (setq control-flag (1+ control-flag))))
+				(setq quail-current-str     ;; put chillu
+				      (concat (if (not (stringp
+							quail-current-str))
+						  (string quail-current-str)
+						quail-current-str)
+					      (string
+					       (nth chillu '(?ം ?ൺ ?ൻ ?ർ ?ൽ ?ൾ)))))))))
+		  (and (not input-method-exit-on-first-char) control-flag
+		       (while (> len control-flag)
+			 (setq len (1- len))
+			 (setq unread-command-events
+			       (cons (aref quail-current-key len)
+				     unread-command-events))))
+		  ))
+	  ((null control-flag)
+	   (unless quail-current-str
+	     (setq quail-current-str quail-current-key)
+	     ))
+	  ((equal control-flag t)
+	   (if (memq (aref quail-current-key (1- len))  ;; If vowel ending,
+		     vowels)                            ;; may have to put
+	       (setq control-flag nil)))))              ;; chillu. So don't
+  control-flag)                                         ;; end translation
+
+(quail-define-package "malayalam-mozhi" "Malayalam" "MlmMI" t
+                      "Malayalam transliteration by Mozhi method."
+                      nil nil t nil nil nil t nil
+                      'indian-mlm-mozhi-update-translation)
+
+(maphash
+ (lambda (key val)
+   (quail-defrule key (if (= (length val) 1)
+			  (string-to-char val)
+			(vector val))))
+ (cdr indian-mlm-mozhi-hash))
+
+(defun indian-mlm-mozhi-underscore (key len) (throw 'quail-tag nil))
+
+(quail-defrule "_" 'indian-mlm-mozhi-underscore)
+(quail-defrule "|" ?‌)
+(quail-defrule "||" ?​)
+
 ;;; indian.el ends here
--
2.20.1


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [PATCH] Improve Malayalam language transliteration
  2020-04-27  2:42       ` James Thomas
@ 2020-05-28 17:33         ` Eli Zaretskii
  2020-05-29 14:57           ` James Thomas
                             ` (2 more replies)
  0 siblings, 3 replies; 17+ messages in thread
From: Eli Zaretskii @ 2020-05-28 17:33 UTC (permalink / raw)
  To: James Thomas; +Cc: emacs-devel

> From: James Thomas <jimjoe@gmx.net>
> Cc: emacs-devel@gnu.org
> Date: Mon, 27 Apr 2020 08:12:03 +0530
> 
> Sorry for the bother, but this newer patch implements the Inscript stuff
> in an easier way.

Thanks, I pushed this to the master branch.



^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] Improve Malayalam language transliteration
  2020-05-28 17:33         ` Eli Zaretskii
@ 2020-05-29 14:57           ` James Thomas
  2020-06-01  8:49           ` James Thomas
  2020-07-15 12:11           ` James Thomas
  2 siblings, 0 replies; 17+ messages in thread
From: James Thomas @ 2020-05-29 14:57 UTC (permalink / raw)
  To: Eli Zaretskii; +Cc: emacs-devel

Eli Zaretskii <eliz@gnu.org> writes:

>> From: James Thomas <jimjoe@gmx.net>
>> Cc: emacs-devel@gnu.org
>> Date: Mon, 27 Apr 2020 08:12:03 +0530
>>
>> Sorry for the bother, but this newer patch implements the Inscript stuff
>> in an easier way.
>
> Thanks, I pushed this to the master branch.

Thank you!



^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] Improve Malayalam language transliteration
  2020-05-28 17:33         ` Eli Zaretskii
  2020-05-29 14:57           ` James Thomas
@ 2020-06-01  8:49           ` James Thomas
  2020-06-01 15:02             ` Eli Zaretskii
  2020-07-15 12:11           ` James Thomas
  2 siblings, 1 reply; 17+ messages in thread
From: James Thomas @ 2020-06-01  8:49 UTC (permalink / raw)
  To: Eli Zaretskii; +Cc: emacs-devel

Eli Zaretskii <eliz@gnu.org> writes:

>> From: James Thomas <jimjoe@gmx.net>
>> Cc: emacs-devel@gnu.org
>> Date: Mon, 27 Apr 2020 08:12:03 +0530
>>
>> Sorry for the bother, but this newer patch implements the Inscript stuff
>> in an easier way.
>
> Thanks, I pushed this to the master branch.

Just checked git and I think there's a problem - I can't see the changes
to 'indian.el'.



^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] Improve Malayalam language transliteration
  2020-06-01  8:49           ` James Thomas
@ 2020-06-01 15:02             ` Eli Zaretskii
  2020-06-02  2:27               ` James Thomas
  0 siblings, 1 reply; 17+ messages in thread
From: Eli Zaretskii @ 2020-06-01 15:02 UTC (permalink / raw)
  To: James Thomas; +Cc: emacs-devel

> From: James Thomas <jimjoe@gmx.net>
> Cc: emacs-devel@gnu.org
> Date: Mon, 01 Jun 2020 14:19:03 +0530
> 
> Eli Zaretskii <eliz@gnu.org> writes:
> 
> > Thanks, I pushed this to the master branch.
> 
> Just checked git and I think there's a problem - I can't see the changes
> to 'indian.el'.

Did you check the master branch?  If so, perhaps your branch is not in
sync: I mistakenly left out indian.el from the original commit, but
pushed it later (3 days ago, to be correct).

Sorry about the mess-up.



^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] Improve Malayalam language transliteration
  2020-06-01 15:02             ` Eli Zaretskii
@ 2020-06-02  2:27               ` James Thomas
  0 siblings, 0 replies; 17+ messages in thread
From: James Thomas @ 2020-06-02  2:27 UTC (permalink / raw)
  To: Eli Zaretskii; +Cc: emacs-devel

Eli Zaretskii <eliz@gnu.org> writes:

> Did you check the master branch?  If so, perhaps your branch is not in
> sync: I mistakenly left out indian.el from the original commit, but
> pushed it later (3 days ago, to be correct).
>
> Sorry about the mess-up.

Got it (I had checked only the 1st commit). Works fine at my end, btw.



^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] Improve Malayalam language transliteration
  2020-05-28 17:33         ` Eli Zaretskii
  2020-05-29 14:57           ` James Thomas
  2020-06-01  8:49           ` James Thomas
@ 2020-07-15 12:11           ` James Thomas
  2020-07-15 14:29             ` Eli Zaretskii
  2 siblings, 1 reply; 17+ messages in thread
From: James Thomas @ 2020-07-15 12:11 UTC (permalink / raw)
  To: Eli Zaretskii; +Cc: emacs-devel

[-- Attachment #1: Type: text/plain, Size: 221 bytes --]

Please consider this new patch (to my own code) which uses strings
rather than (expensive) lists to represent constant char sequences.

Eli Zaretskii <eliz@gnu.org> writes:

> Thanks, I pushed this to the master branch.


[-- Attachment #2: 0001-indian-mlm-mozhi-update-translation-Use-strings-for-.patch --]
[-- Type: text/x-diff, Size: 2521 bytes --]

From 7e3007d5d08384e7c4c2e93d6e20ab4fc435d3fd Mon Sep 17 00:00:00 2001
From: James Thomas <jimjoe@gmx.net>
Date: Wed, 15 Jul 2020 17:29:08 +0530
Subject: [PATCH] indian-mlm-mozhi-update-translation: Use strings for constant
 sequences

* lisp/leim/quail/indian.el (indian-mlm-mozhi-update-translation):
Use strings rather than lists for constant sequences.
Copyright-paperwork-exempt: yes
---
 lisp/leim/quail/indian.el | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/lisp/leim/quail/indian.el b/lisp/leim/quail/indian.el
index 100ae63f6a..e1322ddc1e 100644
--- a/lisp/leim/quail/indian.el
+++ b/lisp/leim/quail/indian.el
@@ -576,18 +576,18 @@ "X"
 
 (defun indian-mlm-mozhi-update-translation (control-flag)
   (let ((len (length quail-current-key)) chillu
-	(vowels '(?a ?e ?i ?o ?u ?A ?E ?I ?O ?U ?R)))
+	(vowels "aeiouAEIOUR"))
     (cond ((numberp control-flag)
 	   (progn (if (= control-flag 0)
 		      (setq quail-current-str quail-current-key)
 		    (cond (input-method-exit-on-first-char)
-			  ((and (memq (aref quail-current-key
-					    (1- control-flag))
-				      vowels)
+			  ((and (cl-find (aref quail-current-key
+					       (1- control-flag))
+				         vowels)
 				(setq chillu (cl-position
 					      (aref quail-current-key
 						    control-flag)
-					      '(?m ?N ?n ?r ?l ?L))))
+					      "mNnrlL")))
 			   ;; conditions for putting chillu
 			   (and (or (and (= control-flag (1- len))
 					 (not (setq control-flag nil)))
@@ -606,7 +606,7 @@ indian-mlm-mozhi-update-translation
 						  (string quail-current-str)
 						quail-current-str)
 					      (string
-					       (nth chillu '(?ം ?ൺ ?ൻ ?ർ ?ൽ ?ൾ)))))))))
+					       (aref "ംൺൻർൽൾ" chillu))))))))
 		  (and (not input-method-exit-on-first-char) control-flag
 		       (while (> len control-flag)
 			 (setq len (1- len))
@@ -619,8 +619,8 @@ indian-mlm-mozhi-update-translation
 	     (setq quail-current-str quail-current-key)
 	     ))
 	  ((equal control-flag t)
-	   (if (memq (aref quail-current-key (1- len))  ;; If vowel ending,
-		     vowels)                            ;; may have to put
+	   (if (cl-find (aref quail-current-key (1- len));; If vowel ending,
+		        vowels)                          ;; may have to put
 	       (setq control-flag nil)))))              ;; chillu. So don't
   control-flag)                                         ;; end translation
 
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [PATCH] Improve Malayalam language transliteration
  2020-07-15 12:11           ` James Thomas
@ 2020-07-15 14:29             ` Eli Zaretskii
  2020-07-16  3:37               ` James Thomas
  0 siblings, 1 reply; 17+ messages in thread
From: Eli Zaretskii @ 2020-07-15 14:29 UTC (permalink / raw)
  To: James Thomas; +Cc: emacs-devel

> From: James Thomas <jimjoe@gmx.net>
> Cc: emacs-devel@gnu.org
> Date: Wed, 15 Jul 2020 17:41:02 +0530
> 
> Please consider this new patch (to my own code) which uses strings
> rather than (expensive) lists to represent constant char sequences.

Why do you think lists are more expensive than strings in this case?
Did you have a chance to measure the performance and see a significant
difference?

Thanks.



^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] Improve Malayalam language transliteration
  2020-07-15 14:29             ` Eli Zaretskii
@ 2020-07-16  3:37               ` James Thomas
  2020-07-16  5:20                 ` James Thomas
  0 siblings, 1 reply; 17+ messages in thread
From: James Thomas @ 2020-07-16  3:37 UTC (permalink / raw)
  To: Eli Zaretskii; +Cc: emacs-devel

Eli Zaretskii <eliz@gnu.org> writes:

> Why do you think lists are more expensive than strings in this case?

These are constants in the code, and I reasoned that a (contiguous)
string representation would use less memory and be easier to look up.

The only reason I didn't think of this at first was that char-tables in
indian.el & ind-util.el used lists rather than vectors, but I don't know
why, honestly.

> Did you have a chance to measure the performance and see a significant
> difference?

I have done it now by running this 5 times in succession:
./emacs -Q --batch --eval '(progn (set-input-method "malayalam-mozhi") (message "%s %s" (benchmark-run 1000 (self-insert-command ?U) (self-insert-command ?L)) (memory-use-counts)))'

For benchmark-run:

Before:
(0.0038055209999999996 0 0.0)
(0.0037328350000000003 0 0.0)
(0.003799021 0 0.0)
(0.0038411929999999997 0 0.0)
(0.003767182 0 0.0)
After:
(0.00391531 0 0.0)
(0.003733149 0 0.0)
(0.0037935509999999996 0 0.0)
(0.003811992 0 0.0)
(0.0038083170000000003 0 0.0)

Seems to be either no improvement or a slight degradation in timings...

For memory-use-counts:

Before:
(2237361 649 4075372 17010 1612818 4995 86731)
After:
(2237339 649 4075373 17010 1612855 4995 86734)

Note the difference in conses (1st item), vector cells (3rd),
string-chars (5th) and strings (last). Accounting for their unit sizes
the benefit is:
(2237361-2237339)*16+(4075372-4075373)*16+(1612818-1612855)*1+(86731-86734)*32
=203 bytes

Not sure if this is significant. I'll defer to your judgement...



^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] Improve Malayalam language transliteration
  2020-07-16  3:37               ` James Thomas
@ 2020-07-16  5:20                 ` James Thomas
  2020-07-16  5:44                   ` James Thomas
  0 siblings, 1 reply; 17+ messages in thread
From: James Thomas @ 2020-07-16  5:20 UTC (permalink / raw)
  To: Eli Zaretskii; +Cc: emacs-devel

James Thomas <jimjoe@gmx.net> writes:

> I have done it now by running this 5 times in succession:
> ./emacs -Q --batch --eval '(progn (set-input-method "malayalam-mozhi") (message "%s %s" (benchmark-run 1000 (self-insert-command ?U) (self-insert-command ?L)) (memory-use-counts)))'

Sorry, I made a mistake with the self-insert-command. I tried it again
with this:

./emacs -Q --batch --eval '(progn (set-input-method "malayalam-mozhi") (message "%s %s" (benchmark-run 1000 (self-insert-command 1 ?U) (self-insert-command 1 ?L)) (memory-use-counts)))'

Before
(0.028942057 1 0.018573165000000003)
(0.028422779 1 0.018723127000000006)
(0.027688517 1 0.01788452900000001)
(0.027688517 1 0.01788452900000001)
(0.028487356000000002 1 0.01823290999999999)

After
(0.027500888 1 0.017777874)
(0.027439256999999998 1 0.017800450999999995)
(0.027768902999999998 1 0.017677656000000014)
(0.027987988 1 0.018469169000000007)
(0.028140825 1 0.018314785)

> Seems to be either no improvement or a slight degradation in timings...

So for runtime it seems either better or unchanged.



^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] Improve Malayalam language transliteration
  2020-07-16  5:20                 ` James Thomas
@ 2020-07-16  5:44                   ` James Thomas
  2020-07-16 15:07                     ` Eli Zaretskii
  0 siblings, 1 reply; 17+ messages in thread
From: James Thomas @ 2020-07-16  5:44 UTC (permalink / raw)
  To: Eli Zaretskii; +Cc: emacs-devel

James Thomas <jimjoe@gmx.net> writes:

> So for runtime it seems either better or unchanged.

Oops, and for memory...

Before
(2239385 650 4085383 17010 1612853 4995 86735)
After
(2239363 650 4085384 17010 1612901 4995 86738)

(2239385-2239363)*16+(4085383-4085384)*16+(1612853-1612901)*1+(86735-86738)*32
=192 bytes saved.



^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] Improve Malayalam language transliteration
  2020-07-16  5:44                   ` James Thomas
@ 2020-07-16 15:07                     ` Eli Zaretskii
  0 siblings, 0 replies; 17+ messages in thread
From: Eli Zaretskii @ 2020-07-16 15:07 UTC (permalink / raw)
  To: James Thomas; +Cc: emacs-devel

> From: James Thomas <jimjoe@gmx.net>
> Cc: emacs-devel@gnu.org
> Date: Thu, 16 Jul 2020 11:14:49 +0530
> 
> James Thomas <jimjoe@gmx.net> writes:
> 
> > So for runtime it seems either better or unchanged.
> 
> Oops, and for memory...
> 
> Before
> (2239385 650 4085383 17010 1612853 4995 86735)
> After
> (2239363 650 4085384 17010 1612901 4995 86738)
> 
> (2239385-2239363)*16+(4085383-4085384)*16+(1612853-1612901)*1+(86735-86738)*32
> =192 bytes saved.

Thanks, the differences are insignificant, IMO, and I personally
prefer the current code which uses lists.  Does someone think that
switching to strings will be better?



^ permalink raw reply	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2020-07-16 15:07 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-04-26 13:49 [PATCH] Improve Malayalam language transliteration James Thomas
2020-04-26 15:43 ` Eli Zaretskii
     [not found]   ` <87tv161aml.fsf@gmx.net>
2020-04-26 17:12     ` Eli Zaretskii
2020-04-26 22:11       ` James Thomas
2020-04-27  1:33       ` James Thomas
2020-04-27  2:42       ` James Thomas
2020-05-28 17:33         ` Eli Zaretskii
2020-05-29 14:57           ` James Thomas
2020-06-01  8:49           ` James Thomas
2020-06-01 15:02             ` Eli Zaretskii
2020-06-02  2:27               ` James Thomas
2020-07-15 12:11           ` James Thomas
2020-07-15 14:29             ` Eli Zaretskii
2020-07-16  3:37               ` James Thomas
2020-07-16  5:20                 ` James Thomas
2020-07-16  5:44                   ` James Thomas
2020-07-16 15:07                     ` Eli Zaretskii

Code repositories for project(s) associated with this public inbox

	https://git.savannah.gnu.org/cgit/emacs.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).