;;; tai-tham.el --- support for Tai Tham -*- coding: utf-8 -*-

;; Copyright (C) 2008, 2009, 2010, 2011
;;   National Institute of Advanced Industrial Science and Technology (AIST)
;;   Registration Number H13PRO009

;; Keywords: multilingual, Tai Tham, i18n

;; This file is part of GNU Emacs.

;; GNU Emacs is free software: you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.

;; GNU Emacs is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs.  If not, see <https://www.gnu.org/licenses/>.

;;; Code:

;; (set-language-info-alist
;;  "Northern Thai" '((charset unicode)
;;                    (coding-system utf-8)
;;                    (coding-priority utf-8)
;;                    (sample-text .
;;                     "Northern Thai (ᨣᩣᩴᨾᩮᩬᩥᨦ / ᨽᩣᩈᩣᩃ᩶ᩣ᩠ᨶᨶᩣ) ᩈ᩠ᩅᩢᩔ᩠ᨯᩦᨣᩕᩢ᩠ᨸ")
;;                    (documentation . t)))

;; To load:
;; (load-file "~/tham/tai-tham.el") tai-tham-composable-pattern
;;
(defvar tai-tham-composable-pattern
  (let ((table
         ;; Each entry maps a one-letter placeholder used in the template
         ;; regexps below to the regexp fragment substituted for it.
         ;; C is letters, independent vowels, digits, punctuation and symbols.
         '(("C" . "[\u1A20-\u1A54\u1A80-\u1A89\u1A90-\u1A99\u1AA0-\u1AAD]")
           ("M" . "[\u1A55-\u1A57\u1A59-\u1A5E\u1A61-\u1A7C\u1A7F]") ; Mark
           ("H" . "\u1A60")             ; sakot
           ("S" . "[\u1A75-\u1A7C]")    ; Marks commuting with sakot
           ("N" . "\u1A58")))           ; mai kang lai
        ;; The definition of a sequence of interacting Tai Tham characters is
        ;; surprisingly complicated.  The basic syllable structure should
        ;; just be:
        ;;
        ;;   C(M|HC)*
        ;;
        ;; There are three complications:
        ;;
        ;; 1. Emacs uses a backtracking regular expression engine, but it
        ;; only backtracks when the characters accepted so far fail to match
        ;; the regular expression.  Thus if M includes sakot, CHC will be
        ;; parsed as CH and then C - there is no cause to backtrack!  On the
        ;; other hand, missing consonants should not disrupt display - the
        ;; glyph for sakot will normally alert the user that text entry is
        ;; incomplete.
        ;;
        ;; 2. Some characters can be swapped round with sakot without
        ;; changing the signification of the sequence of characters.  The
        ;; regular expression works with strings of characters rather than
        ;; traces of fully decomposed characters subject to Unicode's
        ;; canonical equivalence.
        ;;
        ;; 3. Which syllable mai kang lai belongs to depends on the font.
        ;; Again, if M included mai kang lai, CNC would be parsed as CN and
        ;; C.  The word ᨴᩘ᩠ᩃᩣ᩠ᨿ has mai kang lai in the middle of an
        ;; orthographic syllable.
        ;;
        ;; Alternative kept for reference (consonant after sakot optional):
        ;; (basic-syllable "C\\(N*\\(M\\|HS*C?\\)\\)*")
        (basic-syllable "C\\(N*\\(M\\|HS*C\\)\\)*")
        (regexp "X\\(N\\(X\\)?\\)*H?")) ; X is basic syllable
    ;; The placeholders are upper-case ASCII letters, so the substitutions
    ;; are done with case folding off and with literal replacement text
    ;; (the fragments contain backslashes that must be inserted verbatim).
    (let ((case-fold-search nil))
      ;; Expand the syllable template first, then the character classes.
      (setq regexp (replace-regexp-in-string "X" basic-syllable regexp t t))
      (dolist (elt table)
        (setq regexp (replace-regexp-in-string (car elt) (cdr elt)
                                               regexp t t))))
    regexp)
  "Regexp matching a sequence of interacting Tai Tham characters.
It is built by expanding the template \"X\\\\(N\\\\(X\\\\)?\\\\)*H?\", where
X is a basic syllable, N is mai kang lai (U+1A58) and H is sakot
\(U+1A60).  The result is installed in `composition-function-table'
for the range U+1A20..U+1AAD.")

;; Failed attempt to get proper composition for incomplete word ᨴᩘ᩠ᩃᩣ᩠.
;;(let ((elt (list (vector tai-tham-composable-pattern 3 'font-shape-gstring)
;;                 (vector tai-tham-composable-pattern 2 'font-shape-gstring)
;;                 (vector tai-tham-composable-pattern 1 'font-shape-gstring)
;;                 (vector tai-tham-composable-pattern 0 'font-shape-gstring)
;;                 (vector "." 0 'font-shape-gstring)
;;                 )))
;;  (set-char-table-range composition-function-table '(#x1A20 . #x1AAD) elt))

;; Register the composition rules for U+1A20..U+1AAD: first the full Tai Tham
;; pattern, then a single-character "." rule; both use `font-shape-gstring'
;; with 0 preceding characters.
(let ((elt (list (vector tai-tham-composable-pattern 0 'font-shape-gstring)
                 (vector "." 0 'font-shape-gstring))))
  (set-char-table-range composition-function-table '(#x1A20 . #x1AAD) elt))