unofficial mirror of emacs-devel@gnu.org 
 help / color / mirror / code / Atom feed
blob 5a7867f14e8bc976aa489593de9d480f4015a512 13345 bytes (raw)
name: lisp/language/misc-lang.el 	 # note: path name is non-authoritative(*)

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
 
;;; misc-lang.el --- support for miscellaneous languages (characters)  -*- lexical-binding: t; -*-

;; Copyright (C) 2012-2023 Free Software Foundation, Inc.
;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
;;   2005, 2006, 2007, 2008, 2009, 2010, 2011
;;   National Institute of Advanced Industrial Science and Technology (AIST)
;;   Registration Number H14PRO021

;; Keywords: multilingual, character set, coding system

;; This file is part of GNU Emacs.

;; GNU Emacs is free software: you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.

;; GNU Emacs is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs.  If not, see <https://www.gnu.org/licenses/>.

;;; Commentary:

;;; Code:

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; IPA (International Phonetic Alphabet)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Register the "IPA" language environment.  It uses the `ipa'
;; charset, prefers UTF-8 for coding, and selects the "ipa" input
;; method by default.
(set-language-info-alist
 "IPA" '((charset . (ipa))
	 (coding-priority utf-8)
	 (coding-system utf-8)
	 (input-method . "ipa")
	 (nonascii-translation . ipa)
	 (documentation . "\
IPA is International Phonetic Alphabet for English, French, German
and Italian.")))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Arabic
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Charset-based coding system for ISO-8859-6; `?6' is its mnemonic
;; character.
(define-coding-system 'iso-8859-6
  "ISO-8859-6 based encoding (MIME:ISO-8859-6)."
  :coding-type 'charset
  :mnemonic ?6
  :charset-list '(iso-8859-6)
  :mime-charset 'iso-8859-6)

;; Charset-based coding system for windows-1256; `?A' is its mnemonic
;; character.
(define-coding-system 'windows-1256
  "windows-1256 (Arabic) encoding (MIME: WINDOWS-1256)"
  :coding-type 'charset
  :mnemonic ?A
  :charset-list '(windows-1256)
  :mime-charset 'windows-1256)
;; `cp1256' is another name for the same coding system.
(define-coding-system-alias 'cp1256 'windows-1256)

;; Register the "Arabic" language environment.  UTF-8 is listed first
;; in the coding priority, followed by the two legacy 8-bit encodings
;; defined above.
(set-language-info-alist
 "Arabic" '((charset unicode)
	    (coding-system utf-8 iso-8859-6 windows-1256)
	    (coding-priority utf-8 iso-8859-6 windows-1256)
	    (input-method . "arabic")
	    (sample-text . "Arabic	السّلام عليكم")
	    (documentation . "Bidirectional editing is supported.")))

;; Register the "Persian" language environment.  It uses the same
;; coding systems as "Arabic" but a different default input method
;; and sample text.
(set-language-info-alist
 "Persian" '((charset unicode)
	    (coding-system utf-8 iso-8859-6 windows-1256)
	    (coding-priority utf-8 iso-8859-6 windows-1256)
	    (input-method . "farsi-transliterate-banan")
	    (sample-text . "Persian	فارسی")
	    (documentation . "Bidirectional editing is supported.")))

(defcustom arabic-shaper-ZWNJ-handling nil
  "How to handle ZWNJ in Arabic text rendering.
This variable controls the way to handle a glyph for ZWNJ
returned by the underlying shaping engine.

The default value is nil, which means that the ZWNJ glyph is
displayed as is.

If the value is `absorb', ZWNJ is absorbed into the previous
grapheme cluster, and not displayed.

If the value is `as-space', the glyph is displayed by a
thin (i.e. 1-dot width) space."
  :group 'mule
  :version "26.1"
  :type '(choice
          (const :tag "default" nil)
          (const :tag "as space" as-space)
          (const :tag "absorb" absorb))
  ;; Compositions that were already shaped are cached, so the cache
  ;; is cleared whenever the value is set through Custom.
  :set (lambda (sym val)
         (set-default sym val)
         (clear-composition-cache)))

;; List of errors caught in `arabic-shape-gstring'; each caught error
;; is pushed onto the front, so the most recent one comes first.
(defvar arabic-shape-log nil)

(defun arabic-shape-gstring (gstring direction)
  "Shape GSTRING with `font-shape-gstring', then post-process ZWNJ glyphs.
GSTRING and DIRECTION are passed unchanged to `font-shape-gstring'.

When `arabic-shaper-ZWNJ-handling' is non-nil, every glyph for
ZWNJ (U+200C) other than the first glyph of the result is either
turned into a thin space (value `as-space') or merged into the
preceding glyph and removed (value `absorb').  If anything was
changed, the ID of the glyph-string is reset to nil.

Errors signaled while post-processing are pushed onto
`arabic-shape-log' instead of being propagated.

Return the shaped glyph-string, or nil if shaping failed."
  (setq gstring (font-shape-gstring gstring direction))
  (condition-case err
      ;; `font-shape-gstring' returns nil when shaping fails; there is
      ;; nothing to post-process in that case.
      (when (and gstring arabic-shaper-ZWNJ-handling)
        (let ((font (lgstring-font gstring))
              (i 1)
              (len (lgstring-glyph-len gstring))
              (modified nil)
              glyph)
          ;; Unused glyph slots at the end of a glyph-string are nil,
          ;; so the first nil glyph ends the scan.
          (while (and (< i len)
                      (setq glyph (lgstring-glyph gstring i)))
            (let ((advance t))
              (when (eq (lglyph-char glyph) #x200c)
                (cond
                 ((eq arabic-shaper-ZWNJ-handling 'as-space)
                  ;; If the ZWNJ glyph has visible extent, replace
                  ;; its code and width with those of the font's
                  ;; space glyph.
                  (when (> (- (lglyph-rbearing glyph)
                              (lglyph-lbearing glyph))
                           0)
                    (let ((space-glyph
                           (aref (font-get-glyphs font 0 1 " ") 0)))
                      (when space-glyph
                        (lglyph-set-code glyph (aref space-glyph 3))
                        (lglyph-set-width glyph (aref space-glyph 4)))))
                  (lglyph-set-adjustment glyph 0 0 1)
                  (setq modified t))
                 ((eq arabic-shaper-ZWNJ-handling 'absorb)
                  (let ((prev (lgstring-glyph gstring (1- i))))
                    (lglyph-set-from-to prev
                                        (lglyph-from prev) (lglyph-to glyph))
                    (setq gstring (lgstring-remove-glyph gstring i))
                    (setq len (1- len)))
                  ;; The following glyph has moved into slot I, so
                  ;; examine the same index again instead of skipping
                  ;; it.
                  (setq advance nil)
                  (setq modified t))))
              (when advance
                (setq i (1+ i)))))
          (if modified
              (lgstring-set-id gstring nil))))
    (error (push err arabic-shape-log)))
  gstring)

;; Composition rules for U+0600..U+074F: a run of characters from that
;; range, ZWNJ (U+200C) and ZWJ (U+200D) is shaped by
;; `arabic-shape-gstring'.  The second vector element (0) is the
;; number of preceding characters the pattern also has to match.
(set-char-table-range
 composition-function-table
 '(#x600 . #x74F)
 (list (vector "[\u0600-\u074F\u200C\u200D]+"
               0 #'arabic-shape-gstring)))
;; The same shaping function for runs that start with ZWNJ or ZWJ
;; followed by at least one more character of the set above.
(set-char-table-range
 composition-function-table
 '(#x200C . #x200D)
  (list (vector "[\u200C\u200D][\u0600-\u074F\u200C\u200D]+"
                0 #'arabic-shape-gstring)))

;; The Egyptian Hieroglyph Format Controls were introduced in Unicode
;; Standard v12.0.  Apparently, they are not yet well supported in
;; existing fonts, as of late 2020.  But there's no reason for us not
;; to be ready for when they will be!
;; The below is needed to support the arrangement of the Egyptian
;; Hieroglyphs in "quadrats", as directed by the format controls,
;; which specify how the hieroglyphs should be joined horizontally and
;; vertically.
(defun egyptian-shape-grouping (gstring direction)
  "Shape the leading bracketed hieroglyph group of GSTRING.
GSTRING must begin with U+13437; otherwise return nil.  Scan
forward, counting U+13437 as opening and U+13438 as closing a
nesting level, until the group opened by the first character is
closed.  If it is never closed, return nil.  Otherwise build a
new glyph-string holding only the characters and glyphs of that
group and return the result of calling `font-shape-gstring' on
it with DIRECTION."
  (when (= (lgstring-char gstring 0) #x13437)
    (let ((char-count (lgstring-char-len gstring))
          (end 1)
          (depth 1))
      ;; Advance END past the character that closes the outermost
      ;; group, or to the end of the characters if there is none.
      (while (and (> depth 0) (< end char-count))
        (let ((c (lgstring-char gstring end)))
          (cond
           ((= c #x13437) (setq depth (1+ depth)))
           ((= c #x13438) (setq depth (1- depth)))))
        (setq end (1+ end)))
      (when (zerop depth)
        ;; The header is the font followed by END characters; the
        ;; glyph-string is header, ID, and END glyph slots.
        (let ((group-header (make-vector (1+ end) nil))
              (group (make-vector (+ end 2) nil)))
          (aset group-header 0 (lgstring-font gstring))
          (dotimes (k end)
            (aset group-header (1+ k) (lgstring-char gstring k))
            (lgstring-set-glyph group k (lgstring-glyph gstring k)))
          (lgstring-set-header group group-header)
          (font-shape-gstring group direction))))))

;; Install the composition rules for Egyptian hieroglyphs.
;; NOTE(review): the `hieroglyph' pattern below extends to U+1342F,
;; while the fallback rule at the end of this form stops at U+1342E --
;; confirm whether the difference is intended.
(let ((hieroglyph "[\U00013000-\U0001342F]"))
  ;; HORIZONTAL/VERTICAL JOINER and INSERT AT.../OVERLAY controls
  (set-char-table-range
   composition-function-table
   '(#x13430 . #x13436)
   (list (vector (concat hieroglyph "[\U00013430-\U00013436]" hieroglyph)
                 ;; We use font-shape-gstring so that, if the font
                 ;; doesn't support these controls, the glyphs are
                 ;; displayed individually, and not as a single
                 ;; grapheme cluster.
                 1 #'font-shape-gstring)))
  ;; Grouping controls
  (set-char-table-range
   composition-function-table
   #x13437
   (list (vector "\U00013437[\U00013000-\U0001343F]+"
                 0 #'egyptian-shape-grouping)))
  ;; "Normal" hieroglyphs, for fonts that don't support the above
  ;; controls, but do shape sequences of hieroglyphs without the
  ;; controls.
  ;; FIXME: As of late 2021, Egyptian Hieroglyph Format Controls are
  ;; not yet supported in existing fonts and/or shaping engines, but
  ;; some fonts do provide ligatures with which texts in Egyptian
  ;; Hieroglyphs are correctly displayed.  If and when these format
  ;; controls are supported, as described in section 11.4 "Egyptian
  ;; Hieroglyphs" of the Unicode Standard, the five lines below (which
  ;; allow composition of hieroglyphs without formatting controls
  ;; around) can be removed, and the entry in etc/HELLO can be
  ;; restored to:
  ;; Egyptian Hieroglyphs (𓂋𓐰𓏤𓈖𓆎𓅓𓏏𓐰𓊖) 𓅓𓊵𓐰𓐷𓏏𓊪𓐸, 𓇍𓇋𓂻𓍘𓇋
  (set-char-table-range
   composition-function-table
   '(#x13000 . #x1342E)
   (list (vector "[\U00013000-\U0001342E]+"
                 0 #'font-shape-gstring))))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Hanifi Rohingya
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Register the "Hanifi Rohingya" language environment, with "Misc"
;; given as its parent list.
(set-language-info-alist
 "Hanifi Rohingya" '((charset unicode)
                     (coding-system utf-8)
                     (coding-priority utf-8)
                     (input-method . "hanifi-rohingya")
                     (sample-text . "Hanifi Rohingya (𐴌𐴟𐴇𐴥𐴝𐴚𐴒𐴙𐴝 𐴇𐴝𐴕𐴞𐴉𐴞 𐴓𐴠𐴑𐴤𐴝)	𐴀𐴝𐴏𐴓𐴝𐴀𐴡𐴤𐴛𐴝𐴓𐴝𐴙𐴑𐴟𐴔")
                     (documentation . "\
Rohingya language and its script Hanifi Rohingya are supported
in this language environment."))
 '("Misc"))

;; Hanifi Rohingya composition rules
;; The rule is installed for U+10D1D..U+10D27 only; the pattern
;; itself covers U+10D00..U+10D27 and is matched starting one
;; character before the triggering one (the `1' below).
(set-char-table-range
 composition-function-table
 '(#x10D1D . #x10D27)
 (list (vector
        "[\x10D00-\x10D27]+"
        1 'font-shape-gstring)))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Kharoṣṭhī
;; Author: Stefan Baums <baums@gandhari.org>
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Register the "Kharoshthi" language environment, with "Indian"
;; given as its parent list.
(set-language-info-alist
 "Kharoshthi" '((charset unicode)
	        (coding-system utf-8)
	        (coding-priority utf-8)
	        (input-method . "kharoshthi")
                (sample-text . "Kharoṣṭhī (𐨑𐨪𐨆𐨛𐨁)	𐨣𐨨𐨲𐨪𐨆 𐨐𐨪𐨅𐨨𐨁")
	        (documentation . "\
Language environment for Gāndhārī, Sanskrit, and other languages
using the Kharoṣṭhī script."))
 '("Indian"))

;; Kharoshthi composition rule.  It is installed for the virama
;; (U+10A3F) and matched starting one character before it (the `1'
;; below).  The pattern is built from these character classes:
;; a consonant, any number of virama+consonant pairs, optional
;; modifiers, an optional virama, optional vowels, and optional
;; modifiers again.
(let ((consonant     "[\U00010A00\U00010A10-\U00010A35]")
      (vowel         "[\U00010A01-\U00010A06]")
      (virama        "\U00010A3F")
      (modifier      "[\U00010A0C-\U00010A0F\U00010A38-\U00010A3A]"))
  (set-char-table-range composition-function-table
		        '(#x10A3F . #x10A3F)
                        (list
                         (vector
                          (concat consonant
                                  "\\(?:" virama consonant "\\)*"
                                  modifier "*"
                                  virama "?"
                                  vowel "*"
                                  modifier "*")
                          1 'font-shape-gstring))))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Adlam
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Register the "Adlam" language environment, with "Misc" given as
;; its parent list.
(set-language-info-alist
 "Adlam" '((charset unicode)
           (coding-system utf-8)
           (coding-priority utf-8)
           (input-method . "adlam")
           (sample-text . "Adlam (𞤀𞤣𞤤𞤢𞤥)	𞤅𞤢𞤤𞤢𞥄𞤥")
           (documentation . "\
Fulani language and its script Adlam are supported
in this language environment."))
 '("Misc"))

;; Adlam composition rules
;; Any run of characters in U+1E900..U+1E95F is passed to
;; `font-shape-gstring'; no preceding characters are matched (the
;; `0' below).
(set-char-table-range
 composition-function-table
 '(#x1E900 . #x1E95F)
 (list (vector
        "[\x1E900-\x1E95F]+"
        0 'font-shape-gstring)))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Mende Kikakui
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Register the "Mende Kikakui" language environment, with "Misc"
;; given as its parent list.  No composition rule is installed for
;; this script in this file.
(set-language-info-alist
 "Mende Kikakui" '((charset unicode)
                   (coding-system utf-8)
                   (coding-priority utf-8)
                   (input-method . "mende-kikakui")
                   (sample-text . "Mende Kikakui (𞠀𞠁𞠂)	𞠛𞠉")
                   (documentation . "\
Mende language and its script Kikakui are supported
in this language environment."))
 '("Misc"))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Gothic
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Register the "Gothic" language environment, with "Misc" given as
;; its parent list.  No composition rule is installed for this script
;; in this file.
(set-language-info-alist
 "Gothic" '((charset unicode)
            (coding-system utf-8)
            (coding-priority utf-8)
            (input-method . "gothic")
            (sample-text . "Gothic (𐌲𐌿𐍄𐌹𐍃𐌺𐌰)	𐌷𐌰𐌹𐌻𐍃 / 𐌷𐌰𐌹𐌻𐌰")
            (documentation . "\
Ancient Gothic language using the Gothic script is supported in this
language environment."))
 '("Misc"))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Coptic
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Register the "Coptic" language environment, with "Misc" given as
;; its parent list.  No composition rule is installed for this script
;; in this file.
(set-language-info-alist
 "Coptic" '((charset unicode)
            (coding-system utf-8)
            (coding-priority utf-8)
            (input-method . "coptic")
            (sample-text . "Coptic (ⲘⲉⲧⲢⲉⲙ̀ⲛⲭⲏⲙⲓ)	Ⲛⲟⲩϥⲣⲓ")
            (documentation . "\
Coptic language using the Coptic script is supported in this
language environment."))
 '("Misc"))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Traditional Mongolian
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Composition rules for U+1820..U+18AF, all shaped by
;; `font-shape-gstring'.  In list order:
;;  1. ZWNJ/ZWJ, one character of the range, and an optional
;;     ZWNJ/ZWJ, matched starting one character before the
;;     triggering one;
;;  2. one character of the range followed by ZWNJ/ZWJ;
;;  3. a run of characters from the range, U+202F, U+180B..U+180F
;;     and U+1807.
(set-char-table-range
 composition-function-table
 '(#x1820 . #x18AF)
 (list (vector "[\u200C\u200D][\u1820-\u18AF][\u200C\u200D]?"
               1 'font-shape-gstring)
       (vector "[\u1820-\u18AF][\u200C\u200D]" 0 'font-shape-gstring)
       (vector "[\u1820-\u18AF\u202F\u180B-\u180F\u1807]+"
               0 'font-shape-gstring)))


;; Announce this file as the `misc-lang' feature.
(provide 'misc-lang)

;;; misc-lang.el ends here

debug log:

solving 5a7867f14e8 ...
found 5a7867f14e8 in https://git.savannah.gnu.org/cgit/emacs.git

(*) Git path names are given by the tree(s) the blob belongs to.
    Blobs themselves have no identifier aside from the hash of their contents.

Code repositories for project(s) associated with this public inbox

	https://git.savannah.gnu.org/cgit/emacs.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).