Index: src/lread.c =================================================================== RCS file: /cvsroot/emacs/emacs/src/lread.c,v retrieving revision 1.350 diff -u -r1.350 lread.c --- src/lread.c 27 Feb 2006 02:04:35 -0000 1.350 +++ src/lread.c 5 May 2006 17:09:37 -0000 @@ -87,6 +87,9 @@ Lisp_Object Qbackquote, Qcomma, Qcomma_at, Qcomma_dot, Qfunction; Lisp_Object Qinhibit_file_name_operation; Lisp_Object Qeval_buffer_list, Veval_buffer_list; +Lisp_Object Qutf_translate_cjk_mode, Qutf_translate_cjk_lang_env, Qutf_translate_cjk_load_tables; +Lisp_Object Qutf_subst_table_for_decode, Qtranslation_hash_table; +Lisp_Object Qutf_translation_table_for_decode, Qtranslation_table; extern Lisp_Object Qevent_symbol_element_mask; extern Lisp_Object Qfile_exists_p; @@ -1731,6 +1734,110 @@ return str[0]; } + +#define READ_HEX_ESCAPE(i, c) \ + while (1) \ + { \ + c = READCHAR; \ + if (c >= '0' && c <= '9') \ + { \ + i *= 16; \ + i += c - '0'; \ + } \ + else if ((c >= 'a' && c <= 'f') \ + || (c >= 'A' && c <= 'F')) \ + { \ + i *= 16; \ + if (c >= 'a' && c <= 'f') \ + i += c - 'a' + 10; \ + else \ + i += c - 'A' + 10; \ + } \ + else \ + { \ + UNREAD (c); \ + break; \ + } \ + } + + + +/* Return the internal character coresponding to an UCS code point.*/ + +int +ucs_to_internal (ucs) + int ucs; +{ + int c = 0; + Lisp_Object tmp_char; + + if (! EQ (Qnil, SYMBOL_VALUE (Qutf_translate_cjk_mode))) + /* cf. `utf-lookup-subst-table-for-decode' */ + { + Lisp_Object hash; + + if (EQ (Qnil, SYMBOL_VALUE (Qutf_translate_cjk_lang_env))) + call0 (Qutf_translate_cjk_load_tables); + + hash = Fget (Qutf_subst_table_for_decode, Qtranslation_hash_table); + + if (HASH_TABLE_P (hash)) + { + tmp_char = Fgethash (make_number (ucs), hash, Qnil); + if (! EQ (Qnil, tmp_char)) + { + CHECK_NUMBER (tmp_char); + c = XFASTINT (tmp_char); + } + } + } + + if (c) + /* We found the character already in the translation hash table. + Do nothing. */ + ; + else if (ucs < 160) + c = ucs; + else if (ucs < 256) + c = MAKE_CHAR (charset_latin_iso8859_1, ucs, 0); + else if (ucs < 0x2500) + { + ucs -= 0x0100; + c = MAKE_CHAR (charset_mule_unicode_0100_24ff, + ((ucs / 96) + 32), + ((ucs % 96) + 32)); + } + else if (ucs < 0x3400) + { + ucs -= 0x2500; + c = MAKE_CHAR (charset_mule_unicode_2500_33ff, + ((ucs / 96) + 32), + ((ucs % 96) + 32)); + } + else if ((ucs >= 0xE000) && (ucs < 0x10000)) + { + ucs -= 0xE000; + c = MAKE_CHAR (charset_mule_unicode_e000_ffff, + ((ucs / 96) + 32), + ((ucs % 96) + 32)); + } + + if (c || ucs == 0) /* U+0000 is also a valid character. */ + { + Lisp_Object vect = Fget (Qutf_translation_table_for_decode, + Qtranslation_table); + if (CHAR_TABLE_P (vect)) + { + tmp_char = Faref (vect, make_number (c)); + if (! EQ (Qnil, tmp_char)) + return XFASTINT (tmp_char); + } + return c; + } + else error ("Invalid or unsupported UCS character: %x", ucs); +} + + /* Read a \-escape sequence, assuming we already read the `\'. If the escape sequence forces unibyte, store 1 into *BYTEREP. If the escape sequence forces multibyte, store 2 into *BYTEREP. @@ -1879,34 +1986,23 @@ /* A hex escape, as in ANSI C. */ { int i = 0; - while (1) - { - c = READCHAR; - if (c >= '0' && c <= '9') - { - i *= 16; - i += c - '0'; - } - else if ((c >= 'a' && c <= 'f') - || (c >= 'A' && c <= 'F')) - { - i *= 16; - if (c >= 'a' && c <= 'f') - i += c - 'a' + 10; - else - i += c - 'A' + 10; - } - else - { - UNREAD (c); - break; - } - } - + READ_HEX_ESCAPE (i, c); *byterep = 2; return i; } + case 'u': + /* A hexadecimal reference to an UCS character. */ + { + int i = 0; + + READ_HEX_ESCAPE (i, c); + *byterep = 2; + + return ucs_to_internal (i); + + } + default: if (BASE_LEADING_CODE_P (c)) c = read_multibyte (c, readcharfun); @@ -4121,6 +4217,27 @@ Vloads_in_progress = Qnil; staticpro (&Vloads_in_progress); + + Qutf_translate_cjk_mode = intern ("utf-translate-cjk-mode"); + staticpro (&Qutf_translate_cjk_mode); + + Qutf_translate_cjk_lang_env = intern ("utf-translate-cjk-lang-env"); + staticpro (&Qutf_translate_cjk_lang_env); + + Qutf_translate_cjk_load_tables = intern ("utf-translate-cjk-load-tables"); + staticpro (&Qutf_translate_cjk_load_tables); + + Qutf_subst_table_for_decode = intern ("utf-subst-table-for-decode"); + staticpro (&Qutf_subst_table_for_decode); + + Qtranslation_hash_table = intern ("translation-hash-table"); + staticpro (&Qutf_subst_table_for_decode); + + Qutf_translation_table_for_decode = intern ("utf-translation-table-for-decode"); + staticpro (&Qutf_translation_table_for_decode); + + Qtranslation_table = intern ("translation-table"); + staticpro (&Qtranslation_table); } /* arch-tag: a0d02733-0f96-4844-a659-9fd53c4f414d