From: Oliver Scholz <alkibiades@gmx.de>
Subject: Re: [PATCH] Unicode Lisp reader escapes
Date: Fri, 05 May 2006 19:23:36 +0200 [thread overview]
Message-ID: <e3g1ok$c4i$1@sea.gmane.org> (raw)
In-Reply-To: 87bquctz7t.fsf-monnier+emacs@gnu.org
[-- Attachment #1: Type: text/plain, Size: 817 bytes --]
For what it's worth, I just tried the attached little stress test on
an updated C port of `decode-char' in order to check whether it
returns equivalent results. It does. (Well, except intentional
differences like that `ucs_to_internal' throws an error where
`decode-char' returns nil.)
Basically the test runs through all positive integers up to MAX_CHAR
and inserts an alist into a temp buffer with each car being the
integer and each cdr being a character in the \u syntax (e.g.
`?\u3b1'). It then reads that alist again and checks whether
`decode-char' on its car is `eq' to its cdr. I tried it with and
without `utf-translate-cjk-mode' and with and without
`utf-fragment-on-decoding'. Since all tests succeed, ucs_to_internal
and `decode-char' are functionally equivalent on all supported
characters.
The test:
[-- Attachment #2: ucs-test.el --]
[-- Type: application/emacs-lisp, Size: 1517 bytes --]
[-- Attachment #3: Type: text/plain, Size: 20 bytes --]
The updated patch:
[-- Attachment #4: ucs-escapes.diff --]
[-- Type: text/plain, Size: 6643 bytes --]
Index: src/lread.c
===================================================================
RCS file: /cvsroot/emacs/emacs/src/lread.c,v
retrieving revision 1.350
diff -u -r1.350 lread.c
--- src/lread.c 27 Feb 2006 02:04:35 -0000 1.350
+++ src/lread.c 5 May 2006 17:09:37 -0000
@@ -87,6 +87,9 @@
Lisp_Object Qbackquote, Qcomma, Qcomma_at, Qcomma_dot, Qfunction;
Lisp_Object Qinhibit_file_name_operation;
Lisp_Object Qeval_buffer_list, Veval_buffer_list;
+Lisp_Object Qutf_translate_cjk_mode, Qutf_translate_cjk_lang_env, Qutf_translate_cjk_load_tables;
+Lisp_Object Qutf_subst_table_for_decode, Qtranslation_hash_table;
+Lisp_Object Qutf_translation_table_for_decode, Qtranslation_table;
extern Lisp_Object Qevent_symbol_element_mask;
extern Lisp_Object Qfile_exists_p;
@@ -1731,6 +1734,110 @@
return str[0];
}
+
+#define READ_HEX_ESCAPE(i, c) /* Fold hex digits read via READCHAR into I; C receives each character read. */ \
+ while (1) /* runs until a character that is not a hex digit is read */ \
+ { \
+ c = READCHAR; \
+ if (c >= '0' && c <= '9') \
+ { \
+ i *= 16; /* I is not reset here; the caller supplies its initial value */ \
+ i += c - '0'; \
+ } \
+ else if ((c >= 'a' && c <= 'f') \
+ || (c >= 'A' && c <= 'F')) \
+ { \
+ i *= 16; \
+ if (c >= 'a' && c <= 'f') \
+ i += c - 'a' + 10; \
+ else \
+ i += c - 'A' + 10; \
+ } \
+ else \
+ { \
+ UNREAD (c); /* hand the non-hex character back via UNREAD */ \
+ break; \
+ } \
+ } /* NOTE(review): expands to a bare loop, not do { } while (0), and I has no overflow guard. */
+
+
+
+/* Return the internal character corresponding to the UCS code point UCS, or signal an error if it cannot be mapped.  */
+
+int
+ucs_to_internal (ucs)
+ int ucs;
+{
+ int c = 0; /* 0 means no mapping has been found yet */
+ Lisp_Object tmp_char;
+
+ if (! EQ (Qnil, SYMBOL_VALUE (Qutf_translate_cjk_mode)))
+ /* CJK translation is on: try the substitution hash table first (cf. `utf-lookup-subst-table-for-decode'). */
+ {
+ Lisp_Object hash;
+
+ if (EQ (Qnil, SYMBOL_VALUE (Qutf_translate_cjk_lang_env)))
+ call0 (Qutf_translate_cjk_load_tables); /* NOTE(review): presumably loads the CJK tables on demand -- confirm */
+
+ hash = Fget (Qutf_subst_table_for_decode, Qtranslation_hash_table);
+
+ if (HASH_TABLE_P (hash))
+ {
+ tmp_char = Fgethash (make_number (ucs), hash, Qnil);
+ if (! EQ (Qnil, tmp_char))
+ {
+ CHECK_NUMBER (tmp_char);
+ c = XFASTINT (tmp_char);
+ }
+ }
+ }
+
+ if (c)
+ /* We found the character already in the translation hash table.
+ Do nothing. */
+ ;
+ else if (ucs < 160) /* used unchanged as the internal character */
+ c = ucs;
+ else if (ucs < 256)
+ c = MAKE_CHAR (charset_latin_iso8859_1, ucs, 0);
+ else if (ucs < 0x2500)
+ {
+ ucs -= 0x0100; /* rebase to the start of this charset's range */
+ c = MAKE_CHAR (charset_mule_unicode_0100_24ff, /* quotient and remainder by 96, each offset by 32 */
+ ((ucs / 96) + 32),
+ ((ucs % 96) + 32));
+ }
+ else if (ucs < 0x3400)
+ {
+ ucs -= 0x2500; /* same scheme, rebased to 0x2500 */
+ c = MAKE_CHAR (charset_mule_unicode_2500_33ff,
+ ((ucs / 96) + 32),
+ ((ucs % 96) + 32));
+ }
+ else if ((ucs >= 0xE000) && (ucs < 0x10000))
+ {
+ ucs -= 0xE000; /* same scheme, rebased to 0xE000 */
+ c = MAKE_CHAR (charset_mule_unicode_e000_ffff,
+ ((ucs / 96) + 32),
+ ((ucs % 96) + 32));
+ }
+
+ if (c || ucs == 0) /* U+0000 is also a valid character. */
+ {
+ Lisp_Object vect = Fget (Qutf_translation_table_for_decode,
+ Qtranslation_table);
+ if (CHAR_TABLE_P (vect)) /* apply the decode translation table when the property holds a char-table */
+ {
+ tmp_char = Faref (vect, make_number (c));
+ if (! EQ (Qnil, tmp_char))
+ return XFASTINT (tmp_char); /* a non-nil entry replaces C */
+ }
+ return c;
+ }
+ else error ("Invalid or unsupported UCS character: %x", ucs); /* reached when no branch above maps UCS, e.g. 0x3400..0xDFFF or >= 0x10000 without a hash-table entry */
+}
+
+
/* Read a \-escape sequence, assuming we already read the `\'.
If the escape sequence forces unibyte, store 1 into *BYTEREP.
If the escape sequence forces multibyte, store 2 into *BYTEREP.
@@ -1879,34 +1986,23 @@
/* A hex escape, as in ANSI C. */
{
int i = 0;
- while (1)
- {
- c = READCHAR;
- if (c >= '0' && c <= '9')
- {
- i *= 16;
- i += c - '0';
- }
- else if ((c >= 'a' && c <= 'f')
- || (c >= 'A' && c <= 'F'))
- {
- i *= 16;
- if (c >= 'a' && c <= 'f')
- i += c - 'a' + 10;
- else
- i += c - 'A' + 10;
- }
- else
- {
- UNREAD (c);
- break;
- }
- }
-
+ READ_HEX_ESCAPE (i, c);
*byterep = 2;
return i;
}
+ case 'u':
+ /* A hexadecimal reference to an UCS character. */
+ {
+ int i = 0;
+
+ READ_HEX_ESCAPE (i, c);
+ *byterep = 2;
+
+ return ucs_to_internal (i);
+
+ }
+
default:
if (BASE_LEADING_CODE_P (c))
c = read_multibyte (c, readcharfun);
@@ -4121,6 +4217,27 @@
Vloads_in_progress = Qnil;
staticpro (&Vloads_in_progress);
+
+ Qutf_translate_cjk_mode = intern ("utf-translate-cjk-mode");
+ staticpro (&Qutf_translate_cjk_mode);
+
+ Qutf_translate_cjk_lang_env = intern ("utf-translate-cjk-lang-env");
+ staticpro (&Qutf_translate_cjk_lang_env);
+
+ Qutf_translate_cjk_load_tables = intern ("utf-translate-cjk-load-tables");
+ staticpro (&Qutf_translate_cjk_load_tables);
+
+ Qutf_subst_table_for_decode = intern ("utf-subst-table-for-decode");
+ staticpro (&Qutf_subst_table_for_decode);
+
+ Qtranslation_hash_table = intern ("translation-hash-table");
+ staticpro (&Qtranslation_hash_table); /* register this symbol's own variable, as each sibling symbol does */
+
+ Qutf_translation_table_for_decode = intern ("utf-translation-table-for-decode");
+ staticpro (&Qutf_translation_table_for_decode);
+
+ Qtranslation_table = intern ("translation-table");
+ staticpro (&Qtranslation_table);
}
/* arch-tag: a0d02733-0f96-4844-a659-9fd53c4f414d
[-- Attachment #5: Type: text/plain, Size: 87 bytes --]
Oliver
--
16 Floréal an 214 de la Révolution
Liberté, Egalité, Fraternité!
[-- Attachment #6: Type: text/plain, Size: 142 bytes --]
_______________________________________________
Emacs-devel mailing list
Emacs-devel@gnu.org
http://lists.gnu.org/mailman/listinfo/emacs-devel
next prev parent reply other threads:[~2006-05-05 17:23 UTC|newest]
Thread overview: 202+ messages / expand[flat|nested] mbox.gz Atom feed top
2006-04-29 15:35 [PATCH] Unicode Lisp reader escapes Aidan Kehoe
2006-04-29 23:26 ` Stefan Monnier
2006-04-30 8:26 ` Aidan Kehoe
2006-04-30 3:04 ` Richard Stallman
2006-04-30 8:14 ` Aidan Kehoe
2006-04-30 20:53 ` Richard Stallman
2006-04-30 21:04 ` Andreas Schwab
2006-04-30 21:57 ` Aidan Kehoe
2006-04-30 22:14 ` Andreas Schwab
2006-05-01 18:32 ` Richard Stallman
2006-05-01 19:03 ` Oliver Scholz
2006-05-02 4:45 ` Richard Stallman
2006-05-02 0:46 ` Kenichi Handa
2006-05-02 6:41 ` Aidan Kehoe
2006-05-02 21:36 ` Richard Stallman
2006-04-30 21:56 ` Aidan Kehoe
2006-05-01 1:44 ` Miles Bader
2006-05-01 3:12 ` Stefan Monnier
2006-05-01 3:41 ` Miles Bader
2006-05-01 12:29 ` Stefan Monnier
2006-05-05 23:15 ` Juri Linkov
2006-05-06 23:36 ` Richard Stallman
2006-05-09 20:43 ` Juri Linkov
2006-05-11 3:44 ` Richard Stallman
2006-05-11 12:03 ` Juri Linkov
2006-05-11 13:16 ` Kenichi Handa
2006-05-12 4:15 ` Richard Stallman
2006-06-03 18:44 ` Aidan Kehoe
[not found] ` <17537.54719.354843.89030@parhasard.net>
[not found] ` <ufyieqj0v.fsf@gnu.org>
2006-06-15 18:38 ` Aidan Kehoe
2006-06-17 18:57 ` Eli Zaretskii
2006-06-18 16:11 ` Aidan Kehoe
2006-06-18 19:55 ` Eli Zaretskii
2006-06-20 2:37 ` Kenichi Handa
2006-06-20 17:56 ` Richard Stallman
2006-06-23 18:35 ` Aidan Kehoe
2006-06-24 6:50 ` Eli Zaretskii
2006-05-02 6:43 ` Kenichi Handa
2006-05-02 7:00 ` Aidan Kehoe
2006-05-02 10:45 ` Eli Zaretskii
2006-05-02 11:13 ` Aidan Kehoe
2006-05-02 19:31 ` Eli Zaretskii
2006-05-02 20:25 ` Aidan Kehoe
2006-05-02 22:16 ` Oliver Scholz
2006-05-02 11:33 ` Kenichi Handa
2006-05-02 22:50 ` Aidan Kehoe
2006-05-03 7:43 ` Kenichi Handa
2006-05-03 17:21 ` Kevin Rodgers
2006-05-03 18:51 ` Andreas Schwab
2006-05-04 21:14 ` Aidan Kehoe
2006-05-08 1:31 ` Kenichi Handa
2006-05-08 6:54 ` Aidan Kehoe
2006-05-08 13:55 ` Stefan Monnier
2006-05-08 14:24 ` Aidan Kehoe
2006-05-08 15:32 ` Stefan Monnier
2006-05-08 16:39 ` Aidan Kehoe
2006-05-08 17:39 ` Stefan Monnier
2006-05-09 7:04 ` Aidan Kehoe
2006-05-09 19:05 ` Eli Zaretskii
2006-05-10 6:05 ` Aidan Kehoe
2006-05-10 17:49 ` Eli Zaretskii
2006-05-10 21:37 ` Luc Teirlinck
2006-05-11 3:45 ` Eli Zaretskii
2006-05-10 21:48 ` Luc Teirlinck
2006-05-11 1:08 ` Luc Teirlinck
2006-05-11 2:29 ` Luc Teirlinck
2006-05-11 3:46 ` Richard Stallman
2006-05-09 0:36 ` Kenichi Handa
2006-05-02 10:36 ` Eli Zaretskii
2006-05-02 10:59 ` Aidan Kehoe
2006-05-02 19:26 ` Eli Zaretskii
2006-05-03 2:59 ` Kenichi Handa
2006-05-03 8:47 ` Eli Zaretskii
2006-05-03 14:21 ` Stefan Monnier
2006-05-03 18:26 ` Eli Zaretskii
2006-05-03 21:12 ` Ken Raeburn
2006-05-04 14:17 ` Richard Stallman
2006-05-04 16:41 ` Aidan Kehoe
2006-05-05 10:39 ` Eli Zaretskii
2006-05-05 16:35 ` Aidan Kehoe
2006-05-05 19:05 ` Richard Stallman
2006-05-05 19:20 ` Aidan Kehoe
2006-05-05 19:57 ` Aidan Kehoe
2006-05-06 14:25 ` Richard Stallman
2006-05-06 17:26 ` Aidan Kehoe
2006-05-07 5:01 ` Richard Stallman
2006-05-07 6:38 ` Aidan Kehoe
2006-05-07 7:00 ` David Kastrup
2006-05-07 7:15 ` Aidan Kehoe
2006-05-07 16:50 ` Aidan Kehoe
2006-05-08 22:28 ` Richard Stallman
2006-05-04 1:33 ` Kenichi Handa
2006-05-04 8:23 ` Oliver Scholz
2006-05-04 11:57 ` Kim F. Storm
2006-05-04 12:18 ` Stefan Monnier
2006-05-04 12:21 ` Kim F. Storm
2006-05-04 16:31 ` Eli Zaretskii
2006-05-04 21:40 ` Stefan Monnier
2006-05-05 10:25 ` Eli Zaretskii
2006-05-05 12:31 ` Oliver Scholz
2006-05-05 18:08 ` Stuart D. Herring
2006-05-05 13:05 ` Stefan Monnier
2006-05-05 17:23 ` Oliver Scholz [this message]
2006-05-04 13:07 ` Oliver Scholz
2006-05-04 16:32 ` Eli Zaretskii
2006-05-04 20:55 ` Aidan Kehoe
2006-05-05 9:33 ` Oliver Scholz
2006-05-05 10:02 ` Oliver Scholz
2006-05-05 18:33 ` Aidan Kehoe
2006-05-05 18:42 ` Oliver Scholz
2006-05-05 21:37 ` Eli Zaretskii
2006-05-06 14:24 ` Richard Stallman
2006-05-06 15:01 ` Oliver Scholz
[not found] ` <877j4z5had.fsf@gmx.de>
2006-05-07 5:00 ` Richard Stallman
2006-05-07 12:38 ` Kenichi Handa
2006-05-07 21:26 ` Oliver Scholz
2006-05-08 1:14 ` Kenichi Handa
2006-05-08 22:29 ` Richard Stallman
2006-05-09 3:42 ` Eli Zaretskii
2006-05-09 20:41 ` Richard Stallman
2006-05-09 21:03 ` Stefan Monnier
2006-05-10 3:33 ` Eli Zaretskii
2006-05-09 5:13 ` Kenichi Handa
2006-05-10 3:20 ` Richard Stallman
2006-05-10 5:37 ` Kenichi Handa
2006-05-10 7:22 ` Stefan Monnier
2006-05-11 3:45 ` Richard Stallman
2006-05-11 12:41 ` Stefan Monnier
2006-05-11 12:51 ` Kenichi Handa
2006-05-11 16:46 ` Stefan Monnier
2006-05-11 3:44 ` Richard Stallman
2006-05-11 3:44 ` Richard Stallman
2006-05-11 7:31 ` Kenichi Handa
2006-05-12 4:14 ` Richard Stallman
2006-05-12 5:26 ` Kenichi Handa
2006-05-13 4:52 ` Richard Stallman
2006-05-13 13:25 ` Stefan Monnier
2006-05-13 20:41 ` Richard Stallman
2006-05-14 13:32 ` Stefan Monnier
2006-05-14 23:29 ` Richard Stallman
2006-05-15 0:55 ` Stefan Monnier
2006-05-15 2:49 ` Oliver Scholz
2006-05-15 3:27 ` Stefan Monnier
2006-05-15 10:20 ` Oliver Scholz
2006-05-15 11:12 ` Oliver Scholz
2006-05-15 20:37 ` Richard Stallman
2006-05-16 9:49 ` Oliver Scholz
2006-05-16 11:16 ` Kim F. Storm
2006-05-16 11:39 ` Romain Francoise
2006-05-16 11:58 ` Oliver Scholz
2006-05-16 14:24 ` Kim F. Storm
2006-05-17 3:45 ` Richard Stallman
2006-05-17 8:37 ` Oliver Scholz
2006-05-17 20:09 ` Richard Stallman
2006-05-17 12:37 ` Oliver Scholz
2006-05-19 2:05 ` Richard Stallman
2006-05-19 8:47 ` Oliver Scholz
2006-05-18 1:09 ` Kenichi Handa
2006-05-21 0:57 ` Richard Stallman
2006-05-22 1:33 ` Kenichi Handa
2006-05-22 15:12 ` Richard Stallman
2006-05-23 1:05 ` Kenichi Handa
2006-05-23 5:18 ` Juri Linkov
2006-05-24 2:18 ` Richard Stallman
2006-06-02 6:49 ` Kenichi Handa
2006-06-02 8:00 ` Kim F. Storm
2006-06-02 9:27 ` Juri Linkov
2006-06-02 10:50 ` Eli Zaretskii
2006-06-02 11:39 ` Kenichi Handa
2006-06-02 12:12 ` Juri Linkov
2006-06-02 22:39 ` Richard Stallman
2006-06-03 6:42 ` Juri Linkov
2006-06-04 2:23 ` Richard Stallman
2006-06-05 7:24 ` Kenichi Handa
2006-06-05 21:31 ` Richard Stallman
2006-06-07 1:24 ` Kenichi Handa
2006-06-02 22:39 ` Richard Stallman
2006-05-24 2:17 ` Richard Stallman
2006-05-17 15:15 ` Stefan Monnier
2006-05-17 3:45 ` Richard Stallman
2006-05-17 3:45 ` Richard Stallman
2006-05-17 8:53 ` Oliver Scholz
2006-05-17 20:09 ` Richard Stallman
2006-05-18 9:12 ` Oliver Scholz
2006-05-15 20:37 ` Richard Stallman
2006-05-15 5:13 ` Kenichi Handa
2006-05-15 8:06 ` Kim F. Storm
2006-05-15 9:04 ` Andreas Schwab
2006-05-15 20:38 ` Richard Stallman
2006-05-15 14:08 ` Stefan Monnier
2006-05-15 20:37 ` Richard Stallman
2006-05-16 10:07 ` Oliver Scholz
2006-05-18 0:31 ` Kenichi Handa
2006-05-11 9:44 ` Oliver Scholz
2006-05-08 7:36 ` Richard Stallman
2006-05-08 7:50 ` Kenichi Handa
2006-05-05 19:05 ` Richard Stallman
2006-05-05 21:43 ` Eli Zaretskii
2006-05-06 14:25 ` Richard Stallman
2006-05-04 1:26 ` Kenichi Handa
[not found] <E1FaJ0b-0008G8-8u@monty-python.gnu.org>
2006-04-30 21:16 ` Jonathan Yavner
2006-05-01 18:32 ` Richard Stallman
2006-05-02 5:03 ` Jonathan Yavner
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to='e3g1ok$c4i$1@sea.gmane.org' \
--to=alkibiades@gmx.de \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this external index
https://git.savannah.gnu.org/cgit/emacs.git
https://git.savannah.gnu.org/cgit/emacs/org-mode.git
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.