all messages for Emacs-related lists mirrored at yhetil.org
 help / color / mirror / code / Atom feed
From: Oliver Scholz <alkibiades@gmx.de>
Subject: Re: [PATCH] Unicode Lisp reader escapes
Date: Fri, 05 May 2006 19:23:36 +0200	[thread overview]
Message-ID: <e3g1ok$c4i$1@sea.gmane.org> (raw)
In-Reply-To: 87bquctz7t.fsf-monnier+emacs@gnu.org

[-- Attachment #1: Type: text/plain, Size: 817 bytes --]

For what it's worth, I just tried the attached little stress test on
an updated C port of `decode-char' in order to check whether it
returns equivalent results. It does. (Well, except for intentional
differences, such as `ucs_to_internal' signaling an error where
`decode-char' returns nil.)

Basically the test runs through all positive integers up to MAX_CHAR
and inserts an alist into a temp buffer with each car being the
integer and each cdr being a character in the \u syntax (e.g.
`?\u3b1'). It then reads that alist again and checks whether
`decode-char' on its car is `eq' to its cdr. I tried it with and
without `utf-translate-cjk-mode' and with and without
`utf-fragment-on-decoding'. Since all tests succeed, ucs_to_internal
and `decode-char' are functionally equivalent on all supported
characters.

The test: 

[-- Attachment #2: ucs-test.el --]
[-- Type: application/emacs-lisp, Size: 1517 bytes --]

[-- Attachment #3: Type: text/plain, Size: 20 bytes --]


The updated patch: 

[-- Attachment #4: ucs-escapes.diff --]
[-- Type: text/plain, Size: 6643 bytes --]

Index: src/lread.c
===================================================================
RCS file: /cvsroot/emacs/emacs/src/lread.c,v
retrieving revision 1.350
diff -u -r1.350 lread.c
--- src/lread.c	27 Feb 2006 02:04:35 -0000	1.350
+++ src/lread.c	5 May 2006 17:09:37 -0000
@@ -87,6 +87,9 @@
 Lisp_Object Qbackquote, Qcomma, Qcomma_at, Qcomma_dot, Qfunction;
 Lisp_Object Qinhibit_file_name_operation;
 Lisp_Object Qeval_buffer_list, Veval_buffer_list;
+Lisp_Object Qutf_translate_cjk_mode, Qutf_translate_cjk_lang_env, Qutf_translate_cjk_load_tables;
+Lisp_Object Qutf_subst_table_for_decode, Qtranslation_hash_table;
+Lisp_Object Qutf_translation_table_for_decode, Qtranslation_table;
 
 extern Lisp_Object Qevent_symbol_element_mask;
 extern Lisp_Object Qfile_exists_p;
@@ -1731,6 +1734,110 @@
   return str[0];
 }
 
+/* Fold hex digits from READCHAR into I (caller zeroes I); UNREAD the stopper.  */
+#define READ_HEX_ESCAPE(i, c)                                         \
+  while (1)                                                           \
+    {                                                                 \
+      c = READCHAR;                                                   \
+      if (c >= '0' && c <= '9')                                       \
+        {                                                             \
+          i *= 16;                                                    \
+          i += c - '0';                                               \
+        }                                                             \
+      else if ((c >= 'a' && c <= 'f')                                 \
+               || (c >= 'A' && c <= 'F'))                             \
+        {                                                             \
+          i *= 16;                                                    \
+          if (c >= 'a' && c <= 'f')                                   \
+            i += c - 'a' + 10;                                        \
+          else                                                        \
+            i += c - 'A' + 10;                                        \
+        }                                                             \
+      else                                                            \
+        {                                                             \
+          UNREAD (c);                                                 \
+          break;                                                      \
+        }                                                             \
+    }
+
+/* Return the internal character corresponding to the UCS code point
+   UCS, honoring the utf-translate-cjk substitution table and the decode
+   translation table.  Signal an error if no character was found.  */
+
+int
+ucs_to_internal (ucs)
+     int ucs;
+{
+  int c = 0;
+  Lisp_Object tmp_char;
+
+  if (! EQ (Qnil, SYMBOL_VALUE (Qutf_translate_cjk_mode)))
+    /* cf. `utf-lookup-subst-table-for-decode' */
+    {
+      Lisp_Object hash;
+      /* NOTE(review): nil lang-env presumably means tables are unloaded.  */
+      if (EQ (Qnil, SYMBOL_VALUE (Qutf_translate_cjk_lang_env)))
+        call0 (Qutf_translate_cjk_load_tables);
+
+      hash = Fget (Qutf_subst_table_for_decode, Qtranslation_hash_table);
+
+      if (HASH_TABLE_P (hash))
+        {
+          tmp_char = Fgethash (make_number (ucs), hash, Qnil);
+          if (! EQ (Qnil, tmp_char))
+            {
+              CHECK_NUMBER (tmp_char);
+              c = XFASTINT (tmp_char);
+            }
+        }
+    }
+
+  if (c)
+    /* We found the character already in the translation hash table.
+       Do nothing. */
+    ;
+  else if (ucs < 160)
+    c = ucs;
+  else if (ucs < 256)
+    c = MAKE_CHAR (charset_latin_iso8859_1, ucs, 0);
+  else if (ucs < 0x2500)
+    {
+      ucs -= 0x0100;	/* Offset from the start of this range.  */
+      c = MAKE_CHAR (charset_mule_unicode_0100_24ff,
+                     ((ucs / 96) + 32),
+                     ((ucs % 96) + 32));
+    }
+    else if (ucs < 0x3400)
+    {
+      ucs -= 0x2500;
+      c = MAKE_CHAR (charset_mule_unicode_2500_33ff,
+                     ((ucs / 96) + 32),
+                     ((ucs % 96) + 32));
+    }
+    else if ((ucs >= 0xE000) && (ucs < 0x10000))
+      {
+        ucs -= 0xE000;
+        c = MAKE_CHAR (charset_mule_unicode_e000_ffff,
+                       ((ucs / 96) + 32),
+                       ((ucs % 96) + 32));
+      }
+  /* C is 0 here if UCS is 0 or fell into none of the ranges above.  */
+  if (c || ucs == 0) /* U+0000 is also a valid character. */
+    {
+      Lisp_Object vect = Fget (Qutf_translation_table_for_decode,
+                               Qtranslation_table);
+      if (CHAR_TABLE_P (vect))
+        {
+          tmp_char = Faref (vect, make_number (c));
+          if (! EQ (Qnil, tmp_char))
+            return XFASTINT (tmp_char);
+        }
+      return c;
+    }
+  else error ("Invalid or unsupported UCS character: %x", ucs);
+}
+
+      
 /* Read a \-escape sequence, assuming we already read the `\'.
    If the escape sequence forces unibyte, store 1 into *BYTEREP.
    If the escape sequence forces multibyte, store 2 into *BYTEREP.
@@ -1879,34 +1986,23 @@
       /* A hex escape, as in ANSI C.  */
       {
 	int i = 0;
-	while (1)
-	  {
-	    c = READCHAR;
-	    if (c >= '0' && c <= '9')
-	      {
-		i *= 16;
-		i += c - '0';
-	      }
-	    else if ((c >= 'a' && c <= 'f')
-		     || (c >= 'A' && c <= 'F'))
-	      {
-		i *= 16;
-		if (c >= 'a' && c <= 'f')
-		  i += c - 'a' + 10;
-		else
-		  i += c - 'A' + 10;
-	      }
-	    else
-	      {
-		UNREAD (c);
-		break;
-	      }
-	  }
-
+        READ_HEX_ESCAPE (i, c);
 	*byterep = 2;
 	return i;
       }
 
+    case 'u':
+      /* A hexadecimal reference to a UCS character.  */
+      {
+        int i = 0;
+        
+        READ_HEX_ESCAPE (i, c);
+        *byterep = 2;
+
+        return ucs_to_internal (i);
+
+      }
+
     default:
       if (BASE_LEADING_CODE_P (c))
 	c = read_multibyte (c, readcharfun);
@@ -4121,6 +4217,27 @@
 
   Vloads_in_progress = Qnil;
   staticpro (&Vloads_in_progress);
+  /* Intern and GC-protect the Lisp symbols looked up by ucs_to_internal.  */
+  Qutf_translate_cjk_mode = intern ("utf-translate-cjk-mode");
+  staticpro (&Qutf_translate_cjk_mode);
+  
+  Qutf_translate_cjk_lang_env = intern ("utf-translate-cjk-lang-env");
+  staticpro (&Qutf_translate_cjk_lang_env);
+  
+  Qutf_translate_cjk_load_tables = intern ("utf-translate-cjk-load-tables");
+  staticpro (&Qutf_translate_cjk_load_tables);
+  
+  Qutf_subst_table_for_decode = intern ("utf-subst-table-for-decode");
+  staticpro (&Qutf_subst_table_for_decode);
+  
+  Qtranslation_hash_table = intern ("translation-hash-table");
+  staticpro (&Qtranslation_hash_table);
+
+  Qutf_translation_table_for_decode = intern ("utf-translation-table-for-decode");
+  staticpro (&Qutf_translation_table_for_decode);
+  
+  Qtranslation_table = intern ("translation-table");
+  staticpro (&Qtranslation_table);
 }
 
 /* arch-tag: a0d02733-0f96-4844-a659-9fd53c4f414d

[-- Attachment #5: Type: text/plain, Size: 87 bytes --]



    Oliver
-- 
16 Floréal an 214 de la Révolution
Liberté, Egalité, Fraternité!

[-- Attachment #6: Type: text/plain, Size: 142 bytes --]

_______________________________________________
Emacs-devel mailing list
Emacs-devel@gnu.org
http://lists.gnu.org/mailman/listinfo/emacs-devel

  reply	other threads:[~2006-05-05 17:23 UTC|newest]

Thread overview: 202+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2006-04-29 15:35 [PATCH] Unicode Lisp reader escapes Aidan Kehoe
2006-04-29 23:26 ` Stefan Monnier
2006-04-30  8:26   ` Aidan Kehoe
2006-04-30  3:04 ` Richard Stallman
2006-04-30  8:14   ` Aidan Kehoe
2006-04-30 20:53     ` Richard Stallman
2006-04-30 21:04       ` Andreas Schwab
2006-04-30 21:57         ` Aidan Kehoe
2006-04-30 22:14           ` Andreas Schwab
2006-05-01 18:32         ` Richard Stallman
2006-05-01 19:03           ` Oliver Scholz
2006-05-02  4:45             ` Richard Stallman
2006-05-02  0:46           ` Kenichi Handa
2006-05-02  6:41           ` Aidan Kehoe
2006-05-02 21:36             ` Richard Stallman
2006-04-30 21:56       ` Aidan Kehoe
2006-05-01  1:44         ` Miles Bader
2006-05-01  3:12           ` Stefan Monnier
2006-05-01  3:41             ` Miles Bader
2006-05-01 12:29               ` Stefan Monnier
2006-05-05 23:15       ` Juri Linkov
2006-05-06 23:36         ` Richard Stallman
2006-05-09 20:43           ` Juri Linkov
2006-05-11  3:44             ` Richard Stallman
2006-05-11 12:03               ` Juri Linkov
2006-05-11 13:16                 ` Kenichi Handa
2006-05-12  4:15                 ` Richard Stallman
2006-06-03 18:44                   ` Aidan Kehoe
     [not found]                   ` <17537.54719.354843.89030@parhasard.net>
     [not found]                     ` <ufyieqj0v.fsf@gnu.org>
2006-06-15 18:38                       ` Aidan Kehoe
2006-06-17 18:57                         ` Eli Zaretskii
2006-06-18 16:11                           ` Aidan Kehoe
2006-06-18 19:55                             ` Eli Zaretskii
2006-06-20  2:37                               ` Kenichi Handa
2006-06-20 17:56                                 ` Richard Stallman
2006-06-23 18:35                                 ` Aidan Kehoe
2006-06-24  6:50                                   ` Eli Zaretskii
2006-05-02  6:43 ` Kenichi Handa
2006-05-02  7:00   ` Aidan Kehoe
2006-05-02 10:45     ` Eli Zaretskii
2006-05-02 11:13       ` Aidan Kehoe
2006-05-02 19:31         ` Eli Zaretskii
2006-05-02 20:25           ` Aidan Kehoe
2006-05-02 22:16             ` Oliver Scholz
2006-05-02 11:33     ` Kenichi Handa
2006-05-02 22:50       ` Aidan Kehoe
2006-05-03  7:43         ` Kenichi Handa
2006-05-03 17:21         ` Kevin Rodgers
2006-05-03 18:51           ` Andreas Schwab
2006-05-04 21:14             ` Aidan Kehoe
2006-05-08  1:31               ` Kenichi Handa
2006-05-08  6:54                 ` Aidan Kehoe
2006-05-08 13:55                 ` Stefan Monnier
2006-05-08 14:24                   ` Aidan Kehoe
2006-05-08 15:32                     ` Stefan Monnier
2006-05-08 16:39                       ` Aidan Kehoe
2006-05-08 17:39                         ` Stefan Monnier
2006-05-09  7:04                           ` Aidan Kehoe
2006-05-09 19:05                             ` Eli Zaretskii
2006-05-10  6:05                               ` Aidan Kehoe
2006-05-10 17:49                                 ` Eli Zaretskii
2006-05-10 21:37                                   ` Luc Teirlinck
2006-05-11  3:45                                     ` Eli Zaretskii
2006-05-10 21:48                                   ` Luc Teirlinck
2006-05-11  1:08                                   ` Luc Teirlinck
2006-05-11  2:29                                     ` Luc Teirlinck
2006-05-11  3:46                                   ` Richard Stallman
2006-05-09  0:36                   ` Kenichi Handa
2006-05-02 10:36   ` Eli Zaretskii
2006-05-02 10:59     ` Aidan Kehoe
2006-05-02 19:26       ` Eli Zaretskii
2006-05-03  2:59     ` Kenichi Handa
2006-05-03  8:47       ` Eli Zaretskii
2006-05-03 14:21         ` Stefan Monnier
2006-05-03 18:26           ` Eli Zaretskii
2006-05-03 21:12             ` Ken Raeburn
2006-05-04 14:17             ` Richard Stallman
2006-05-04 16:41               ` Aidan Kehoe
2006-05-05 10:39                 ` Eli Zaretskii
2006-05-05 16:35                   ` Aidan Kehoe
2006-05-05 19:05                 ` Richard Stallman
2006-05-05 19:20                   ` Aidan Kehoe
2006-05-05 19:57                     ` Aidan Kehoe
2006-05-06 14:25                       ` Richard Stallman
2006-05-06 17:26                         ` Aidan Kehoe
2006-05-07  5:01                           ` Richard Stallman
2006-05-07  6:38                             ` Aidan Kehoe
2006-05-07  7:00                               ` David Kastrup
2006-05-07  7:15                                 ` Aidan Kehoe
2006-05-07 16:50                             ` Aidan Kehoe
2006-05-08 22:28                               ` Richard Stallman
2006-05-04  1:33           ` Kenichi Handa
2006-05-04  8:23             ` Oliver Scholz
2006-05-04 11:57               ` Kim F. Storm
2006-05-04 12:18                 ` Stefan Monnier
2006-05-04 12:21                   ` Kim F. Storm
2006-05-04 16:31                   ` Eli Zaretskii
2006-05-04 21:40                     ` Stefan Monnier
2006-05-05 10:25                       ` Eli Zaretskii
2006-05-05 12:31                         ` Oliver Scholz
2006-05-05 18:08                           ` Stuart D. Herring
2006-05-05 13:05                         ` Stefan Monnier
2006-05-05 17:23                           ` Oliver Scholz [this message]
2006-05-04 13:07                 ` Oliver Scholz
2006-05-04 16:32             ` Eli Zaretskii
2006-05-04 20:55               ` Aidan Kehoe
2006-05-05  9:33                 ` Oliver Scholz
2006-05-05 10:02                   ` Oliver Scholz
2006-05-05 18:33                   ` Aidan Kehoe
2006-05-05 18:42                     ` Oliver Scholz
2006-05-05 21:37                     ` Eli Zaretskii
2006-05-06 14:24                   ` Richard Stallman
2006-05-06 15:01                     ` Oliver Scholz
     [not found]                     ` <877j4z5had.fsf@gmx.de>
2006-05-07  5:00                       ` Richard Stallman
2006-05-07 12:38                         ` Kenichi Handa
2006-05-07 21:26                           ` Oliver Scholz
2006-05-08  1:14                             ` Kenichi Handa
2006-05-08 22:29                             ` Richard Stallman
2006-05-09  3:42                               ` Eli Zaretskii
2006-05-09 20:41                                 ` Richard Stallman
2006-05-09 21:03                                   ` Stefan Monnier
2006-05-10  3:33                                   ` Eli Zaretskii
2006-05-09  5:13                               ` Kenichi Handa
2006-05-10  3:20                                 ` Richard Stallman
2006-05-10  5:37                                   ` Kenichi Handa
2006-05-10  7:22                                     ` Stefan Monnier
2006-05-11  3:45                                       ` Richard Stallman
2006-05-11 12:41                                         ` Stefan Monnier
2006-05-11 12:51                                           ` Kenichi Handa
2006-05-11 16:46                                             ` Stefan Monnier
2006-05-11  3:44                                     ` Richard Stallman
2006-05-11  3:44                                     ` Richard Stallman
2006-05-11  7:31                                       ` Kenichi Handa
2006-05-12  4:14                                         ` Richard Stallman
2006-05-12  5:26                                           ` Kenichi Handa
2006-05-13  4:52                                             ` Richard Stallman
2006-05-13 13:25                                               ` Stefan Monnier
2006-05-13 20:41                                                 ` Richard Stallman
2006-05-14 13:32                                                   ` Stefan Monnier
2006-05-14 23:29                                                     ` Richard Stallman
2006-05-15  0:55                                                       ` Stefan Monnier
2006-05-15  2:49                                                         ` Oliver Scholz
2006-05-15  3:27                                                           ` Stefan Monnier
2006-05-15 10:20                                                             ` Oliver Scholz
2006-05-15 11:12                                                               ` Oliver Scholz
2006-05-15 20:37                                                           ` Richard Stallman
2006-05-16  9:49                                                             ` Oliver Scholz
2006-05-16 11:16                                                               ` Kim F. Storm
2006-05-16 11:39                                                                 ` Romain Francoise
2006-05-16 11:58                                                                 ` Oliver Scholz
2006-05-16 14:24                                                                   ` Kim F. Storm
2006-05-17  3:45                                                                   ` Richard Stallman
2006-05-17  8:37                                                                     ` Oliver Scholz
2006-05-17 20:09                                                                       ` Richard Stallman
2006-05-17 12:37                                                                     ` Oliver Scholz
2006-05-19  2:05                                                                       ` Richard Stallman
2006-05-19  8:47                                                                         ` Oliver Scholz
2006-05-18  1:09                                                                     ` Kenichi Handa
2006-05-21  0:57                                                                       ` Richard Stallman
2006-05-22  1:33                                                                         ` Kenichi Handa
2006-05-22 15:12                                                                           ` Richard Stallman
2006-05-23  1:05                                                                             ` Kenichi Handa
2006-05-23  5:18                                                                               ` Juri Linkov
2006-05-24  2:18                                                                                 ` Richard Stallman
2006-06-02  6:49                                                                                   ` Kenichi Handa
2006-06-02  8:00                                                                                     ` Kim F. Storm
2006-06-02  9:27                                                                                     ` Juri Linkov
2006-06-02 10:50                                                                                       ` Eli Zaretskii
2006-06-02 11:39                                                                                       ` Kenichi Handa
2006-06-02 12:12                                                                                         ` Juri Linkov
2006-06-02 22:39                                                                                       ` Richard Stallman
2006-06-03  6:42                                                                                         ` Juri Linkov
2006-06-04  2:23                                                                                           ` Richard Stallman
2006-06-05  7:24                                                                                             ` Kenichi Handa
2006-06-05 21:31                                                                                               ` Richard Stallman
2006-06-07  1:24                                                                                                 ` Kenichi Handa
2006-06-02 22:39                                                                                     ` Richard Stallman
2006-05-24  2:17                                                                               ` Richard Stallman
2006-05-17 15:15                                                                   ` Stefan Monnier
2006-05-17  3:45                                                                 ` Richard Stallman
2006-05-17  3:45                                                               ` Richard Stallman
2006-05-17  8:53                                                                 ` Oliver Scholz
2006-05-17 20:09                                                                   ` Richard Stallman
2006-05-18  9:12                                                                     ` Oliver Scholz
2006-05-15 20:37                                                         ` Richard Stallman
2006-05-15  5:13                                               ` Kenichi Handa
2006-05-15  8:06                                                 ` Kim F. Storm
2006-05-15  9:04                                                   ` Andreas Schwab
2006-05-15 20:38                                                   ` Richard Stallman
2006-05-15 14:08                                                 ` Stefan Monnier
2006-05-15 20:37                                                 ` Richard Stallman
2006-05-16 10:07                                                   ` Oliver Scholz
2006-05-18  0:31                                                   ` Kenichi Handa
2006-05-11  9:44                                       ` Oliver Scholz
2006-05-08  7:36                           ` Richard Stallman
2006-05-08  7:50                             ` Kenichi Handa
2006-05-05 19:05               ` Richard Stallman
2006-05-05 21:43                 ` Eli Zaretskii
2006-05-06 14:25                   ` Richard Stallman
2006-05-04  1:26         ` Kenichi Handa
     [not found] <E1FaJ0b-0008G8-8u@monty-python.gnu.org>
2006-04-30 21:16 ` Jonathan Yavner
2006-05-01 18:32   ` Richard Stallman
2006-05-02  5:03     ` Jonathan Yavner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to='e3g1ok$c4i$1@sea.gmane.org' \
    --to=alkibiades@gmx.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this external index

	https://git.savannah.gnu.org/cgit/emacs.git
	https://git.savannah.gnu.org/cgit/emacs/org-mode.git

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.