all messages for Emacs-related lists mirrored at yhetil.org
 help / color / mirror / code / Atom feed
* utf-8.el
@ 2005-01-18 16:37 Stefan Monnier
  2005-01-19  2:51 ` utf-8.el Kenichi Handa
  0 siblings, 1 reply; 10+ messages in thread
From: Stefan Monnier @ 2005-01-18 16:37 UTC (permalink / raw)


Does anyone see a problem with the simple patch below?
Also, could anyone confirm that the docstring of mule-utf-8 is correct in
saying that invalid utf-8 sequences are not always correctly preserved?
Why is that?  Can't we fix it?

Also, could anyone explain to me why `utf-8-compose' needs to look up the
hash table (get 'utf-subst-table-for-decode 'translation-hash-table), since
it looks to me like ccl-decode-mule-utf-8 already takes care of decoding
chars that are in this table?  I also don't understand the following part of
the code:

	  (if (= l 2)
	      (put-text-property (point) (min (point-max) (+ l (point)))
				 'display (format "\\%03o" ch))
	    (compose-region (point) (+ l (point)) ?�))

What does it mean for l (the number of bytes) to be equal to 2?


        Stefan


--- orig/lisp/international/utf-8.el
+++ mod/lisp/international/utf-8.el
@@ -2,7 +2,7 @@
 
 ;; Copyright (C) 2001, 2004 Electrotechnical Laboratory, JAPAN.
 ;; Licensed to the Free Software Foundation.
-;; Copyright (C) 2001, 2002 Free Software Foundation, Inc.
+;; Copyright (C) 2001, 2002, 2005  Free Software Foundation, Inc.
 
 ;; Author: TAKAHASHI Naoto  <ntakahas@m17n.org>
 ;; Maintainer: FSF
@@ -259,7 +259,7 @@
 				 (funcall decode-char-no-trans (car x))
 				 (funcall decode-char-no-trans (cdr x))))
 		     ranges "")))
-  ;; These forces loading and settting tables for
+  ;; This forces loading and setting tables for
   ;; utf-translate-cjk-mode.
   (setq utf-translate-cjk-lang-env nil
 	ucs-mule-cjk-to-unicode (make-hash-table :test 'eq)
@@ -951,10 +951,7 @@
   (save-excursion
     (save-restriction
       (narrow-to-region (point) (+ (point) length))
-      ;; Can't do eval-when-compile to insert a multibyte constant
-      ;; version of the string in the loop, since it's always loaded as
-      ;; unibyte from a byte-compiled file.
-      (let ((range (string-as-multibyte "^\xc0-\xc3\xe1-\xf7"))
+      (let ((range "^\xc0-\xc3\xe1-\xf7")
 	    (buffer-multibyte enable-multibyte-characters)
 	    hash-table ch)
 	(set-buffer-multibyte t)
@@ -1036,8 +1033,7 @@
     mule-unicode-0100-24ff
     mule-unicode-2500-33ff
     mule-unicode-e000-ffff
-    ,@(if utf-translate-cjk-mode
-	  utf-translate-cjk-charsets))
+    ,@utf-translate-cjk-charsets)
    (mime-charset . utf-8)
    (coding-category . coding-category-utf-8)
    (valid-codes (0 . 255))
@@ -1054,23 +1050,23 @@
 ;; I think this needs special private charsets defined for the
 ;; untranslated sequences, if it's going to work well.
 
-;;; (defun utf-8-compose-function (pos to pattern &optional string)
-;;;   (let* ((prop (get-char-property pos 'composition string))
-;;; 	 (l (and prop (- (cadr prop) (car prop)))))
-;;;     (cond ((and l (> l (- to pos)))
-;;; 	   (delete-region pos to))
-;;; 	  ((and (> (char-after pos) 224)
-;;; 		(< (char-after pos) 256)
-;;; 		(save-restriction
-;;; 		  (narrow-to-region pos to)
-;;; 		  (utf-8-compose)))
-;;; 	   t))))
-
-;;; (dotimes (i 96)
-;;;   (aset composition-function-table
-;;; 	(+ 128 i)
-;;; 	`((,(string-as-multibyte "[\200-\237\240-\377]")
-;;; 	   . utf-8-compose-function))))
+;; (defun utf-8-compose-function (pos to pattern &optional string)
+;;   (let* ((prop (get-char-property pos 'composition string))
+;; 	 (l (and prop (- (cadr prop) (car prop)))))
+;;     (cond ((and l (> l (- to pos)))
+;; 	   (delete-region pos to))
+;; 	  ((and (> (char-after pos) 224)
+;; 		(< (char-after pos) 256)
+;; 		(save-restriction
+;; 		  (narrow-to-region pos to)
+;; 		  (utf-8-compose)))
+;; 	   t))))
+
+;; (dotimes (i 96)
+;;   (aset composition-function-table
+;; 	(+ 128 i)
+;; 	`((,(string-as-multibyte "[\200-\237\240-\377]")
+;; 	   . utf-8-compose-function))))
 
 ;; arch-tag: b08735b7-753b-4ae6-b754-0f3efe4515c5
 ;;; utf-8.el ends here

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2005-01-20  1:00 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2005-01-18 16:37 utf-8.el Stefan Monnier
2005-01-19  2:51 ` utf-8.el Kenichi Handa
2005-01-19  4:37   ` utf-8.el Stefan Monnier
2005-01-19  6:15     ` utf-8.el Kenichi Handa
2005-01-19 23:03       ` utf-8.el Stefan Monnier
2005-01-19 23:47         ` utf-8.el Kenichi Handa
2005-01-19 23:52           ` utf-8.el Stefan Monnier
2005-01-20  1:00             ` utf-8.el Kenichi Handa
2005-01-19 10:51   ` utf-8.el Andreas Schwab
2005-01-19 13:09     ` utf-8.el Kenichi Handa

Code repositories for project(s) associated with this external index

	https://git.savannah.gnu.org/cgit/emacs.git
	https://git.savannah.gnu.org/cgit/emacs/org-mode.git

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.