From: Stefan Monnier <monnier@iro.umontreal.ca>
Subject: utf-8.el
Date: Tue, 18 Jan 2005 11:37:26 -0500 [thread overview]
Message-ID: <jwvpt02zp5h.fsf-monnier+emacs@gnu.org> (raw)
Does anyone see a problem with the simple patch below?
Also, could anyone confirm that the docstring of mule-utf-8 is correct in
saying that invalid utf-8 sequences are not always correctly preserved?
Why is that? Can't we fix it?
Also, could anyone explain to me why `utf-8-compose' needs to look up the
hashtable (get 'utf-subst-table-for-decode 'translation-hash-table), since
it looks to me like ccl-decode-mule-utf-8 already takes care of decoding
chars that are in this table. I also don't understand the following part of
the code:
(if (= l 2)
(put-text-property (point) (min (point-max) (+ l (point)))
'display (format "\\%03o" ch))
(compose-region (point) (+ l (point)) ?�))
What does it mean for l (the number of bytes) to be equal to 2?
Stefan
--- orig/lisp/international/utf-8.el
+++ mod/lisp/international/utf-8.el
@@ -2,7 +2,7 @@
;; Copyright (C) 2001, 2004 Electrotechnical Laboratory, JAPAN.
;; Licensed to the Free Software Foundation.
-;; Copyright (C) 2001, 2002 Free Software Foundation, Inc.
+;; Copyright (C) 2001, 2002, 2005 Free Software Foundation, Inc.
;; Author: TAKAHASHI Naoto <ntakahas@m17n.org>
;; Maintainer: FSF
@@ -259,7 +259,7 @@
(funcall decode-char-no-trans (car x))
(funcall decode-char-no-trans (cdr x))))
ranges "")))
- ;; These forces loading and settting tables for
+ ;; This forces loading and setting tables for
;; utf-translate-cjk-mode.
(setq utf-translate-cjk-lang-env nil
ucs-mule-cjk-to-unicode (make-hash-table :test 'eq)
@@ -951,10 +951,7 @@
(save-excursion
(save-restriction
(narrow-to-region (point) (+ (point) length))
- ;; Can't do eval-when-compile to insert a multibyte constant
- ;; version of the string in the loop, since it's always loaded as
- ;; unibyte from a byte-compiled file.
- (let ((range (string-as-multibyte "^\xc0-\xc3\xe1-\xf7"))
+ (let ((range "^\xc0-\xc3\xe1-\xf7")
(buffer-multibyte enable-multibyte-characters)
hash-table ch)
(set-buffer-multibyte t)
@@ -1036,8 +1033,7 @@
mule-unicode-0100-24ff
mule-unicode-2500-33ff
mule-unicode-e000-ffff
- ,@(if utf-translate-cjk-mode
- utf-translate-cjk-charsets))
+ ,@utf-translate-cjk-charsets)
(mime-charset . utf-8)
(coding-category . coding-category-utf-8)
(valid-codes (0 . 255))
@@ -1054,23 +1050,23 @@
;; I think this needs special private charsets defined for the
;; untranslated sequences, if it's going to work well.
-;;; (defun utf-8-compose-function (pos to pattern &optional string)
-;;; (let* ((prop (get-char-property pos 'composition string))
-;;; (l (and prop (- (cadr prop) (car prop)))))
-;;; (cond ((and l (> l (- to pos)))
-;;; (delete-region pos to))
-;;; ((and (> (char-after pos) 224)
-;;; (< (char-after pos) 256)
-;;; (save-restriction
-;;; (narrow-to-region pos to)
-;;; (utf-8-compose)))
-;;; t))))
-
-;;; (dotimes (i 96)
-;;; (aset composition-function-table
-;;; (+ 128 i)
-;;; `((,(string-as-multibyte "[\200-\237\240-\377]")
-;;; . utf-8-compose-function))))
+;; (defun utf-8-compose-function (pos to pattern &optional string)
+;; (let* ((prop (get-char-property pos 'composition string))
+;; (l (and prop (- (cadr prop) (car prop)))))
+;; (cond ((and l (> l (- to pos)))
+;; (delete-region pos to))
+;; ((and (> (char-after pos) 224)
+;; (< (char-after pos) 256)
+;; (save-restriction
+;; (narrow-to-region pos to)
+;; (utf-8-compose)))
+;; t))))
+
+;; (dotimes (i 96)
+;; (aset composition-function-table
+;; (+ 128 i)
+;; `((,(string-as-multibyte "[\200-\237\240-\377]")
+;; . utf-8-compose-function))))
;; arch-tag: b08735b7-753b-4ae6-b754-0f3efe4515c5
;;; utf-8.el ends here
next reply other threads:[~2005-01-18 16:37 UTC|newest]
Thread overview: 10+ messages / expand[flat|nested] mbox.gz Atom feed top
2005-01-18 16:37 Stefan Monnier [this message]
2005-01-19 2:51 ` utf-8.el Kenichi Handa
2005-01-19 4:37 ` utf-8.el Stefan Monnier
2005-01-19 6:15 ` utf-8.el Kenichi Handa
2005-01-19 23:03 ` utf-8.el Stefan Monnier
2005-01-19 23:47 ` utf-8.el Kenichi Handa
2005-01-19 23:52 ` utf-8.el Stefan Monnier
2005-01-20 1:00 ` utf-8.el Kenichi Handa
2005-01-19 10:51 ` utf-8.el Andreas Schwab
2005-01-19 13:09 ` utf-8.el Kenichi Handa
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: https://www.gnu.org/software/emacs/
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=jwvpt02zp5h.fsf-monnier+emacs@gnu.org \
--to=monnier@iro.umontreal.ca \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://git.savannah.gnu.org/cgit/emacs.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).