From: Stefan Monnier <monnier@iro.umontreal.ca>
To: rms@gnu.org
Cc: emacs-devel@gnu.org, Kenichi Handa <handa@m17n.org>
Subject: Re: (aset UNIBYTE-STRING MULTIBYTE-CHAR)
Date: Sat, 16 Feb 2008 09:33:24 -0500 [thread overview]
Message-ID: <jwvbq6hkm8t.fsf-monnier+emacs@gnu.org> (raw)
In-Reply-To: <E1JQFz2-0005qg-M7@fencepost.gnu.org> (Richard Stallman's message of "Sat, 16 Feb 2008 00:53:08 -0500")
> If we are going to inhibit aset on multibyte strings, I think we
> should inhibit aset on any strings to avoid a further confusion.
> I think someone should try making it work.
> The way I suggested should not be terribly hard.
The problem is the following: while it can be made to work, it will be
inefficient. If we just make it work, the callers will never find out
that they're doing things in a terribly inefficient way. The real
fix is to change the callers.
BTW, I suggest the patch below to fix one such caller.
Stefan
--- orig/src/casefiddle.c
+++ mod/src/casefiddle.c
@@ -75,23 +76,18 @@
return obj;
}
- if (STRINGP (obj))
+ if (!STRINGP (obj))
+ wrong_type_argument (Qchar_or_string_p, obj);
+ else if (STRING_UNIBYTE (obj))
{
- int multibyte = STRING_MULTIBYTE (obj);
- int i, i_byte, len;
- int size = SCHARS (obj);
+ EMACS_INT i;
+ EMACS_INT size = SCHARS (obj);
obj = Fcopy_sequence (obj);
- for (i = i_byte = 0; i < size; i++, i_byte += len)
+ for (i = 0; i < size; i++)
{
- if (multibyte)
- c = STRING_CHAR_AND_LENGTH (SDATA (obj) + i_byte, 0, len);
- else
- {
- c = SREF (obj, i_byte);
- len = 1;
- MAKE_CHAR_MULTIBYTE (c);
- }
+ c = SREF (obj, i);
+ MAKE_CHAR_MULTIBYTE (c);
c1 = c;
if (inword && flag != CASE_CAPITALIZE_UP)
c = DOWNCASE (c);
@@ -102,24 +98,51 @@
inword = (SYNTAX (c) == Sword);
if (c != c1)
{
- if (! multibyte)
- {
- MAKE_CHAR_UNIBYTE (c);
- SSET (obj, i_byte, c);
- }
- else if (ASCII_CHAR_P (c1) && ASCII_CHAR_P (c))
- SSET (obj, i_byte, c);
- else
- {
- Faset (obj, make_number (i), make_number (c));
- i_byte += CHAR_BYTES (c) - len;
- }
+ MAKE_CHAR_UNIBYTE (c);
+ if (c < 0 || c > 255)
+ error ("Non-unibyte char in unibyte string");
+ SSET (obj, i, c);
}
}
return obj;
}
+ else
+ {
+ EMACS_INT i, i_byte, len;
+ EMACS_INT size = SCHARS (obj);
+ USE_SAFE_ALLOCA;
+ unsigned char *dst, *o;
+ /* Over-allocate by 12%: this is a minor overhead, but should be
+ sufficient in 99.999% of the cases to avoid a reallocation. */
+ EMACS_INT o_size = SBYTES (obj) + SBYTES (obj) / 8 + MAX_MULTIBYTE_LENGTH;
+ SAFE_ALLOCA (dst, void *, o_size);
+ o = dst;
- wrong_type_argument (Qchar_or_string_p, obj);
+ for (i = i_byte = 0; i < size; i++, i_byte += len)
+ {
+ if ((o - dst) + MAX_MULTIBYTE_LENGTH > o_size)
+ { /* Not enough space for the next char: grow the destination. */
+ unsigned char *old_dst = dst;
+ o_size += o_size; /* Probably overkill, but extremely rare. */
+ SAFE_ALLOCA (dst, void *, o_size);
+ bcopy (old_dst, dst, o - old_dst);
+ o = dst + (o - old_dst);
+ }
+ c = STRING_CHAR_AND_LENGTH (SDATA (obj) + i_byte, 0, len);
+ if (inword && flag != CASE_CAPITALIZE_UP)
+ c = DOWNCASE (c);
+ else if (!UPPERCASEP (c)
+ && (!inword || flag != CASE_CAPITALIZE_UP))
+ c = UPCASE1 (c);
+ if ((int) flag >= (int) CASE_CAPITALIZE)
+ inword = (SYNTAX (c) == Sword);
+ o += CHAR_STRING (c, o);
+ }
+ eassert (o - dst <= o_size);
+ obj = make_multibyte_string (dst, size, o - dst);
+ SAFE_FREE ();
+ return obj;
+ }
}
DEFUN ("upcase", Fupcase, Supcase, 1, 1, 0,
@@ -329,10 +352,10 @@
return Qnil;
}
\f
-Lisp_Object
+static Lisp_Object
operate_on_word (arg, newpoint)
Lisp_Object arg;
- int *newpoint;
+ EMACS_INT *newpoint;
{
Lisp_Object val;
int farend;
@@ -358,7 +381,7 @@
Lisp_Object arg;
{
Lisp_Object beg, end;
- int newpoint;
+ EMACS_INT newpoint;
XSETFASTINT (beg, PT);
end = operate_on_word (arg, &newpoint);
casify_region (CASE_UP, beg, end);
@@ -373,7 +396,7 @@
Lisp_Object arg;
{
Lisp_Object beg, end;
- int newpoint;
+ EMACS_INT newpoint;
XSETFASTINT (beg, PT);
end = operate_on_word (arg, &newpoint);
casify_region (CASE_DOWN, beg, end);
@@ -390,7 +413,7 @@
Lisp_Object arg;
{
Lisp_Object beg, end;
- int newpoint;
+ EMACS_INT newpoint;
XSETFASTINT (beg, PT);
end = operate_on_word (arg, &newpoint);
casify_region (CASE_CAPITALIZE, beg, end);
next prev parent reply other threads:[~2008-02-16 14:33 UTC|newest]
Thread overview: 43+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-02-13 2:36 (aset UNIBYTE-STRING MULTIBYTE-CHAR) Kenichi Handa
2008-02-13 2:49 ` Stefan Monnier
2008-02-13 3:48 ` Kenichi Handa
2008-02-13 15:33 ` Stefan Monnier
2008-02-13 18:06 ` Stephen J. Turnbull
2008-02-13 19:33 ` Stefan Monnier
2008-02-13 22:49 ` Miles Bader
2008-02-14 1:11 ` Stephen J. Turnbull
2008-02-14 1:17 ` Miles Bader
2008-02-14 1:40 ` Stefan Monnier
2008-02-14 1:49 ` Miles Bader
2008-02-14 18:10 ` Richard Stallman
2008-02-14 22:40 ` David Kastrup
2008-02-15 1:08 ` Stephen J. Turnbull
2008-02-15 1:17 ` Miles Bader
2008-02-15 7:27 ` David Kastrup
2008-02-15 12:58 ` Richard Stallman
2008-02-14 23:37 ` Leo
2008-02-15 12:59 ` Richard Stallman
2008-02-14 4:20 ` Stephen J. Turnbull
2008-02-14 4:42 ` Richard Stallman
2008-02-15 1:39 ` Kenichi Handa
2008-02-15 4:27 ` Stefan Monnier
2008-02-15 8:42 ` Eli Zaretskii
2008-02-15 8:53 ` Miles Bader
2008-02-16 12:55 ` Eli Zaretskii
2008-02-16 5:53 ` Richard Stallman
2008-02-16 14:33 ` Stefan Monnier [this message]
2008-02-17 20:29 ` Richard Stallman
2008-02-18 1:15 ` Stefan Monnier
2008-02-18 4:00 ` Kenichi Handa
2008-02-18 17:31 ` Richard Stallman
2008-02-13 22:01 ` Richard Stallman
2008-02-13 23:13 ` Miles Bader
-- strict thread matches above, loose matches on Subject: below --
2008-04-15 7:11 Kenichi Handa
2008-04-15 15:52 ` Stefan Monnier
2008-04-17 1:13 ` Kenichi Handa
2008-05-07 19:31 Harald Hanche-Olsen
2008-05-14 6:54 ` Harald Hanche-Olsen
2008-05-14 12:22 ` Stefan Monnier
2008-05-14 12:50 ` Harald Hanche-Olsen
2008-05-15 1:18 ` Stefan Monnier
2008-05-15 6:11 ` Harald Hanche-Olsen
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=jwvbq6hkm8t.fsf-monnier+emacs@gnu.org \
--to=monnier@iro.umontreal.ca \
--cc=emacs-devel@gnu.org \
--cc=handa@m17n.org \
--cc=rms@gnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this external index
https://git.savannah.gnu.org/cgit/emacs.git
https://git.savannah.gnu.org/cgit/emacs/org-mode.git
This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.