all messages for Emacs-related lists mirrored at yhetil.org
 help / color / mirror / code / Atom feed
From: Stefan Monnier <monnier@iro.umontreal.ca>
To: rms@gnu.org
Cc: emacs-devel@gnu.org, Kenichi Handa <handa@m17n.org>
Subject: Re: (aset UNIBYTE-STRING MULTIBYTE-CHAR)
Date: Sat, 16 Feb 2008 09:33:24 -0500	[thread overview]
Message-ID: <jwvbq6hkm8t.fsf-monnier+emacs@gnu.org> (raw)
In-Reply-To: <E1JQFz2-0005qg-M7@fencepost.gnu.org> (Richard Stallman's message of "Sat, 16 Feb 2008 00:53:08 -0500")

>       If we are going to inhibit aset on multibyte strings, I think we
>     should inhibit aset on any strings to avoid a further confusion.

> I think someone should try making it work.
> The way I suggested should not be terribly hard.

The problem is the following: while it can be made to work, it will be
inefficient.  If we just make it work, the callers will never get to
know that they're doing things in a terribly inefficient way.  The real
fix is to change the caller.

BTW, I suggest the patch below to fix one such caller.


        Stefan


--- orig/src/casefiddle.c
+++ mod/src/casefiddle.c
@@ -75,23 +76,18 @@
       return obj;
     }
 
-  if (STRINGP (obj))
+  if (!STRINGP (obj))
+    wrong_type_argument (Qchar_or_string_p, obj);
+  else if (STRING_UNIBYTE (obj))
     {
-      int multibyte = STRING_MULTIBYTE (obj);
-      int i, i_byte, len;
-      int size = SCHARS (obj);
+      EMACS_INT i;
+      EMACS_INT size = SCHARS (obj);
 
       obj = Fcopy_sequence (obj);
-      for (i = i_byte = 0; i < size; i++, i_byte += len)
+      for (i = 0; i < size; i++)
 	{
-	  if (multibyte)
-	    c = STRING_CHAR_AND_LENGTH (SDATA (obj) + i_byte, 0, len);
-	  else
-	    {
-	      c = SREF (obj, i_byte);
-	      len = 1;
-	      MAKE_CHAR_MULTIBYTE (c);
-	    }
+	  c = SREF (obj, i);
+	  MAKE_CHAR_MULTIBYTE (c);
 	  c1 = c;
 	  if (inword && flag != CASE_CAPITALIZE_UP)
 	    c = DOWNCASE (c);
@@ -102,24 +98,51 @@
 	    inword = (SYNTAX (c) == Sword);
 	  if (c != c1)
 	    {
-	      if (! multibyte)
-		{
-		  MAKE_CHAR_UNIBYTE (c);
-		  SSET (obj, i_byte, c);
-		}
-	      else if (ASCII_CHAR_P (c1) && ASCII_CHAR_P (c))
-		SSET (obj, i_byte,  c);
-	      else
-		{
-		  Faset (obj, make_number (i), make_number (c));
-		  i_byte += CHAR_BYTES (c) - len;
-		}
+	      MAKE_CHAR_UNIBYTE (c);
+	      if (c < 0 || c > 255)
+		error ("Non-unibyte char in unibyte string");
+	      SSET (obj, i, c);
 	    }
 	}
       return obj;
     }
+  else
+    {
+      EMACS_INT i, i_byte, len;
+      EMACS_INT size = SCHARS (obj);
+      USE_SAFE_ALLOCA;
+      unsigned char *dst, *o;
+      /* Over-allocate by 12%: this is a minor overhead, but should be
+	 sufficient in 99.999% of the cases to avoid a reallocation.  */
+      EMACS_INT o_size = SBYTES (obj) + SBYTES (obj) / 8 + MAX_MULTIBYTE_LENGTH;
+      SAFE_ALLOCA (dst, void *, o_size);
+      o = dst;
 
-  wrong_type_argument (Qchar_or_string_p, obj);
+      for (i = i_byte = 0; i < size; i++, i_byte += len)
+	{
+	  if ((o - dst) + MAX_MULTIBYTE_LENGTH > o_size)
+	    { /* Not enough space for the next char: grow the destination.  */
+	      unsigned char *old_dst = dst;
+	      o_size += o_size;	/* Probably overkill, but extremely rare.  */
+	      SAFE_ALLOCA (dst, void *, o_size);
+	      bcopy (old_dst, dst, o - old_dst);
+	      o = dst + (o - old_dst);
+	    }
+	  c = STRING_CHAR_AND_LENGTH (SDATA (obj) + i_byte, 0, len);
+	  if (inword && flag != CASE_CAPITALIZE_UP)
+	    c = DOWNCASE (c);
+	  else if (!UPPERCASEP (c)
+		   && (!inword || flag != CASE_CAPITALIZE_UP))
+	    c = UPCASE1 (c);
+	  if ((int) flag >= (int) CASE_CAPITALIZE)
+	    inword = (SYNTAX (c) == Sword);
+	  o += CHAR_STRING (c, o);
+	}
+      eassert (o - dst <= o_size);
+      obj = make_multibyte_string (dst, size, o - dst);
+      SAFE_FREE ();
+      return obj;
+    }
 }
 
 DEFUN ("upcase", Fupcase, Supcase, 1, 1, 0,
@@ -329,10 +352,10 @@
   return Qnil;
 }
 \f
-Lisp_Object
+static Lisp_Object
 operate_on_word (arg, newpoint)
      Lisp_Object arg;
-     int *newpoint;
+     EMACS_INT *newpoint;
 {
   Lisp_Object val;
   int farend;
@@ -358,7 +381,7 @@
      Lisp_Object arg;
 {
   Lisp_Object beg, end;
-  int newpoint;
+  EMACS_INT newpoint;
   XSETFASTINT (beg, PT);
   end = operate_on_word (arg, &newpoint);
   casify_region (CASE_UP, beg, end);
@@ -373,7 +396,7 @@
      Lisp_Object arg;
 {
   Lisp_Object beg, end;
-  int newpoint;
+  EMACS_INT newpoint;
   XSETFASTINT (beg, PT);
   end = operate_on_word (arg, &newpoint);
   casify_region (CASE_DOWN, beg, end);
@@ -390,7 +413,7 @@
      Lisp_Object arg;
 {
   Lisp_Object beg, end;
-  int newpoint;
+  EMACS_INT newpoint;
   XSETFASTINT (beg, PT);
   end = operate_on_word (arg, &newpoint);
   casify_region (CASE_CAPITALIZE, beg, end);




  reply	other threads:[~2008-02-16 14:33 UTC|newest]

Thread overview: 43+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-02-13  2:36 (aset UNIBYTE-STRING MULTIBYTE-CHAR) Kenichi Handa
2008-02-13  2:49 ` Stefan Monnier
2008-02-13  3:48   ` Kenichi Handa
2008-02-13 15:33     ` Stefan Monnier
2008-02-13 18:06       ` Stephen J. Turnbull
2008-02-13 19:33         ` Stefan Monnier
2008-02-13 22:49         ` Miles Bader
2008-02-14  1:11           ` Stephen J. Turnbull
2008-02-14  1:17             ` Miles Bader
2008-02-14  1:40               ` Stefan Monnier
2008-02-14  1:49                 ` Miles Bader
2008-02-14 18:10                 ` Richard Stallman
2008-02-14 22:40                   ` David Kastrup
2008-02-15  1:08                     ` Stephen J. Turnbull
2008-02-15  1:17                       ` Miles Bader
2008-02-15  7:27                         ` David Kastrup
2008-02-15 12:58                     ` Richard Stallman
2008-02-14 23:37                   ` Leo
2008-02-15 12:59                     ` Richard Stallman
2008-02-14  4:20               ` Stephen J. Turnbull
2008-02-14  4:42         ` Richard Stallman
2008-02-15  1:39       ` Kenichi Handa
2008-02-15  4:27         ` Stefan Monnier
2008-02-15  8:42         ` Eli Zaretskii
2008-02-15  8:53           ` Miles Bader
2008-02-16 12:55             ` Eli Zaretskii
2008-02-16  5:53         ` Richard Stallman
2008-02-16 14:33           ` Stefan Monnier [this message]
2008-02-17 20:29             ` Richard Stallman
2008-02-18  1:15               ` Stefan Monnier
2008-02-18  4:00                 ` Kenichi Handa
2008-02-18 17:31                 ` Richard Stallman
2008-02-13 22:01 ` Richard Stallman
2008-02-13 23:13   ` Miles Bader
  -- strict thread matches above, loose matches on Subject: below --
2008-04-15  7:11 Kenichi Handa
2008-04-15 15:52 ` Stefan Monnier
2008-04-17  1:13   ` Kenichi Handa
2008-05-07 19:31 Harald Hanche-Olsen
2008-05-14  6:54 ` Harald Hanche-Olsen
2008-05-14 12:22   ` Stefan Monnier
2008-05-14 12:50     ` Harald Hanche-Olsen
2008-05-15  1:18       ` Stefan Monnier
2008-05-15  6:11         ` Harald Hanche-Olsen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=jwvbq6hkm8t.fsf-monnier+emacs@gnu.org \
    --to=monnier@iro.umontreal.ca \
    --cc=emacs-devel@gnu.org \
    --cc=handa@m17n.org \
    --cc=rms@gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line before the message body.

Code repositories for project(s) associated with this external index

	https://git.savannah.gnu.org/cgit/emacs.git
	https://git.savannah.gnu.org/cgit/emacs/org-mode.git

This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.