unofficial mirror of emacs-devel@gnu.org 
 help / color / mirror / code / Atom feed
* Using empty_string as the only "" string
@ 2007-04-24 16:32 Dmitry Antipov
  2007-04-24 17:05 ` Juanma Barranquero
                   ` (2 more replies)
  0 siblings, 3 replies; 59+ messages in thread
From: Dmitry Antipov @ 2007-04-24 16:32 UTC (permalink / raw)
  To: emacs-devel

[-- Attachment #1: Type: text/plain, Size: 192 bytes --]

Hello all,

probably I've missed something, but what's the reason(s) to have a
lot of "" (zero-length) strings ? Why not uniq them into the only
one ? Here is a way I'm doing this...

Dmitry


[-- Attachment #2: empty_string.patch --]
[-- Type: text/plain, Size: 3698 bytes --]

Index: alloc.c
===================================================================
RCS file: /sources/emacs/emacs/src/alloc.c,v
retrieving revision 1.409
diff -u -r1.409 alloc.c
--- alloc.c	16 Apr 2007 03:09:33 -0000	1.409
+++ alloc.c	24 Apr 2007 15:38:29 -0000
@@ -1947,7 +1947,7 @@
    S->data.  Set S->size to NCHARS and S->size_byte to NBYTES.  Free
    S->data if it was initially non-null.  */
 
-void
+struct Lisp_String *
 allocate_string_data (s, nchars, nbytes)
      struct Lisp_String *s;
      int nchars, nbytes;
@@ -2049,6 +2049,7 @@
     }
 
   consing_since_gc += needed;
+  return s;
 }
 
 
@@ -2493,14 +2494,14 @@
      int nchars, nbytes;
 {
   Lisp_Object string;
-  struct Lisp_String *s;
 
   if (nchars < 0)
     abort ();
+  if (!nbytes)
+    return empty_string;
 
-  s = allocate_string ();
-  allocate_string_data (s, nchars, nbytes);
-  XSETSTRING (string, s);
+  XSETSTRING (string, allocate_string_data (allocate_string (), 
+					    nchars, nbytes));
   string_chars_consed += nbytes;
   return string;
 }
@@ -6469,6 +6470,12 @@
   Qpost_gc_hook = intern ("post-gc-hook");
   staticpro (&Qpost_gc_hook);
 
+  /* Must be initialized before any other possible string
+     allocation can be made, and before syms_of_lread ().  */
+  XSETSTRING (empty_string, allocate_string_data (allocate_string (), 0, 0));
+  STRING_SET_UNIBYTE (empty_string);
+  staticpro (&empty_string);
+
   DEFVAR_LISP ("memory-signal-data", &Vmemory_signal_data,
 	       doc: /* Precomputed `signal' argument for memory-full error.  */);
   /* We build this in advance because if we wait until we need it, we might
Index: emacs.c
===================================================================
RCS file: /sources/emacs/emacs/src/emacs.c,v
retrieving revision 1.401
diff -u -r1.401 emacs.c
--- emacs.c	3 Apr 2007 15:25:28 -0000	1.401
+++ emacs.c	24 Apr 2007 15:38:38 -0000
@@ -2468,9 +2468,6 @@
 The hook is not run in batch mode, i.e., if `noninteractive' is non-nil.  */);
   Vkill_emacs_hook = Qnil;
 
-  empty_string = build_string ("");
-  staticpro (&empty_string);
-
   DEFVAR_INT ("emacs-priority", &emacs_priority,
 	      doc: /* Priority for Emacs to run at.
 This value is effective only if set before Emacs is dumped,
Index: lisp.h
===================================================================
RCS file: /sources/emacs/emacs/src/lisp.h,v
retrieving revision 1.574
diff -u -r1.574 lisp.h
--- lisp.h	17 Mar 2007 18:27:10 -0000	1.574
+++ lisp.h	24 Apr 2007 15:38:42 -0000
@@ -2545,7 +2545,8 @@
 
 /* Defined in alloc.c */
 extern void check_pure_size P_ ((void));
-extern void allocate_string_data P_ ((struct Lisp_String *, int, int));
+extern struct Lisp_String * allocate_string_data P_ ((struct Lisp_String *,
+						      int, int));
 extern void reset_malloc_hooks P_ ((void));
 extern void uninterrupt_malloc P_ ((void));
 extern void malloc_warning P_ ((char *));
Index: lread.c
===================================================================
RCS file: /sources/emacs/emacs/src/lread.c,v
retrieving revision 1.369
diff -u -r1.369 lread.c
--- lread.c	28 Mar 2007 08:16:19 -0000	1.369
+++ lread.c	24 Apr 2007 15:38:47 -0000
@@ -4070,8 +4070,7 @@
 in order to do so.  However, if you want to customize which suffixes
 the loading functions recognize as compression suffixes, you should
 customize `jka-compr-load-suffixes' rather than the present variable.  */);
-  /* We don't use empty_string because it's not initialized yet.  */
-  Vload_file_rep_suffixes = Fcons (build_string (""), Qnil);
+  Vload_file_rep_suffixes = Fcons (empty_string, Qnil);
 
   DEFVAR_BOOL ("load-in-progress", &load_in_progress,
 	       doc: /* Non-nil iff inside of `load'.  */);

[-- Attachment #3: Type: text/plain, Size: 142 bytes --]

_______________________________________________
Emacs-devel mailing list
Emacs-devel@gnu.org
http://lists.gnu.org/mailman/listinfo/emacs-devel

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: Using empty_string as the only "" string
  2007-04-24 16:32 Using " Dmitry Antipov
@ 2007-04-24 17:05 ` Juanma Barranquero
  2007-04-24 18:11   ` Andreas Schwab
  2007-04-25  2:05   ` Richard Stallman
  2007-04-24 17:48 ` Stefan Monnier
  2007-04-25  2:05 ` Richard Stallman
  2 siblings, 2 replies; 59+ messages in thread
From: Juanma Barranquero @ 2007-04-24 17:05 UTC (permalink / raw)
  To: Dmitry Antipov; +Cc: emacs-devel

On 4/24/07, Dmitry Antipov <dmantipov@yandex.ru> wrote:

> probably I've missed something, but what's the reason(s) to have a
> lot of "" (zero-length) strings ? Why not uniq them into the only
> one ?

With your patch, (eq "" "") => t

So:

ELISP> (eq (substring "ab" 0) (substring "ab" 0))
nil
ELISP> (eq (substring "ab" 1) (substring "ab" 1))
nil
ELISP> (eq (substring "ab" 2) (substring "ab" 2))
t

That doesn't seem a nice behaviour.

             Juanma

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: Using empty_string as the only "" string
  2007-04-24 16:32 Using " Dmitry Antipov
  2007-04-24 17:05 ` Juanma Barranquero
@ 2007-04-24 17:48 ` Stefan Monnier
  2007-04-25  2:05   ` Richard Stallman
  2007-04-26 14:24   ` Dmitry Antipov
  2007-04-25  2:05 ` Richard Stallman
  2 siblings, 2 replies; 59+ messages in thread
From: Stefan Monnier @ 2007-04-24 17:48 UTC (permalink / raw)
  To: Dmitry Antipov; +Cc: emacs-devel

> probably I've missed something, but what's the reason(s) to have a
> lot of "" (zero-length) strings ? Why not uniq them into the only
> one ?

Because it has the disadvantage of leading to subtly different behavior,
while the benefit is at best very marginal, if any.


        Stefan


PS: But if you're interested in such small optimizations, I have another one
in my local Emacs where the Lisp_String data type is changed to:

   struct Lisp_String
     {
       EMACS_INT size;
       EMACS_INT size_byte : BITS_PER_EMACS_INT - 1;
       unsigned inlined : 1;	/* 0 -> ptr, 1 -> chars; in union below.  */
       INTERVAL intervals;		/* text properties in this string */
       union
       {
         unsigned char *ptr;
         unsigned char chars[STRING_MAXINLINE];
       } data;
     };

this way, on 32bit systems, strings of up to 3 bytes can be represented with
just a Lisp_String without any `sdata'.  On 64bit systems, this can be used
for strings up to 7 bytes long (i.e. almost 50% of all allocated strings,
IIRC).  And it can also be used for all the strings in the pure space (no
matter how long), so it saves about 50KB of pure space (can't remember the
exact number, but IIRC it was more than 10KB and less than 100KB).

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: Using empty_string as the only "" string
  2007-04-24 17:05 ` Juanma Barranquero
@ 2007-04-24 18:11   ` Andreas Schwab
  2007-04-24 18:50     ` Juanma Barranquero
  2007-04-25  2:05   ` Richard Stallman
  1 sibling, 1 reply; 59+ messages in thread
From: Andreas Schwab @ 2007-04-24 18:11 UTC (permalink / raw)
  To: Juanma Barranquero; +Cc: Dmitry Antipov, emacs-devel

"Juanma Barranquero" <lekktu@gmail.com> writes:

> With your patch, (eq "" "") => t
>
> So:
>
> ELISP> (eq (substring "ab" 0) (substring "ab" 0))
> nil
> ELISP> (eq (substring "ab" 1) (substring "ab" 1))
> nil
> ELISP> (eq (substring "ab" 2) (substring "ab" 2))
> t
>
> That doesn't seem a nice behaviour.

I see nothing wrong with that.  If you want to compare strings you have to
use equal or string-equal anyway.

Andreas.

-- 
Andreas Schwab, SuSE Labs, schwab@suse.de
SuSE Linux Products GmbH, Maxfeldstraße 5, 90409 Nürnberg, Germany
PGP key fingerprint = 58CA 54C7 6D53 942B 1756  01D3 44D5 214B 8276 4ED5
"And now for something completely different."

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: Using empty_string as the only "" string
  2007-04-24 18:11   ` Andreas Schwab
@ 2007-04-24 18:50     ` Juanma Barranquero
  2007-04-24 21:38       ` Andreas Schwab
                         ` (2 more replies)
  0 siblings, 3 replies; 59+ messages in thread
From: Juanma Barranquero @ 2007-04-24 18:50 UTC (permalink / raw)
  To: Andreas Schwab; +Cc: Dmitry Antipov, emacs-devel

On 4/24/07, Andreas Schwab <schwab@suse.de> wrote:

> I see nothing wrong with that.

I do see it. The issue is not string comparison, but object identity.
It's no different from

(eq 0.0 0.0) => nil

I would be mightily surprised if

  (eq (- 2.0 0.0) (- 2.0 0.0)) => nil
  (eq (- 2.0 1.0) (- 2.0 1.0)) => nil
  (eq (- 2.0 2.0) (- 2.0 2.0)) => t

were true.

             Juanma

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: Using empty_string as the only "" string
  2007-04-24 18:50     ` Juanma Barranquero
@ 2007-04-24 21:38       ` Andreas Schwab
  2007-04-24 21:54         ` Juanma Barranquero
  2007-04-24 21:57         ` David Kastrup
  2007-04-24 21:39       ` Miles Bader
  2007-04-25  2:05       ` Richard Stallman
  2 siblings, 2 replies; 59+ messages in thread
From: Andreas Schwab @ 2007-04-24 21:38 UTC (permalink / raw)
  To: Juanma Barranquero; +Cc: Dmitry Antipov, emacs-devel

"Juanma Barranquero" <lekktu@gmail.com> writes:

> On 4/24/07, Andreas Schwab <schwab@suse.de> wrote:
>
>> I see nothing wrong with that.
>
> I do see it. The issue is not string comparison, but object identity.
> It's no different from
>
> (eq 0.0 0.0) => nil
>
> I would be mightily surprised if
>
>  (eq (- 2.0 0.0) (- 2.0 0.0)) => nil
>  (eq (- 2.0 1.0) (- 2.0 1.0)) => nil
>  (eq (- 2.0 2.0) (- 2.0 2.0)) => t
>
> were true.

In which way is this different from

   (eq (cdr '(a)) (cdr '(b))) => t

?

Andreas.

-- 
Andreas Schwab, SuSE Labs, schwab@suse.de
SuSE Linux Products GmbH, Maxfeldstraße 5, 90409 Nürnberg, Germany
PGP key fingerprint = 58CA 54C7 6D53 942B 1756  01D3 44D5 214B 8276 4ED5
"And now for something completely different."

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: Using empty_string as the only "" string
  2007-04-24 18:50     ` Juanma Barranquero
  2007-04-24 21:38       ` Andreas Schwab
@ 2007-04-24 21:39       ` Miles Bader
  2007-04-24 21:45         ` Juanma Barranquero
  2007-04-25  2:05       ` Richard Stallman
  2 siblings, 1 reply; 59+ messages in thread
From: Miles Bader @ 2007-04-24 21:39 UTC (permalink / raw)
  To: Juanma Barranquero; +Cc: Andreas Schwab, Dmitry Antipov, emacs-devel

"Juanma Barranquero" <lekktu@gmail.com> writes:
>> I see nothing wrong with that.
>
> I do see it. The issue is not string comparison, but object identity.
> I would be mightily surprised if
>
>  (eq (- 2.0 0.0) (- 2.0 0.0)) => nil
>  (eq (- 2.0 1.0) (- 2.0 1.0)) => nil
>  (eq (- 2.0 2.0) (- 2.0 2.0)) => t
>
> were true.

If so, it's because you misunderstand lisp.

There's absolutely nothing wrong with canonicalizing immutable objects
in lisp (and many implementations in fact do so).

-miles
-- 
Is it true that nothing can be known?  If so how do we know this?  -Woody Allen

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: Using empty_string as the only "" string
  2007-04-24 21:39       ` Miles Bader
@ 2007-04-24 21:45         ` Juanma Barranquero
  2007-04-24 22:11           ` Miles Bader
  0 siblings, 1 reply; 59+ messages in thread
From: Juanma Barranquero @ 2007-04-24 21:45 UTC (permalink / raw)
  To: Miles Bader; +Cc: Andreas Schwab, Dmitry Antipov, emacs-devel

On 4/24/07, Miles Bader <miles@gnu.org> wrote:

> If so, it's because you misunderstand lisp.

Oh, I'm not a big expert in Lisp, but I don't think I'm
misunderstanding anything.

> There's absolutely nothing wrong with canonicalizing immutable objects
> in lisp (and many implementations in fact do so).

No doubt. But I would consider broken an implementation which
canonicalized 0.0 and not 1.0, for example.

             Juanma

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: Using empty_string as the only "" string
  2007-04-24 21:38       ` Andreas Schwab
@ 2007-04-24 21:54         ` Juanma Barranquero
  2007-04-24 22:11           ` Andreas Schwab
  2007-04-24 21:57         ` David Kastrup
  1 sibling, 1 reply; 59+ messages in thread
From: Juanma Barranquero @ 2007-04-24 21:54 UTC (permalink / raw)
  To: Andreas Schwab; +Cc: Dmitry Antipov, emacs-devel

On 4/24/07, Andreas Schwab <schwab@suse.de> wrote:

> In which way is this different from
>
>    (eq (cdr '(a)) (cdr '(b))) => t
>

Well, that's non-optional, according to the Emacs Lisp documentation:
"Also, since symbol names are normally unique, if the arguments are
symbols with the same name, they are `eq'."

The doc also says: "For other types (e.g., lists, vectors, strings),
two arguments with the same contents or elements are not necessarily
`eq' to each other: they are `eq' only if they are the same object,
meaning that a change in the contents of one will be reflected by the
same change in the contents of the other."

So, as I said, it is an issue of object identity. I didn't say that
the results above would be "wrong", only surprising, and a noticeable
change in the current behaviour.

             Juanma

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: Using empty_string as the only "" string
  2007-04-24 21:38       ` Andreas Schwab
  2007-04-24 21:54         ` Juanma Barranquero
@ 2007-04-24 21:57         ` David Kastrup
  2007-04-24 22:07           ` Lennart Borgman (gmail)
  2007-04-24 22:12           ` Andreas Schwab
  1 sibling, 2 replies; 59+ messages in thread
From: David Kastrup @ 2007-04-24 21:57 UTC (permalink / raw)
  To: Andreas Schwab; +Cc: Juanma Barranquero, Dmitry Antipov, emacs-devel

Andreas Schwab <schwab@suse.de> writes:

> "Juanma Barranquero" <lekktu@gmail.com> writes:
>
>> On 4/24/07, Andreas Schwab <schwab@suse.de> wrote:
>>
>>> I see nothing wrong with that.
>>
>> I do see it. The issue is not string comparison, but object identity.
>> It's no different from
>>
>> (eq 0.0 0.0) => nil
>>
>> I would be mightily surprised if
>>
>>  (eq (- 2.0 0.0) (- 2.0 0.0)) => nil
>>  (eq (- 2.0 1.0) (- 2.0 1.0)) => nil
>>  (eq (- 2.0 2.0) (- 2.0 2.0)) => t
>>
>> were true.
>
> In which way is this different from
>
>    (eq (cdr '(a)) (cdr '(b))) => t

'(a) is (cons 'a nil), '(b) is (cons 'b nil),
so indeed the cdr of both lists is the identically same value.  Lists
are _identically_ grounded.

-- 
David Kastrup, Kriemhildstr. 15, 44793 Bochum

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: Using empty_string as the only "" string
  2007-04-24 21:57         ` David Kastrup
@ 2007-04-24 22:07           ` Lennart Borgman (gmail)
  2007-04-24 22:29             ` David Kastrup
  2007-04-24 22:12           ` Andreas Schwab
  1 sibling, 1 reply; 59+ messages in thread
From: Lennart Borgman (gmail) @ 2007-04-24 22:07 UTC (permalink / raw)
  To: David Kastrup
  Cc: Andreas Schwab, Dmitry Antipov, emacs-devel, Juanma Barranquero

David Kastrup wrote:

> '(a) is (cons 'a nil), '(b) is (cons 'b nil),
> so indeed the cdr of both lists is the identically same value.  Lists
> are _identically_ grounded.

And strings?

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: Using empty_string as the only "" string
  2007-04-24 21:54         ` Juanma Barranquero
@ 2007-04-24 22:11           ` Andreas Schwab
  2007-04-24 22:54             ` Juanma Barranquero
  0 siblings, 1 reply; 59+ messages in thread
From: Andreas Schwab @ 2007-04-24 22:11 UTC (permalink / raw)
  To: Juanma Barranquero; +Cc: Dmitry Antipov, emacs-devel

"Juanma Barranquero" <lekktu@gmail.com> writes:

> The doc also says: "For other types (e.g., lists, vectors, strings),
> two arguments with the same contents or elements are not necessarily
> `eq' to each other: they are `eq' only if they are the same object,
> meaning that a change in the contents of one will be reflected by the
> same change in the contents of the other."
>
> So, as I said, it is an issue of object identity. I didn't say that
> the results above would be "wrong", only surprising, and a noticeable
> change in the current behaviour.

The key word in the documentation is "necessarily".  Since the objects we
talk about are immutable there is no conflict with the specification.

Andreas.

-- 
Andreas Schwab, SuSE Labs, schwab@suse.de
SuSE Linux Products GmbH, Maxfeldstraße 5, 90409 Nürnberg, Germany
PGP key fingerprint = 58CA 54C7 6D53 942B 1756  01D3 44D5 214B 8276 4ED5
"And now for something completely different."

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: Using empty_string as the only "" string
  2007-04-24 21:45         ` Juanma Barranquero
@ 2007-04-24 22:11           ` Miles Bader
  2007-04-24 22:59             ` Juanma Barranquero
  0 siblings, 1 reply; 59+ messages in thread
From: Miles Bader @ 2007-04-24 22:11 UTC (permalink / raw)
  To: Juanma Barranquero; +Cc: Andreas Schwab, Dmitry Antipov, emacs-devel

"Juanma Barranquero" <lekktu@gmail.com> writes:
> Oh, I'm not a big expert in Lisp, but I don't think I'm
> misunderstanding anything.

You are misunderstanding what guarantees are made in lisp about object
identity.

> No doubt. But I would consider broken an implementation which
> canonicalized 0.0 and not 1.0, for example.

You're free to think what you like, but there are such implementations,
and they are not generally considered "broken."

For instance franz-lisp, which keeps "common" floating point numbers in
a small table for sharing, but heap-allocates all other floats; thus you
get something like (eq 0.0 0.0) => t, but (eq 1987.57 1987.57) => nil.

[There's also the traditional fixnum/bignum divide, which has a similar
effect on integers.]

Compiler optimizations can have similar effects.

-Miles

-- 
People who are more than casually interested in computers should have at
least some idea of what the underlying hardware is like.  Otherwise the
programs they write will be pretty weird.  -- Donald Knuth

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: Using empty_string as the only "" string
  2007-04-24 21:57         ` David Kastrup
  2007-04-24 22:07           ` Lennart Borgman (gmail)
@ 2007-04-24 22:12           ` Andreas Schwab
  2007-04-24 22:31             ` David Kastrup
  1 sibling, 1 reply; 59+ messages in thread
From: Andreas Schwab @ 2007-04-24 22:12 UTC (permalink / raw)
  To: David Kastrup; +Cc: Juanma Barranquero, Dmitry Antipov, emacs-devel

David Kastrup <dak@gnu.org> writes:

> Andreas Schwab <schwab@suse.de> writes:
>
>> In which way is this different from
>>
>>    (eq (cdr '(a)) (cdr '(b))) => t
>
> '(a) is (cons 'a nil), '(b) is (cons 'b nil),
> so indeed the cdr of both lists is the identically same value.  Lists
> are _identically_ grounded.

That does not preclude making more objects identical.

Andreas.

-- 
Andreas Schwab, SuSE Labs, schwab@suse.de
SuSE Linux Products GmbH, Maxfeldstraße 5, 90409 Nürnberg, Germany
PGP key fingerprint = 58CA 54C7 6D53 942B 1756  01D3 44D5 214B 8276 4ED5
"And now for something completely different."

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: Using empty_string as the only "" string
  2007-04-24 22:07           ` Lennart Borgman (gmail)
@ 2007-04-24 22:29             ` David Kastrup
  2007-04-24 22:35               ` Andreas Schwab
  2007-04-24 22:40               ` Lennart Borgman (gmail)
  0 siblings, 2 replies; 59+ messages in thread
From: David Kastrup @ 2007-04-24 22:29 UTC (permalink / raw)
  To: Lennart Borgman (gmail)
  Cc: Andreas Schwab, Dmitry Antipov, emacs-devel, Juanma Barranquero

"Lennart Borgman (gmail)" <lennart.borgman@gmail.com> writes:

> David Kastrup wrote:
>
>> '(a) is (cons 'a nil), '(b) is (cons 'b nil),
>> so indeed the cdr of both lists is the identically same value.  Lists
>> are _identically_ grounded.
>
> And strings?

Aren't grounded at all.  (concat "a" "b") does not make "b" part of
the result.  So (concat "a" "") and (concat "b" "") would not share
the same "" even if one replaced both of them by the same object.

Every created string is a separate object not containing any other
string as a part.  The same is not true for lists.  You can, for
example, modify a sublist and in the process change the list
containing it.

This does not happen for substrings.

-- 
David Kastrup, Kriemhildstr. 15, 44793 Bochum

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: Using empty_string as the only "" string
  2007-04-24 22:12           ` Andreas Schwab
@ 2007-04-24 22:31             ` David Kastrup
  2007-04-24 22:56               ` Andreas Schwab
  0 siblings, 1 reply; 59+ messages in thread
From: David Kastrup @ 2007-04-24 22:31 UTC (permalink / raw)
  To: Andreas Schwab; +Cc: Juanma Barranquero, Dmitry Antipov, emacs-devel

Andreas Schwab <schwab@suse.de> writes:

> David Kastrup <dak@gnu.org> writes:
>
>> Andreas Schwab <schwab@suse.de> writes:
>>
>>> In which way is this different from
>>>
>>>    (eq (cdr '(a)) (cdr '(b))) => t
>>
>> '(a) is (cons 'a nil), '(b) is (cons 'b nil),
>> so indeed the cdr of both lists is the identically same value.  Lists
>> are _identically_ grounded.
>
> That does not preclude making more objects identical.

Why blame _me_ when _your_ example does not support your view?

-- 
David Kastrup, Kriemhildstr. 15, 44793 Bochum

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: Using empty_string as the only "" string
  2007-04-24 22:29             ` David Kastrup
@ 2007-04-24 22:35               ` Andreas Schwab
  2007-04-25  0:55                 ` Kenichi Handa
  2007-04-24 22:40               ` Lennart Borgman (gmail)
  1 sibling, 1 reply; 59+ messages in thread
From: Andreas Schwab @ 2007-04-24 22:35 UTC (permalink / raw)
  To: David Kastrup
  Cc: Juanma Barranquero, Dmitry Antipov, Lennart Borgman (gmail),
	emacs-devel

David Kastrup <dak@gnu.org> writes:

> Aren't grounded at all.  (concat "a" "b") does not make "b" part of
> the result.  So (concat "a" "") and (concat "b" "") would not share
> the same "" even if one replaced both of them by the same object.
>
> Every created string is a separate object not containing any other
> string as a part.  The same is not true for lists.  You can, for
> example, modify a sublist and in the process change the list
> containing it.
>
> This does not happen for substrings.

Since you can't modify the empty string there is no contradiction.

Andreas.

-- 
Andreas Schwab, SuSE Labs, schwab@suse.de
SuSE Linux Products GmbH, Maxfeldstraße 5, 90409 Nürnberg, Germany
PGP key fingerprint = 58CA 54C7 6D53 942B 1756  01D3 44D5 214B 8276 4ED5
"And now for something completely different."

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: Using empty_string as the only "" string
  2007-04-24 22:29             ` David Kastrup
  2007-04-24 22:35               ` Andreas Schwab
@ 2007-04-24 22:40               ` Lennart Borgman (gmail)
  1 sibling, 0 replies; 59+ messages in thread
From: Lennart Borgman (gmail) @ 2007-04-24 22:40 UTC (permalink / raw)
  To: David Kastrup
  Cc: Andreas Schwab, Dmitry Antipov, emacs-devel, Juanma Barranquero

David Kastrup wrote:
> "Lennart Borgman (gmail)" <lennart.borgman@gmail.com> writes:
> 
>> David Kastrup wrote:
>>
>>> '(a) is (cons 'a nil), '(b) is (cons 'b nil),
>>> so indeed the cdr of both lists is the identically same value.  Lists
>>> are _identically_ grounded.
>> And strings?
> 
> Aren't grounded at all.  (concat "a" "b") does not make "b" part of
> the result.  So (concat "a" "") and (concat "b" "") would not share
> the same "" even if one replaced both of them by the same object.
> 
> Every created string is a separate object not containing any other
> string as a part.  The same is not true for lists.  You can, for
> example, modify a sublist and in the process change the list
> containing it.
> 
> This does not happen for substrings.

I am not sure I understand that this means that "" and "" can't be seen 
as equal. Is there anything theoretically wrong with thinking about a vector as
a list with certain restrictions, properties and special access routines?

But I am not into this at all so I do not know.

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: Using empty_string as the only "" string
  2007-04-24 22:11           ` Andreas Schwab
@ 2007-04-24 22:54             ` Juanma Barranquero
  0 siblings, 0 replies; 59+ messages in thread
From: Juanma Barranquero @ 2007-04-24 22:54 UTC (permalink / raw)
  To: Andreas Schwab; +Cc: Dmitry Antipov, emacs-devel

On 4/25/07, Andreas Schwab <schwab@suse.de> wrote:

> The key word in the documentation is "necessarily".  Since the objects we
> talk about are immutable there is no conflict with the specification.

It should be clear by now that I already know that.

There would be a conflict, however, both with the current behaviour
and with some people's expectations.

             Juanma

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: Using empty_string as the only "" string
  2007-04-24 22:31             ` David Kastrup
@ 2007-04-24 22:56               ` Andreas Schwab
  0 siblings, 0 replies; 59+ messages in thread
From: Andreas Schwab @ 2007-04-24 22:56 UTC (permalink / raw)
  To: David Kastrup; +Cc: Juanma Barranquero, Dmitry Antipov, emacs-devel

David Kastrup <dak@gnu.org> writes:

> Why blame _me_ when _your_ example does not support your view?

Why do you think anybody is blaming you?

Andreas.

-- 
Andreas Schwab, SuSE Labs, schwab@suse.de
SuSE Linux Products GmbH, Maxfeldstraße 5, 90409 Nürnberg, Germany
PGP key fingerprint = 58CA 54C7 6D53 942B 1756  01D3 44D5 214B 8276 4ED5
"And now for something completely different."

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: Using empty_string as the only "" string
  2007-04-24 22:11           ` Miles Bader
@ 2007-04-24 22:59             ` Juanma Barranquero
  2007-04-24 23:37               ` Miles Bader
  0 siblings, 1 reply; 59+ messages in thread
From: Juanma Barranquero @ 2007-04-24 22:59 UTC (permalink / raw)
  To: Miles Bader; +Cc: Andreas Schwab, Dmitry Antipov, emacs-devel

On 4/25/07, Miles Bader <miles@gnu.org> wrote:

> You are misunderstanding what guarantees are made in lisp about object
> identity.

I don't think there's such a thing as "lisp", but certainly I'm not
misunderstanding what guarantees are made about Emacs Lisp. I'm
talking about the "guarantee" implicit in its current behaviour. For
the umpteenth time, I *know* (eq "" "") => t is not forbidden; just
unexpected.

> thus you
> get something like (eq 0.0 0.0) => t, but (eq 1987.57 1987.57) => nil.

That's the reason I talked about 1.0, not 1987.57. 1.0 is a small and
commonly used number. An implementation canonicalizing 0.0 could
reasonably be expected to do the same for several other "interesting"
real numbers.

> [There's also the traditional fixnum/bignum divide, which has a similar
> effect on integers.]

And that's the reason I used reals and not integers in my example.

Could we please stop Lisp 101?

             Juanma

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: Using empty_string as the only "" string
  2007-04-24 22:59             ` Juanma Barranquero
@ 2007-04-24 23:37               ` Miles Bader
  2007-04-24 23:44                 ` Johan Bockgård
  0 siblings, 1 reply; 59+ messages in thread
From: Miles Bader @ 2007-04-24 23:37 UTC (permalink / raw)
  To: Juanma Barranquero; +Cc: Andreas Schwab, Dmitry Antipov, emacs-devel

"Juanma Barranquero" <lekktu@gmail.com> writes:
> I don't think there's such thing as "lisp", but certainly I'm not
> misunderstanding what guarantees are made about Emacs Lisp. I'm
> talking about the "guarantee" implicit in its current behaviour.

Huh?  That's not a guarantee, and absolutely should not be taken as one.

There's a _reason_ why eq traditionally has wide latitude for
implementation-dependent/undefined behavior in many cases.

> For the umpteenth time, I *know* (eq "" "") => t is not forbidden; just
> unexpected.

I disagree.  I don't think most people expect much one way or the other
in this case, _especially_ because we're talking about lexical constants
(for instance, I expect very few people would be surprised if the
compiler merged various lexical constants).

[The exception of course would be rank beginners, who often --
incorrectly -- _expect_ exactly this behavior.]

> Could we please stop Lisp 101?

(rolls eyes)

-Miles
-- 
Come now, if we were really planning to harm you, would we be waiting here,
 beside the path, in the very darkest part of the forest?

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: Using empty_string as the only "" string
  2007-04-24 23:37               ` Miles Bader
@ 2007-04-24 23:44                 ` Johan Bockgård
  2007-04-25  1:47                   ` Miles Bader
  2007-04-25 14:52                   ` Richard Stallman
  0 siblings, 2 replies; 59+ messages in thread
From: Johan Bockgård @ 2007-04-24 23:44 UTC (permalink / raw)
  To: emacs-devel

Miles Bader <miles@gnu.org> writes:

> I disagree.  I don't think most people expect much one way or the other
> in this case, _especially_ because we're talking about lexical constants
> (for instance, I expect very few people would be surprised if the
> compiler merged various lexical constants).

How about this then?

    (funcall
     (byte-compile
      (lambda ()
        (let ((s1 (concat "ab" "c"))
              (s2 (concat "a" "bc")))
          (eq s1 s2)))))

      => t

There used to be a note about this in FOR-RELEASE, but it was removed:

revision 1.411
date: 2006-11-18 22:07:44 +0100;  author: cyd;  state: Exp;  lines: +0 -3
** Prevent byte compiler from causing spurious string sharing
when it optimizes away calls to functions such as concat.
Fixed.


-- 
Johan Bockgård

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: Using empty_string as the only "" string
  2007-04-24 22:35               ` Andreas Schwab
@ 2007-04-25  0:55                 ` Kenichi Handa
  2007-04-25  9:51                   ` Andreas Schwab
  0 siblings, 1 reply; 59+ messages in thread
From: Kenichi Handa @ 2007-04-25  0:55 UTC (permalink / raw)
  To: Andreas Schwab; +Cc: lekktu, dmantipov, lennart.borgman, emacs-devel

In article <jeslapcseg.fsf@sykes.suse.de>, Andreas Schwab <schwab@suse.de> writes:

> Since you can't modify the empty string there is no contradiction.

You can modify the multibyteness of an empty string, and a
unibyte empty string and a multibyte empty string behave a
little bit differently, for instance, when concatenated with
a unibyte 8-bit string.

---
Kenichi Handa
handa@m17n.org

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: Using empty_string as the only "" string
  2007-04-24 23:44                 ` Johan Bockgård
@ 2007-04-25  1:47                   ` Miles Bader
  2007-04-25 14:52                   ` Richard Stallman
  1 sibling, 0 replies; 59+ messages in thread
From: Miles Bader @ 2007-04-25  1:47 UTC (permalink / raw)
  To: emacs-devel

bojohan+news@dd.chalmers.se (Johan Bockgård) writes:
>> I disagree.  I don't think most people expect much one way or the other
>> in this case, _especially_ because we're talking about lexical constants
>> (for instance, I expect very few people would be surprised if the
>> compiler merged various lexical constants).
>
> How about this then?
>
>         (let ((s1 (concat "ab" "c"))
>               (s2 (concat "a" "bc")))
>           (eq s1 s2)))))
..
> ** Prevent byte compiler from causing spurious string sharing
> when it optimizes away calls to functions such as concat.

S1 and S2 are not lexical constants -- and more importantly, the
`concat' function makes a specific guarantee that it will always return
a new string.

-Miles

-- 
`There are more things in heaven and earth, Horatio,
 Than are dreamt of in your philosophy.'

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: Using empty_string as the only "" string
  2007-04-24 16:32 Using " Dmitry Antipov
  2007-04-24 17:05 ` Juanma Barranquero
  2007-04-24 17:48 ` Stefan Monnier
@ 2007-04-25  2:05 ` Richard Stallman
  2 siblings, 0 replies; 59+ messages in thread
From: Richard Stallman @ 2007-04-25  2:05 UTC (permalink / raw)
  To: Dmitry Antipov; +Cc: emacs-devel

In principle this seems like a good idea, for Emacs 23.

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: Using empty_string as the only "" string
  2007-04-24 17:05 ` Juanma Barranquero
  2007-04-24 18:11   ` Andreas Schwab
@ 2007-04-25  2:05   ` Richard Stallman
  1 sibling, 0 replies; 59+ messages in thread
From: Richard Stallman @ 2007-04-25  2:05 UTC (permalink / raw)
  To: Juanma Barranquero; +Cc: dmantipov, emacs-devel

    ELISP> (eq (substring "ab" 0) (substring "ab" 0))
    nil
    ELISP> (eq (substring "ab" 1) (substring "ab" 1))
    nil
    ELISP> (eq (substring "ab" 2) (substring "ab" 2))
    t

    That doesn't seem a nice behaviour.

Any program that _depends_ on eq to return nil in the last case 
is taking a risk.

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: Using empty_string as the only "" string
  2007-04-24 17:48 ` Stefan Monnier
@ 2007-04-25  2:05   ` Richard Stallman
  2007-04-26 14:24   ` Dmitry Antipov
  1 sibling, 0 replies; 59+ messages in thread
From: Richard Stallman @ 2007-04-25  2:05 UTC (permalink / raw)
  To: Stefan Monnier; +Cc: dmantipov, emacs-devel

    this way, on 32bit systems, strings of up to 3 bytes can be represented with
    just a Lisp_String without any `sdata'.  On 64bit systems, this can be used
    for strings up to 7 bytes long (i.e. almost 50% of all allocated strings,
    IIRC).  And it can also be used for all the strings in the pure space (no
    matter how long), so it saves about 50KB of pure space (can't remember the
    exact number, but IIRC it was more than 10KB and less than 100KB).

Sounds good.

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: Using empty_string as the only "" string
  2007-04-24 18:50     ` Juanma Barranquero
  2007-04-24 21:38       ` Andreas Schwab
  2007-04-24 21:39       ` Miles Bader
@ 2007-04-25  2:05       ` Richard Stallman
  2007-04-25 12:00         ` Juanma Barranquero
  2 siblings, 1 reply; 59+ messages in thread
From: Richard Stallman @ 2007-04-25  2:05 UTC (permalink / raw)
  To: Juanma Barranquero; +Cc: schwab, dmantipov, emacs-devel

    I would be mightily surprised if

      (eq (- 2.0 0.0) (- 2.0 0.0)) => nil
      (eq (- 2.0 1.0) (- 2.0 1.0)) => nil
      (eq (- 2.0 2.0) (- 2.0 2.0)) => t

    were true.

You might well be surprised, given that you have learned to expect
something else.  But it would not be wrong.

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: using empty_string as the only "" string
@ 2007-04-25  5:38 dmantipov
  2007-04-25  5:49 ` Miles Bader
                   ` (3 more replies)
  0 siblings, 4 replies; 59+ messages in thread
From: dmantipov @ 2007-04-25  5:38 UTC (permalink / raw)
  To: emacs-devel

That was an interesting discussion, thanks to all.

All CLs I've installed (clisp, cmucl and franz) give (eq 0 0) => t and
(eq "" "") => nil. But a) we can tweak 'eq' to handle this special case
(looks poor, but just to purify the language) and b) Emacs isn't a CL
and should not have to obey CLtL2 completely, should it?

Immediately after startup but before any user interaction, my emacs
binary creates >260 empty strings, and >60 of them survive the first GC.
Saving 960 bytes (on a 32-bit system) of Lisp_Strings may be considered
marginal. But, for example, after you have gnus loaded, you will have
>1000 empty strings created, and >600 of them survive the next GC.
I don't agree that approx. 10K is a marginal space optimization even
if your desktop has 4G RAM.

Immediate (built into Lisp_String) short strings is a nice and
interesting idea too, IMHO.

I don't expect too much from the canonicalization of another objects,
'frequently-used' float numbers like 1.0 or 0.0 in particular. I believe
these objects are very rare (in comparison with empty strings) in the most
common situations, so it will be just 0.0001% over no-op.

> You can modify the multibyteness of an empty string, and a
> unibyte empty string and a multibyte empty string behave a
> little bit differently, for instance, when concatinated with
> an unibyte 8-bit string.

How can you modify the multibyteness of an empty string? You can't aset
a multibyte char (or anything else) into an empty string, and conversion
functions like 'string-make-unibyte' or 'string-to-multibyte' always create
new strings instead of touching their argument. Moreover, since "" is a
no-op in concatenation operations, it may be silently discarded without
looking into its internal structure, may it not?

Dmitry

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: using empty_string as the only "" string
  2007-04-25  5:38 using empty_string as the only "" string dmantipov
@ 2007-04-25  5:49 ` Miles Bader
  2007-04-25 11:50 ` Juanma Barranquero
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 59+ messages in thread
From: Miles Bader @ 2007-04-25  5:49 UTC (permalink / raw)
  To: emacs-devel

"dmantipov" <dmantipov@yandex.ru> writes:
> All CLs I've installed (clisp, cmucl and franz) gives (eq 0 0) => t and
> (eq "" "") => nil. But a) we can tweak 'eq' to handle this special case
> (looks poor, but just to purify the language)

No, eq should not be "tweaked" (especially for something so silly as the
current discussion) -- it's supposed to be fast and straightforward, and
as long as it does the right thing in those cases where its behavior
_is_ defined, who cares if it reveals implementation idiosyncrasies in
case where it's not?  Some things in lisp (CL or otherwise) are defined
narrowly, but not everything is, and that's ok.

-Miles
-- 
We are all lying in the gutter, but some of us are looking at the stars.
-Oscar Wilde

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: Using empty_string as the only "" string
  2007-04-25  0:55                 ` Kenichi Handa
@ 2007-04-25  9:51                   ` Andreas Schwab
  2007-04-25  9:58                     ` David Kastrup
  0 siblings, 1 reply; 59+ messages in thread
From: Andreas Schwab @ 2007-04-25  9:51 UTC (permalink / raw)
  To: Kenichi Handa; +Cc: lekktu, dmantipov, lennart.borgman, emacs-devel

Kenichi Handa <handa@m17n.org> writes:

> In article <jeslapcseg.fsf@sykes.suse.de>, Andreas Schwab <schwab@suse.de> writes:
>
>> Since you can't modify the empty string there is no contradiction.
>
> You can modify the multibyteness of an empty string,

Good point, that makes the optimisation indeed invalid.

Andreas.

-- 
Andreas Schwab, SuSE Labs, schwab@suse.de
SuSE Linux Products GmbH, Maxfeldstraße 5, 90409 Nürnberg, Germany
PGP key fingerprint = 58CA 54C7 6D53 942B 1756  01D3 44D5 214B 8276 4ED5
"And now for something completely different."

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: Using empty_string as the only "" string
  2007-04-25  9:51                   ` Andreas Schwab
@ 2007-04-25  9:58                     ` David Kastrup
  2007-04-25 10:50                       ` Andreas Schwab
  0 siblings, 1 reply; 59+ messages in thread
From: David Kastrup @ 2007-04-25  9:58 UTC (permalink / raw)
  To: Andreas Schwab
  Cc: lekktu, emacs-devel, dmantipov, lennart.borgman, Kenichi Handa

Andreas Schwab <schwab@suse.de> writes:

> Kenichi Handa <handa@m17n.org> writes:
>
>> In article <jeslapcseg.fsf@sykes.suse.de>, Andreas Schwab <schwab@suse.de> writes:
>>
>>> Since you can't modify the empty string there is no contradiction.
>>
>> You can modify the multibyteness of an empty string,
>
> Good point, that makes the optimisation indeed invalid.

How can one modify the multibyteness of any string object?  As far as
I can see, one can only create copies with a different multibicity.

-- 
David Kastrup

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: Using empty_string as the only "" string
  2007-04-25  9:58                     ` David Kastrup
@ 2007-04-25 10:50                       ` Andreas Schwab
  0 siblings, 0 replies; 59+ messages in thread
From: Andreas Schwab @ 2007-04-25 10:50 UTC (permalink / raw)
  To: David Kastrup
  Cc: lekktu, emacs-devel, dmantipov, lennart.borgman, Kenichi Handa

David Kastrup <dak@gnu.org> writes:

> Andreas Schwab <schwab@suse.de> writes:
>
>> Kenichi Handa <handa@m17n.org> writes:
>>
>>> In article <jeslapcseg.fsf@sykes.suse.de>, Andreas Schwab <schwab@suse.de> writes:
>>>
>>>> Since you can't modify the empty string there is no contradiction.
>>>
>>> You can modify the multibyteness of an empty string,
>>
>> Good point, that makes the optimisation indeed invalid.
>
> How can one modify the multibyteness of any string object?  As far as
> I can see, one can only create copies with a different multibicity.

Not any more with the proposed change in make_uninit_multibyte_string.

Andreas.

-- 
Andreas Schwab, SuSE Labs, schwab@suse.de
SuSE Linux Products GmbH, Maxfeldstraße 5, 90409 Nürnberg, Germany
PGP key fingerprint = 58CA 54C7 6D53 942B 1756  01D3 44D5 214B 8276 4ED5
"And now for something completely different."

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: using empty_string as the only "" string
  2007-04-25  5:38 using empty_string as the only "" string dmantipov
  2007-04-25  5:49 ` Miles Bader
@ 2007-04-25 11:50 ` Juanma Barranquero
  2007-04-25 11:56 ` Kenichi Handa
  2007-04-26  4:23 ` Richard Stallman
  3 siblings, 0 replies; 59+ messages in thread
From: Juanma Barranquero @ 2007-04-25 11:50 UTC (permalink / raw)
  To: dmantipov; +Cc: emacs-devel

On 4/25/07, dmantipov <dmantipov@yandex.ru> wrote:

> b) Emacs isn't a CL
> and should not obey CLtL2 completely, isn't it ?

I hope most CLs you use obey ANSI X3.226-1994, not CLtL2 :)

But anyway, I suggested elisp to continue to be compatible with
itself** not with CL.

             Juanma


**elisp, like Perl, is a one-implementation language, so grey areas in
the docs are implicitly defined by the interpreter's behaviour. Not
that they cannot be changed, of course; but doing so shouldn't be done
lightly.

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: using empty_string as the only "" string
  2007-04-25  5:38 using empty_string as the only "" string dmantipov
  2007-04-25  5:49 ` Miles Bader
  2007-04-25 11:50 ` Juanma Barranquero
@ 2007-04-25 11:56 ` Kenichi Handa
  2007-04-25 13:22   ` Dmitry Antipov
  2007-04-26  4:23 ` Richard Stallman
  3 siblings, 1 reply; 59+ messages in thread
From: Kenichi Handa @ 2007-04-25 11:56 UTC (permalink / raw)
  To: dmantipov; +Cc: emacs-devel

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset=ISO-2022-JP-2, Size: 1168 bytes --]

In article <462EE947.000007.15251@camay.yandex.ru>, "dmantipov" <dmantipov@yandex.ru> writes:

> > You can modify the multibyteness of an empty string, and a
> > unibyte empty string and a multibyte empty string behave a
> > little bit differently, for instance, when concatinated with
> > an unibyte 8-bit string.

> How you can modify the multibyteness of an empty string?

Ah, sorry, my mistake.  But your change make it impossible
to make an empty multibyte string.

> You can't aset
> multibyte char (as well as anything else) into empty string, and conversion
> functions like 'string-make-unibyte' or 'string-to-multibyte' always creates
> new strings instead of touching an argument. Moreover, since "" is a
> no-op in concatenation operations, it may be silently discarded without
> looking into internal structure, isn't it ?

Unfortunately no.  Currently Emacs behaves as this:

(concat "" "\300") => "\300"
(concat (string-to-multibyte "") "\300") => "À"

Of course, with more changes to alloc.c, we can keep unique
multibyte empty string and unique unibyte empty string.

But, is it really worth working on that?

---
Kenichi Handa
handa@m17n.org

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: Using empty_string as the only "" string
  2007-04-25  2:05       ` Richard Stallman
@ 2007-04-25 12:00         ` Juanma Barranquero
  0 siblings, 0 replies; 59+ messages in thread
From: Juanma Barranquero @ 2007-04-25 12:00 UTC (permalink / raw)
  To: rms; +Cc: schwab, dmantipov, emacs-devel

On 4/25/07, Richard Stallman <rms@gnu.org> wrote:

> You might well be surprised, given that you have learned to expect
> something else.  But it would not be wrong.

I knew that. That's why I used "not nice", "unexpected", "surprising",
"incompatible with current behaviour". I didn't say "wrong" one single
time.

             Juanma

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: using empty_string as the only "" string
  2007-04-25 11:56 ` Kenichi Handa
@ 2007-04-25 13:22   ` Dmitry Antipov
  2007-04-25 16:07     ` Stefan Monnier
  0 siblings, 1 reply; 59+ messages in thread
From: Dmitry Antipov @ 2007-04-25 13:22 UTC (permalink / raw)
  To: Kenichi Handa; +Cc: emacs-devel

Kenichi Handa wrote:

> Unfortunately no.  Currently Emacs behaves as this:
> 
> (concat "" "\300") => "\300"
> (concat (string-to-multibyte "") "\300") => "À"

Hm. Is there any reason(s) to behave as you described ?

For example, if I want to make multibyte "ÀÈ", I would like to use

(string-to-multibyte (concat "\300" "\310")) => "\xc0\xc8" (why ?)

or

(concat (string-as-multibyte "\300") (string-as-multibyte "\310")) => "\xc0\xc8" (why ?)

instead of

(concat (string-to-multibyte "") "\300" "\310") => "ÀÈ",

similar to C:

int x, y;
float f;
...
f = (float)(x + y); // may overflow

or

f = (float)x + (float)y; // probably better

instead of strange

f = (float)0 + x + y;

> Of course, with more changes to alloc.c, we can keep unique
> multibyte empty string and unique unibyte empty string.

IMHO this is similar to have more than one Qnil or Qt - very
strange, indeed.

> But, is it really worth working on that ?

If you mean making two empty strings - obviously no.

Dmitry

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: Using empty_string as the only "" string
  2007-04-24 23:44                 ` Johan Bockgård
  2007-04-25  1:47                   ` Miles Bader
@ 2007-04-25 14:52                   ` Richard Stallman
  2007-04-26 15:03                     ` Daniel Brockman
  1 sibling, 1 reply; 59+ messages in thread
From: Richard Stallman @ 2007-04-25 14:52 UTC (permalink / raw)
  To: Johan Bockgård; +Cc: emacs-devel

The issue is about null strings.  Non-null strings should
not be canonicalized because you can change the elements and text
properties in them.

Handa wrote:

    You can modify the multibyteness of an empty string, and a
    unibyte empty string and a multibyte empty string behave a
    little bit differently, for instance, when concatinated with
    an unibyte 8-bit string.

Thus, there would need to be two canonical null strings,
one unibyte and one multibyte.

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: using empty_string as the only "" string
  2007-04-25 13:22   ` Dmitry Antipov
@ 2007-04-25 16:07     ` Stefan Monnier
  0 siblings, 0 replies; 59+ messages in thread
From: Stefan Monnier @ 2007-04-25 16:07 UTC (permalink / raw)
  To: Dmitry Antipov; +Cc: emacs-devel, Kenichi Handa

> For example, if I want to make multibyte "ÀÈ", I would like to use
> (string-to-multibyte (concat "\300" "\310")) => "\xc0\xc8" (why ?)

See the docstring of string-to-multibyte: it's basically equivalent to
(decode-coding-string STRING 'binary).
If you want your unibyte string interpreted as a latin-1 encoded text, then
say so explicitly:

  (decode-coding-string (concat "\300" "\310") 'latin-1)  =>  "ÀÈ"

> (concat (string-as-multibyte "\300") (string-as-multibyte "\310")) =>
> "\xc0\xc8" (why ?)

If you don't know, then please pretty please, stay away from
string-as-multibyte.


        Stefan

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: using empty_string as the only "" string
  2007-04-25  5:38 using empty_string as the only "" string dmantipov
                   ` (2 preceding siblings ...)
  2007-04-25 11:56 ` Kenichi Handa
@ 2007-04-26  4:23 ` Richard Stallman
  2007-04-26 13:03   ` Dmitry Antipov
  3 siblings, 1 reply; 59+ messages in thread
From: Richard Stallman @ 2007-04-26  4:23 UTC (permalink / raw)
  To: dmantipov; +Cc: emacs-devel

    How you can modify the multibyteness of an empty string ? You can't aset
    multibyte char (as well as anything else) into empty string, and conversion
    functions like 'string-make-unibyte' or 'string-to-multibyte' always creates
    new strings instead of touching an argument. Moreover, since "" is a
    no-op in concatenation operations, it may be silently discarded without
    looking into internal structure, isn't it ?

The multibyteness of a null string does affect concatenation.
But you are right, I believe, that it is impossible to alter the
multibyteness of an existing null string.  You can't do it with aset
because there are no positions you could store in.

Thus, it would be necessary to keep one canonical null unibyte string
and one canonical null multibyte string.

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: using empty_string as the only "" string
  2007-04-26  4:23 ` Richard Stallman
@ 2007-04-26 13:03   ` Dmitry Antipov
  2007-04-27  6:00     ` Richard Stallman
  0 siblings, 1 reply; 59+ messages in thread
From: Dmitry Antipov @ 2007-04-26 13:03 UTC (permalink / raw)
  To: emacs-devel

Richard Stallman wrote:

> Thus, it would be necessary to keep one canonical null unibyte string
> and one canonical null multibyte string.

I am now convinced that it makes sense to have two null strings.

Also note that canonicalization of null strings assumes that a non-null interval
can't be attached to an empty string regardless of its multibyteness (at first
glance, this is how things work now, but I'm not sure about this). If this
is true, is it OK to allocate both null strings from pure space?

Dmitry

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: Using empty_string as the only "" string
  2007-04-24 17:48 ` Stefan Monnier
  2007-04-25  2:05   ` Richard Stallman
@ 2007-04-26 14:24   ` Dmitry Antipov
  1 sibling, 0 replies; 59+ messages in thread
From: Dmitry Antipov @ 2007-04-26 14:24 UTC (permalink / raw)
  To: emacs-devel

Stefan Monnier wrote:

> PS: But if you're interested in such small optimizations, I have another one
> in my local Emacs where the Lisp_String data type is changed to:
> 
>    struct Lisp_String
>      {
>        EMACS_INT size;
>        EMACS_INT size_byte : BITS_PER_EMACS_INT - 1;
>        unsigned inlined : 1;	/* 0 -> ptr, 1 -> chars; in union below.  */
>        INTERVAL intervals;		/* text properties in this string */
>        union
>        {
>          unsigned char *ptr;
>          unsigned char chars[STRING_MAXINLINE];
>        } data;
>      };
> 
> this way, on 32bit systems, strings of up to 3 bytes can be represented with
> just a Lisp_String without any `sdata'.  On 64bit systems, this can be used
> for strings up to 7 bytes long (i.e. almost 50% of all allocated strings,
> IIRC).  And it can also be used for all the strings in the pure space (no
> matter how long), so it saves about 50KB of pure space (can't remember the
> exact number, but IIRC it was more than 10KB and less than 100KB).

I'm interested in _any_ optimization. Here is a brain-damaged :-) Lisp_String
I'm thinking about:

#define STRING_IMMEDIATE_SIZE (sizeof (EMACS_INT) * 3 - 2)

struct Lisp_String
   {
     union
     {
       /* Immediate string.  */
       struct
       {
	unsigned immediate : 1;
	unsigned gcmarkbit : 1;
	unsigned size : BITS_PER_CHAR - 1;
	unsigned size_byte : BITS_PER_CHAR - 1;
	unsigned char data[STRING_IMMEDIATE_SIZE];
       } __attribute__ ((packed)) imm;
       /* Contains pointer to sdata.  */
       struct
       {
	unsigned immediate : 1;
	unsigned gcmarkbit : 1;
	unsigned size : BITS_PER_EMACS_INT - 1;
	unsigned size_byte : BITS_PER_EMACS_INT - 1;
	unsigned char *data;
       } __attribute__ ((packed)) dat;
     } u;
     INTERVAL intervals;		/* text properties in this string */
   };

This gives 9-byte "immediate" string on 32-bit and 21-byte on 64-bit (excluding
trailing '\0'). This is not suitable for long pure strings, btw.

Strictly speaking, this is not an optimization - it saves space at the (minimal ?)
cost of speed, since most string operations then involve at least one extra
conditional expression. For example,

#define STRING_BYTES(STR) ((STR)->size_byte < 0 ? (STR)->size : (STR)->size_byte)

becomes (over?)complicated

#define __IMM_P(STR) ((STR)->u.imm.immediate)
#define __IMMSIZE(STR) ((STR)->u.imm.size_byte < 0 ? (STR)->u.imm.size : (STR)->u.imm.size_byte)
#define __DATSIZE(STR) ((STR)->u.dat.size_byte < 0 ? (STR)->u.dat.size : (STR)->u.dat.size_byte)

#define STRING_BYTES(STR) (__IMM_P (STR) ? __IMMSIZE (STR) : __DATSIZE (STR))

Dmitry

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: Using empty_string as the only "" string
  2007-04-25 14:52                   ` Richard Stallman
@ 2007-04-26 15:03                     ` Daniel Brockman
  2007-04-27 20:40                       ` Richard Stallman
  0 siblings, 1 reply; 59+ messages in thread
From: Daniel Brockman @ 2007-04-26 15:03 UTC (permalink / raw)
  To: emacs-devel

Richard Stallman <rms@gnu.org> writes:

> Thus, there would need to be two canonical null strings,
> one unibyte and one multibyte.

Then what happens if you try to modify the multibyteness of
one of those canonical null strings?  Does the function that
performs this modification return a (possibly) new string?

-- 
Daniel Brockman <daniel@brockman.se>

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: using empty_string as the only "" string
  2007-04-26 13:03   ` Dmitry Antipov
@ 2007-04-27  6:00     ` Richard Stallman
  2007-04-27 10:04       ` Dmitry Antipov
  0 siblings, 1 reply; 59+ messages in thread
From: Richard Stallman @ 2007-04-27  6:00 UTC (permalink / raw)
  To: Dmitry Antipov; +Cc: emacs-devel

    Also note that canonicalization of null strings assumes that
    non-null interval can't be attached to empty string regardless of
    it's multibyteness (at a first glance, this is how the stuff ticks
    now, but I'm not sure about this).

Alas, I don't understand those words.

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: using empty_string as the only "" string
  2007-04-27  6:00     ` Richard Stallman
@ 2007-04-27 10:04       ` Dmitry Antipov
  2007-04-27 10:29         ` David Kastrup
  2007-04-28  4:06         ` Richard Stallman
  0 siblings, 2 replies; 59+ messages in thread
From: Dmitry Antipov @ 2007-04-27 10:04 UTC (permalink / raw)
  Cc: emacs-devel

Richard Stallman wrote:
>     Also note that canonicalization of null strings assumes that
>     non-null interval can't be attached to empty string regardless of
>     it's multibyteness (at a first glance, this is how the stuff ticks
>     now, but I'm not sure about this).
> 
> Alas, I don't understand those words.

Is it possible that STRING_INTERVALS(s) is not NULL if 's' is an empty string,
unibyte or multibyte ? I believe no (at least, 'set-text-properties' can't
attach properties to empty string).

Otherwise it would be nearly impossible to canonicalize empty strings since each
empty string may have unique intervals.

Dmitry

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: using empty_string as the only "" string
  2007-04-27 10:04       ` Dmitry Antipov
@ 2007-04-27 10:29         ` David Kastrup
  2007-04-28  4:06         ` Richard Stallman
  1 sibling, 0 replies; 59+ messages in thread
From: David Kastrup @ 2007-04-27 10:29 UTC (permalink / raw)
  To: Dmitry Antipov; +Cc: emacs-devel

Dmitry Antipov <dmantipov@yandex.ru> writes:

> Richard Stallman wrote:
>>     Also note that canonicalization of null strings assumes that
>>     non-null interval can't be attached to empty string regardless of
>>     it's multibyteness (at a first glance, this is how the stuff ticks
>>     now, but I'm not sure about this).
>>
>> Alas, I don't understand those words.
>
> Is it possible that STRING_INTERVALS(s) is not NULL if 's' is an empty string,
> unibyte or multibyte ? I believe no (at least, 'set-text-properties' can't
> attach properties to empty string).
>
> Otherwise it would be nearly impossible to canonicalize empty
> strings since each empty string may have unique intervals.

Buffers have overlays (and text), strings merely have text and that
text can have text properties on characters.

If an empty string contains either, it would appear to be a bug.
Things are different in XEmacs, where both are represented by
"extents" which can be empty in strings, too.

-- 
David Kastrup, Kriemhildstr. 15, 44793 Bochum

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: Using empty_string as the only "" string
  2007-04-26 15:03                     ` Daniel Brockman
@ 2007-04-27 20:40                       ` Richard Stallman
  0 siblings, 0 replies; 59+ messages in thread
From: Richard Stallman @ 2007-04-27 20:40 UTC (permalink / raw)
  To: Daniel Brockman; +Cc: emacs-devel

    Then what happens if you try to modify the multibyteness of
    one of those canonical null strings?

As far as I know, there is no way to do that.

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: using empty_string as the only "" string
  2007-04-27 10:04       ` Dmitry Antipov
  2007-04-27 10:29         ` David Kastrup
@ 2007-04-28  4:06         ` Richard Stallman
  2007-04-28  8:54           ` Dmitry Antipov
  1 sibling, 1 reply; 59+ messages in thread
From: Richard Stallman @ 2007-04-28  4:06 UTC (permalink / raw)
  To: Dmitry Antipov; +Cc: emacs-devel

    Is it possible that STRING_INTERVALS(s) is not NULL if 's' is an empty string,
    unibyte or multibyte ? I believe no (at least, 'set-text-properties' can't
    attach properties to empty string).

Text properties are attached to characters.  A null string has no characters
so it cannot have properties.

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: using empty_string as the only "" string
  2007-04-28  4:06         ` Richard Stallman
@ 2007-04-28  8:54           ` Dmitry Antipov
  2007-04-28 18:35             ` Richard Stallman
  0 siblings, 1 reply; 59+ messages in thread
From: Dmitry Antipov @ 2007-04-28  8:54 UTC (permalink / raw)
  To: emacs-devel

[-- Attachment #1: Type: text/plain, Size: 331 bytes --]

Richard Stallman wrote:

> Text properties are attached to characters.  A null string has no characters
> so it cannot have properties.

Ok, so here is a version with two canonical strings allocated from pure space.
Both canonical strings share XSTRING(s)->data, but that should be valid since
it can't be modified anyway.

Dmitry


[-- Attachment #2: empty_string_2.patch --]
[-- Type: text/plain, Size: 3796 bytes --]

Index: alloc.c
===================================================================
RCS file: /sources/emacs/emacs/src/alloc.c,v
retrieving revision 1.409
diff -u -r1.409 alloc.c
--- alloc.c	16 Apr 2007 03:09:33 -0000	1.409
+++ alloc.c	26 Apr 2007 13:22:16 -0000
@@ -1756,6 +1756,8 @@
   string_blocks = NULL;
   n_string_blocks = 0;
   string_free_list = NULL;
+  empty_string = make_pure_string ("", 0, 0, 0);
+  empty_multibyte_string = make_pure_string ("", 0, 0, 1);
 }
 
 
@@ -2479,6 +2481,9 @@
      int length;
 {
   Lisp_Object val;
+
+  if (!length)
+    return empty_string;
   val = make_uninit_multibyte_string (length, length);
   STRING_SET_UNIBYTE (val);
   return val;
@@ -2497,6 +2502,8 @@
 
   if (nchars < 0)
     abort ();
+  if (!nbytes)
+    return empty_multibyte_string;
 
   s = allocate_string ();
   allocate_string_data (s, nchars, nbytes);
Index: emacs.c
===================================================================
RCS file: /sources/emacs/emacs/src/emacs.c,v
retrieving revision 1.401
diff -u -r1.401 emacs.c
--- emacs.c	3 Apr 2007 15:25:28 -0000	1.401
+++ emacs.c	26 Apr 2007 13:22:19 -0000
@@ -133,8 +133,8 @@
 /* Hook run by `kill-emacs' before it does really anything.  */
 Lisp_Object Vkill_emacs_hook;
 
-/* An empty lisp string.  To avoid having to build any other.  */
-Lisp_Object empty_string;
+/* An empty lisp strings.  To avoid having to build any others.  */
+Lisp_Object empty_string, empty_multibyte_string;
 
 /* Search path separator.  */
 Lisp_Object Vpath_separator;
@@ -2468,9 +2468,6 @@
 The hook is not run in batch mode, i.e., if `noninteractive' is non-nil.  */);
   Vkill_emacs_hook = Qnil;
 
-  empty_string = build_string ("");
-  staticpro (&empty_string);
-
   DEFVAR_INT ("emacs-priority", &emacs_priority,
 	      doc: /* Priority for Emacs to run at.
 This value is effective only if set before Emacs is dumped,
Index: lisp.h
===================================================================
RCS file: /sources/emacs/emacs/src/lisp.h,v
retrieving revision 1.574
diff -u -r1.574 lisp.h
--- lisp.h      17 Mar 2007 18:27:10 -0000      1.574
+++ lisp.h      27 Apr 2007 15:23:33 -0000
@@ -701,7 +701,10 @@
 #endif /* not GC_CHECK_STRING_BYTES */
 
 /* Mark STR as a unibyte string.  */
-#define STRING_SET_UNIBYTE(STR)      (XSTRING (STR)->size_byte = -1)
+#define STRING_SET_UNIBYTE(STR)  \
+  do { if (EQ (STR, empty_multibyte_string))  \
+      (STR) = empty_string;  \
+    else XSTRING (STR)->size_byte = -1; } while (0)
 
 /* Get text properties.  */
 #define STRING_INTERVALS(STR)  (XSTRING (STR)->intervals + 0)
@@ -3060,7 +3063,8 @@
 /* defined in emacs.c */
 extern Lisp_Object decode_env_path P_ ((char *, char *));
 extern Lisp_Object Vinvocation_name, Vinvocation_directory;
-extern Lisp_Object Vinstallation_directory, empty_string;
+extern Lisp_Object Vinstallation_directory;
+extern Lisp_Object empty_string, empty_multibyte_string;
 EXFUN (Fkill_emacs, 1);
 #if HAVE_SETLOCALE
 void fixup_locale P_ ((void));
Index: lread.c
===================================================================
RCS file: /sources/emacs/emacs/src/lread.c,v
retrieving revision 1.369
diff -u -r1.369 lread.c
--- lread.c	28 Mar 2007 08:16:19 -0000	1.369
+++ lread.c	26 Apr 2007 13:22:35 -0000
@@ -4070,8 +4070,7 @@
 in order to do so.  However, if you want to customize which suffixes
 the loading functions recognize as compression suffixes, you should
 customize `jka-compr-load-suffixes' rather than the present variable.  */);
-  /* We don't use empty_string because it's not initialized yet.  */
-  Vload_file_rep_suffixes = Fcons (build_string (""), Qnil);
+  Vload_file_rep_suffixes = Fcons (empty_string, Qnil);
 
   DEFVAR_BOOL ("load-in-progress", &load_in_progress,
 	       doc: /* Non-nil iff inside of `load'.  */);

[-- Attachment #3: Type: text/plain, Size: 142 bytes --]

_______________________________________________
Emacs-devel mailing list
Emacs-devel@gnu.org
http://lists.gnu.org/mailman/listinfo/emacs-devel

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: using empty_string as the only "" string
  2007-04-28  8:54           ` Dmitry Antipov
@ 2007-04-28 18:35             ` Richard Stallman
  2007-06-05 15:43               ` Juanma Barranquero
  0 siblings, 1 reply; 59+ messages in thread
From: Richard Stallman @ 2007-04-28 18:35 UTC (permalink / raw)
  To: Dmitry Antipov; +Cc: emacs-devel

    Ok, so here is a version with two canonical strings allocated from pure space.
    Both canonical strings share XSTRING(s)->data, but it should be valid since
    it can't be modified anyway.

People could start experimenting with running with your patch.

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: using empty_string as the only "" string
  2007-04-28 18:35             ` Richard Stallman
@ 2007-06-05 15:43               ` Juanma Barranquero
  2007-06-05 19:17                 ` Richard Stallman
  0 siblings, 1 reply; 59+ messages in thread
From: Juanma Barranquero @ 2007-06-05 15:43 UTC (permalink / raw)
  To: rms; +Cc: Dmitry Antipov, emacs-devel

On 4/28/07, Richard Stallman <rms@gnu.org> wrote:

>     Ok, so here is a version with two canonical strings allocated from pure space.
>     Both canonical strings share XSTRING(s)->data, but it should be valid since
>     it can't be modified anyway.
>
> People could start experimenting with running with your patch.

I've been using the empty-string patch for a while and I have not
observed any bad behavior.

             Juanma

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: using empty_string as the only "" string
  2007-06-05 15:43               ` Juanma Barranquero
@ 2007-06-05 19:17                 ` Richard Stallman
  2007-06-05 19:45                   ` Juanma Barranquero
  0 siblings, 1 reply; 59+ messages in thread
From: Richard Stallman @ 2007-06-05 19:17 UTC (permalink / raw)
  To: Juanma Barranquero; +Cc: dmantipov, emacs-devel

    I've been using the empty-string patch for a while and I have not
    observed any bad behavior.

In that case, let's install it, presuming it has been changed to
maintain the two different empty strings (one unibyte and one
multibyte).  (We discussed that issue, but I don't recall whether this
change was made.)

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: using empty_string as the only "" string
  2007-06-05 19:17                 ` Richard Stallman
@ 2007-06-05 19:45                   ` Juanma Barranquero
  2007-06-06  1:17                     ` Stefan Monnier
  0 siblings, 1 reply; 59+ messages in thread
From: Juanma Barranquero @ 2007-06-05 19:45 UTC (permalink / raw)
  To: rms; +Cc: dmantipov, emacs-devel

On 6/5/07, Richard Stallman <rms@gnu.org> wrote:

> In that case, let's install it, presuming it has been changed to
> maintain the two different empty strings (one unibyte and one
> multibyte).

Yes, it has:

--- alloc.c     16 Apr 2007 03:09:33 -0000      1.409
+++ alloc.c     26 Apr 2007 13:22:16 -0000
@@ -1756,6 +1756,8 @@
  string_blocks = NULL;
  n_string_blocks = 0;
  string_free_list = NULL;
+  empty_string = make_pure_string ("", 0, 0, 0);
+  empty_multibyte_string = make_pure_string ("", 0, 0, 1);
 }


             Juanma

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: using empty_string as the only "" string
  2007-06-05 19:45                   ` Juanma Barranquero
@ 2007-06-06  1:17                     ` Stefan Monnier
  2007-06-06 11:04                       ` Juanma Barranquero
  0 siblings, 1 reply; 59+ messages in thread
From: Stefan Monnier @ 2007-06-06  1:17 UTC (permalink / raw)
  To: Juanma Barranquero; +Cc: dmantipov, rms, emacs-devel

> +  empty_string = make_pure_string ("", 0, 0, 0);
> +  empty_multibyte_string = make_pure_string ("", 0, 0, 1);

I believe empty_string should be called empty_unibyte_string, so as to make
sure that people choose the right multibyteness.


        Stefan

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: using empty_string as the only "" string
  2007-06-06  1:17                     ` Stefan Monnier
@ 2007-06-06 11:04                       ` Juanma Barranquero
  2007-06-06 22:09                         ` Richard Stallman
  0 siblings, 1 reply; 59+ messages in thread
From: Juanma Barranquero @ 2007-06-06 11:04 UTC (permalink / raw)
  To: Stefan Monnier; +Cc: dmantipov, rms, emacs-devel

On 6/6/07, Stefan Monnier <monnier@iro.umontreal.ca> wrote:

> I believe empty_string should be called empty_unibyte_string, so as to make
> sure that people choose the right multibyteness.

Here's the patch I'm using. It'd be good if people could test it in
non-Windows builds.

BTW, Dmitry's contribution (other than the original idea, of course :)
is about 5 changed lines, 8 added, 2 removed. He has a previous tiny
patch (5 lines changed, 1 added, plus comment). I'm not sure whether
that requires signed papers or not.

             Juanma


Index: src/ChangeLog
===================================================================
RCS file: /cvsroot/emacs/emacs/src/ChangeLog,v
retrieving revision 1.5692
diff -u -2 -r1.5692 ChangeLog
--- src/ChangeLog	6 Jun 2007 08:33:32 -0000	1.5692
+++ src/ChangeLog	6 Jun 2007 10:34:41 -0000
@@ -1,2 +1,42 @@
+2007-06-06  Juanma Barranquero  <lekktu@gmail.com>
+
+	* sunfns.c (sel_read):
+	* xdisp.c (Fformat_mode_line):
+	* xselect.c (Fx_get_atom_name): Use empty_unibyte_string,
+	not make_string.
+
+	* callint.c (Fcall_interactively):
+	* editfns.c (Fdelete_and_extract_region):
+	* fns.c (Fmapconcat):
+	* keyboard.c (cmd_error_internal):
+	* lread.c (openp):
+	* xterm.c (x_term_init): Use empty_unibyte_string, not build_string.
+
+	* fileio.c (Fread_file_name):
+	* keymap.c (Fkey_description):
+	* minibuf.c (read_minibuf):
+	* search.c (wordify):
+	* xdisp.c (syms_of_xdisp):
+	* xfns.c (x_default_scroll_bar_color_parameter):
+	* xmenu.c (menu_help_callback): Use empty_unibyte_string,
+	not empty_string.
+
+2007-06-06  Dmitry Antipov  <dmitry.antipov@mail.ru>
+
+	* alloc.c (init_strings): Initialize canonical empty strings.
+	(make_uninit_string, make_uninit_multibyte_string): Return appropriate
+	canonical empty string when the requested size is 0.
+
+	* emacs.c (empty_unibyte_string): Rename from empty_string.
+	(empty_multibyte_string): New canonical empty string.
+	(syms_of_emacs): Don't initialize empty_string.
+
+	* lisp.h (STRING_SET_UNIBYTE): Return the canonical empty unibyte
+	string, if appropriate.
+	(empty_unibyte_string, empty_multibyte_string): New externs.
+	(empty_string): Remove extern.
+
+	* lread.c (syms_of_lread): Use empty_unibyte_string, not build_string.
+
 2007-06-06  YAMAMOTO Mitsuharu  <mituharu@math.s.chiba-u.ac.jp>

Index: src/alloc.c
===================================================================
RCS file: /cvsroot/emacs/emacs/src/alloc.c,v
retrieving revision 1.409
diff -u -2 -r1.409 alloc.c
--- src/alloc.c	16 Apr 2007 03:09:33 -0000	1.409
+++ src/alloc.c	6 Jun 2007 08:03:03 -0000
@@ -1757,4 +1757,6 @@
   n_string_blocks = 0;
   string_free_list = NULL;
+  empty_unibyte_string = make_pure_string ("", 0, 0, 0);
+  empty_multibyte_string = make_pure_string ("", 0, 0, 1);
 }

@@ -2480,4 +2482,7 @@
 {
   Lisp_Object val;
+
+  if (!length)
+    return empty_unibyte_string;
   val = make_uninit_multibyte_string (length, length);
   STRING_SET_UNIBYTE (val);
@@ -2498,4 +2503,6 @@
   if (nchars < 0)
     abort ();
+  if (!nbytes)
+    return empty_multibyte_string;

   s = allocate_string ();
Index: src/callint.c
===================================================================
RCS file: /cvsroot/emacs/emacs/src/callint.c,v
retrieving revision 1.150
diff -u -2 -r1.150 callint.c
--- src/callint.c	14 Jan 2007 03:24:37 -0000	1.150
+++ src/callint.c	6 Jun 2007 08:09:48 -0000
@@ -586,5 +586,5 @@
 				   default to directory alone. */
 	  args[i] = Fread_file_name (callint_message,
-				     Qnil, Qnil, Qnil, build_string (""), Qnil);
+				     Qnil, Qnil, Qnil, empty_unibyte_string, Qnil);
 	  break;

Index: src/editfns.c
===================================================================
RCS file: /cvsroot/emacs/emacs/src/editfns.c,v
retrieving revision 1.440
diff -u -2 -r1.440 editfns.c
--- src/editfns.c	8 May 2007 02:05:46 -0000	1.440
+++ src/editfns.c	6 Jun 2007 08:10:09 -0000
@@ -3043,5 +3043,5 @@
   validate_region (&start, &end);
   if (XINT (start) == XINT (end))
-    return build_string ("");
+    return empty_unibyte_string;
   return del_range_1 (XINT (start), XINT (end), 1, 1);
 }
Index: src/emacs.c
===================================================================
RCS file: /cvsroot/emacs/emacs/src/emacs.c,v
retrieving revision 1.401
diff -u -2 -r1.401 emacs.c
--- src/emacs.c	3 Apr 2007 15:25:28 -0000	1.401
+++ src/emacs.c	6 Jun 2007 08:03:41 -0000
@@ -134,6 +134,6 @@
 Lisp_Object Vkill_emacs_hook;

-/* An empty lisp string.  To avoid having to build any other.  */
-Lisp_Object empty_string;
+/* Empty lisp strings.  To avoid having to build any others.  */
+Lisp_Object empty_unibyte_string, empty_multibyte_string;

 /* Search path separator.  */
@@ -2469,7 +2469,4 @@
   Vkill_emacs_hook = Qnil;

-  empty_string = build_string ("");
-  staticpro (&empty_string);
-
   DEFVAR_INT ("emacs-priority", &emacs_priority,
 	      doc: /* Priority for Emacs to run at.
Index: src/fileio.c
===================================================================
RCS file: /cvsroot/emacs/emacs/src/fileio.c,v
retrieving revision 1.580
diff -u -2 -r1.580 fileio.c
--- src/fileio.c	22 Mar 2007 12:15:04 -0000	1.580
+++ src/fileio.c	6 Jun 2007 08:07:28 -0000
@@ -6431,5 +6431,5 @@
 	add_to_history = 1;

-      val = empty_string;
+      val = empty_unibyte_string;
     }

Index: src/fns.c
===================================================================
RCS file: /cvsroot/emacs/emacs/src/fns.c,v
retrieving revision 1.425
diff -u -2 -r1.425 fns.c
--- src/fns.c	26 May 2007 17:21:14 -0000	1.425
+++ src/fns.c	6 Jun 2007 08:10:25 -0000
@@ -3135,5 +3135,5 @@
   leni = XINT (len);
   nargs = leni + leni - 1;
-  if (nargs < 0) return build_string ("");
+  if (nargs < 0) return empty_unibyte_string;

   SAFE_ALLOCA_LISP (args, nargs);
Index: src/keyboard.c
===================================================================
RCS file: /cvsroot/emacs/emacs/src/keyboard.c,v
retrieving revision 1.904
diff -u -2 -r1.904 keyboard.c
--- src/keyboard.c	3 Jun 2007 00:57:11 -0000	1.904
+++ src/keyboard.c	6 Jun 2007 08:18:22 -0000
@@ -1250,5 +1250,5 @@
   if (!NILP (Vcommand_error_function))
     call3 (Vcommand_error_function, data,
-	   build_string (context ? context : ""),
+	   context ? build_string (context) : empty_unibyte_string,
 	   Vsignaling_function);
   /* If the window system or terminal frame hasn't been initialized
Index: src/keymap.c
===================================================================
RCS file: /cvsroot/emacs/emacs/src/keymap.c,v
retrieving revision 1.354
diff -u -2 -r1.354 keymap.c
--- src/keymap.c	7 May 2007 20:49:55 -0000	1.354
+++ src/keymap.c	6 Jun 2007 08:07:58 -0000
@@ -2189,5 +2189,5 @@
 	}
       else if (len == 0)
-	return empty_string;
+	return empty_unibyte_string;
       return Fconcat (len - 1, args);
     }
Index: src/lisp.h
===================================================================
RCS file: /cvsroot/emacs/emacs/src/lisp.h,v
retrieving revision 1.576
diff -u -2 -r1.576 lisp.h
--- src/lisp.h	20 May 2007 02:44:05 -0000	1.576
+++ src/lisp.h	6 Jun 2007 08:02:24 -0000
@@ -702,5 +702,8 @@

 /* Mark STR as a unibyte string.  */
-#define STRING_SET_UNIBYTE(STR)      (XSTRING (STR)->size_byte = -1)
+#define STRING_SET_UNIBYTE(STR)  \
+  do { if (EQ (STR, empty_multibyte_string))  \
+      (STR) = empty_unibyte_string;  \
+    else XSTRING (STR)->size_byte = -1; } while (0)

 /* Get text properties.  */
@@ -3061,5 +3064,6 @@
 extern Lisp_Object decode_env_path P_ ((char *, char *));
 extern Lisp_Object Vinvocation_name, Vinvocation_directory;
-extern Lisp_Object Vinstallation_directory, empty_string;
+extern Lisp_Object Vinstallation_directory;
+extern Lisp_Object empty_unibyte_string, empty_multibyte_string;
 EXFUN (Fkill_emacs, 1);
 #if HAVE_SETLOCALE
Index: src/lread.c
===================================================================
RCS file: /cvsroot/emacs/emacs/src/lread.c,v
retrieving revision 1.370
diff -u -2 -r1.370 lread.c
--- src/lread.c	28 Apr 2007 17:24:22 -0000	1.370
+++ src/lread.c	6 Jun 2007 08:10:43 -0000
@@ -1200,5 +1200,5 @@

       /* Loop over suffixes.  */
-      for (tail = NILP (suffixes) ? Fcons (build_string (""), Qnil) : suffixes;
+      for (tail = NILP (suffixes) ? Fcons (empty_unibyte_string, Qnil) : suffixes;
 	   CONSP (tail); tail = XCDR (tail))
 	{
@@ -4071,6 +4071,5 @@
 the loading functions recognize as compression suffixes, you should
 customize `jka-compr-load-suffixes' rather than the present variable.  */);
-  /* We don't use empty_string because it's not initialized yet.  */
-  Vload_file_rep_suffixes = Fcons (build_string (""), Qnil);
+  Vload_file_rep_suffixes = Fcons (empty_unibyte_string, Qnil);

   DEFVAR_BOOL ("load-in-progress", &load_in_progress,
Index: src/minibuf.c
===================================================================
RCS file: /cvsroot/emacs/emacs/src/minibuf.c,v
retrieving revision 1.329
diff -u -2 -r1.329 minibuf.c
--- src/minibuf.c	19 Apr 2007 22:20:47 -0000	1.329
+++ src/minibuf.c	6 Jun 2007 08:08:10 -0000
@@ -530,5 +530,5 @@

   if (!STRINGP (prompt))
-    prompt = empty_string;
+    prompt = empty_unibyte_string;

   if (!enable_recursive_minibuffers
Index: src/search.c
===================================================================
RCS file: /cvsroot/emacs/emacs/src/search.c,v
retrieving revision 1.221
diff -u -2 -r1.221 search.c
--- src/search.c	14 Jan 2007 03:24:37 -0000	1.221
+++ src/search.c	6 Jun 2007 08:37:46 -0000
@@ -2096,5 +2096,5 @@
     word_count++;
   if (!word_count)
-    return empty_string;
+    return empty_unibyte_string;

   adjust = - punct_count + 5 * (word_count - 1) + 4;
Index: src/sunfns.c
===================================================================
RCS file: /cvsroot/emacs/emacs/src/sunfns.c,v
retrieving revision 1.32
diff -u -2 -r1.32 sunfns.c
--- src/sunfns.c	21 Jan 2007 04:18:15 -0000	1.32
+++ src/sunfns.c	6 Jun 2007 08:16:51 -0000
@@ -288,5 +288,5 @@
   register char *cp;

-  Current_Selection = make_string ("", 0);
+  Current_Selection = empty_unibyte_string;
   if (sel->sel_items <= 0)
     return (0);
Index: src/xdisp.c
===================================================================
RCS file: /cvsroot/emacs/emacs/src/xdisp.c,v
retrieving revision 1.1153
diff -u -2 -r1.1153 xdisp.c
--- src/xdisp.c	29 May 2007 23:19:43 -0000	1.1153
+++ src/xdisp.c	6 Jun 2007 08:15:05 -0000
@@ -17343,5 +17343,5 @@

   if (NILP (format))
-    return build_string ("");
+    return empty_unibyte_string;

   if (no_props)
@@ -17401,5 +17401,5 @@
       mode_line_string_list = Fnreverse (mode_line_string_list);
       str = Fmapconcat (intern ("identity"), mode_line_string_list,
-			make_string ("", 0));
+			empty_unibyte_string);
     }

@@ -24082,5 +24082,5 @@
     = Fcons (intern ("multiple-frames"),
 	     Fcons (build_string ("%b"),
-		    Fcons (Fcons (empty_string,
+		    Fcons (Fcons (empty_unibyte_string,
 				  Fcons (intern ("invocation-name"),
 					 Fcons (build_string ("@"),
Index: src/xfns.c
===================================================================
RCS file: /cvsroot/emacs/emacs/src/xfns.c,v
retrieving revision 1.682
diff -u -2 -r1.682 xfns.c
--- src/xfns.c	1 Jun 2007 04:00:27 -0000	1.682
+++ src/xfns.c	6 Jun 2007 08:08:40 -0000
@@ -1842,7 +1842,7 @@
 						  ? "foreground"
 						  : "background"),
-				    empty_string,
+				    empty_unibyte_string,
 				    build_string ("verticalScrollBar"),
-				    empty_string);
+				    empty_unibyte_string);
       if (!STRINGP (tem))
 	{
Index: src/xmenu.c
===================================================================
RCS file: /cvsroot/emacs/emacs/src/xmenu.c,v
retrieving revision 1.318
diff -u -2 -r1.318 xmenu.c
--- src/xmenu.c	27 Apr 2007 06:25:58 -0000	1.318
+++ src/xmenu.c	6 Jun 2007 08:08:50 -0000
@@ -3429,5 +3429,5 @@
   else if (EQ (first_item[0], Qquote))
     /* This shouldn't happen, see xmenu_show.  */
-    pane_name = empty_string;
+    pane_name = empty_unibyte_string;
   else
     pane_name = first_item[MENU_ITEMS_ITEM_NAME];
Index: src/xselect.c
===================================================================
RCS file: /cvsroot/emacs/emacs/src/xselect.c,v
retrieving revision 1.168
diff -u -2 -r1.168 xselect.c
--- src/xselect.c	6 Mar 2007 06:11:01 -0000	1.168
+++ src/xselect.c	6 Jun 2007 08:15:53 -0000
@@ -2679,5 +2679,5 @@

   if (atom && name) XFree (name);
-  if (NILP (ret)) ret = make_string ("", 0);
+  if (NILP (ret)) ret = empty_unibyte_string;

   UNBLOCK_INPUT;
Index: src/xterm.c
===================================================================
RCS file: /cvsroot/emacs/emacs/src/xterm.c,v
retrieving revision 1.946
diff -u -2 -r1.946 xterm.c
--- src/xterm.c	22 May 2007 08:29:09 -0000	1.946
+++ src/xterm.c	6 Jun 2007 10:21:55 -0000
@@ -10650,5 +10650,5 @@
 	    dpyinfo->kboard->Vsystem_key_alist
 	      = call1 (Qvendor_specific_keysyms,
-		       build_string (vendor ? vendor : ""));
+		       vendor ? build_string (vendor) : empty_unibyte_string);
 	    BLOCK_INPUT;
 	  }

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: using empty_string as the only "" string
  2007-06-06 11:04                       ` Juanma Barranquero
@ 2007-06-06 22:09                         ` Richard Stallman
  2007-06-08 15:49                           ` Juanma Barranquero
  0 siblings, 1 reply; 59+ messages in thread
From: Richard Stallman @ 2007-06-06 22:09 UTC (permalink / raw)
  To: Juanma Barranquero; +Cc: dmantipov, monnier, emacs-devel

    BTW, Dmitry's contribution (other than the original idea, of course :)
    is about 5 changed lines, 8 added, 2 removed. He has a previous tiny
    patch (5 lines changed, 1 added, plus comment). I'm not sure whether
    that requires signed papers or not.

We can scrape by with this new change, but anything more would need
papers.

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: using empty_string as the only "" string
  2007-06-06 22:09                         ` Richard Stallman
@ 2007-06-08 15:49                           ` Juanma Barranquero
  2007-06-08 19:16                             ` Stefan Monnier
  0 siblings, 1 reply; 59+ messages in thread
From: Juanma Barranquero @ 2007-06-08 15:49 UTC (permalink / raw)
  To: rms; +Cc: dmantipov, monnier, emacs-devel

On 6/7/07, Richard Stallman <rms@gnu.org> wrote:

> We can scrape by with this new change, but anything more would need
> papers.

OK, I can install the change.

It's less than 40 changed lines, across 18 files, and about 20% of those
changes are simply the renaming empty_string => empty_unibyte_string.

Of the rest, most are using empty_unibyte_string instead of
make_string ("", 0) or build_string ("").

BTW, I see no consistency in the way empty_string was used before: at
times empty_string was used, others make_string ("", 0) or
build_string ("") were preferred. That causes this funny equality:

 ;; without the patch
 (eq "" "") => nil
 (eq (car (car (cdr (cdr icon-title-format)))) (key-description "")) => t

which I suppose more than offsets my unhappiness for this:

 ;; with the patch
 (eq "" (substring-no-properties "" 0 0)) => t

and the slight incompatibility that

 (condition-case err (store-substring "" 0 "a") (error err))
 => (args-out-of-range "" 0)                      ; before the patch
 => (error "Attempt to modify read-only object")  ; after the patch

Warning: the patch changes lisp.h and most src/*.c files do not have a
makefile dependency for it, so I suspect a full recompilation (or a
bootstrap) will be needed after the patch.

             Juanma

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: using empty_string as the only "" string
  2007-06-08 15:49                           ` Juanma Barranquero
@ 2007-06-08 19:16                             ` Stefan Monnier
  0 siblings, 0 replies; 59+ messages in thread
From: Stefan Monnier @ 2007-06-08 19:16 UTC (permalink / raw)
  To: Juanma Barranquero; +Cc: dmantipov, rms, emacs-devel

> BTW, I see no consistency in the way empty_string was used before: at
> times empty_string was used, others make_string ("", 0) or
> build_string ("") were preferred.

It's a "recent" addition and there was no effort to go back and change
pre-existing code to make use of it, so it's no surprise that
it's inconsistent.


        Stefan

^ permalink raw reply	[flat|nested] 59+ messages in thread

end of thread, other threads:[~2007-06-08 19:16 UTC | newest]

Thread overview: 59+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-04-25  5:38 using empty_string as the only "" string dmantipov
2007-04-25  5:49 ` Miles Bader
2007-04-25 11:50 ` Juanma Barranquero
2007-04-25 11:56 ` Kenichi Handa
2007-04-25 13:22   ` Dmitry Antipov
2007-04-25 16:07     ` Stefan Monnier
2007-04-26  4:23 ` Richard Stallman
2007-04-26 13:03   ` Dmitry Antipov
2007-04-27  6:00     ` Richard Stallman
2007-04-27 10:04       ` Dmitry Antipov
2007-04-27 10:29         ` David Kastrup
2007-04-28  4:06         ` Richard Stallman
2007-04-28  8:54           ` Dmitry Antipov
2007-04-28 18:35             ` Richard Stallman
2007-06-05 15:43               ` Juanma Barranquero
2007-06-05 19:17                 ` Richard Stallman
2007-06-05 19:45                   ` Juanma Barranquero
2007-06-06  1:17                     ` Stefan Monnier
2007-06-06 11:04                       ` Juanma Barranquero
2007-06-06 22:09                         ` Richard Stallman
2007-06-08 15:49                           ` Juanma Barranquero
2007-06-08 19:16                             ` Stefan Monnier
  -- strict thread matches above, loose matches on Subject: below --
2007-04-24 16:32 Using " Dmitry Antipov
2007-04-24 17:05 ` Juanma Barranquero
2007-04-24 18:11   ` Andreas Schwab
2007-04-24 18:50     ` Juanma Barranquero
2007-04-24 21:38       ` Andreas Schwab
2007-04-24 21:54         ` Juanma Barranquero
2007-04-24 22:11           ` Andreas Schwab
2007-04-24 22:54             ` Juanma Barranquero
2007-04-24 21:57         ` David Kastrup
2007-04-24 22:07           ` Lennart Borgman (gmail)
2007-04-24 22:29             ` David Kastrup
2007-04-24 22:35               ` Andreas Schwab
2007-04-25  0:55                 ` Kenichi Handa
2007-04-25  9:51                   ` Andreas Schwab
2007-04-25  9:58                     ` David Kastrup
2007-04-25 10:50                       ` Andreas Schwab
2007-04-24 22:40               ` Lennart Borgman (gmail)
2007-04-24 22:12           ` Andreas Schwab
2007-04-24 22:31             ` David Kastrup
2007-04-24 22:56               ` Andreas Schwab
2007-04-24 21:39       ` Miles Bader
2007-04-24 21:45         ` Juanma Barranquero
2007-04-24 22:11           ` Miles Bader
2007-04-24 22:59             ` Juanma Barranquero
2007-04-24 23:37               ` Miles Bader
2007-04-24 23:44                 ` Johan Bockgård
2007-04-25  1:47                   ` Miles Bader
2007-04-25 14:52                   ` Richard Stallman
2007-04-26 15:03                     ` Daniel Brockman
2007-04-27 20:40                       ` Richard Stallman
2007-04-25  2:05       ` Richard Stallman
2007-04-25 12:00         ` Juanma Barranquero
2007-04-25  2:05   ` Richard Stallman
2007-04-24 17:48 ` Stefan Monnier
2007-04-25  2:05   ` Richard Stallman
2007-04-26 14:24   ` Dmitry Antipov
2007-04-25  2:05 ` Richard Stallman

Code repositories for project(s) associated with this public inbox

	https://git.savannah.gnu.org/cgit/emacs.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).