unofficial mirror of emacs-devel@gnu.org 
 help / color / mirror / code / Atom feed
* new `obarray` type
@ 2017-03-13  1:36 Stefan Monnier
  2017-03-13 15:49 ` Eli Zaretskii
  2017-03-13 22:03 ` Alan Mackenzie
  0 siblings, 2 replies; 14+ messages in thread
From: Stefan Monnier @ 2017-03-13  1:36 UTC (permalink / raw)
  To: emacs-devel

The patch below introduces a new type for obarrays, distinct
from vectors.  Among other things, this makes it possible to print them
in a more useful way (it doesn't print the contents, only the size, so
the printed form is not computer-readable, but it's more
palatable to the user).

Printing obarrays in a `read`able way seems like something that should
be under the control of variable, since it's unclear in general what it
would mean (for abbrev-tables, it would probably mean to print the name
of every symbol, along with it value, function, and plist slots, but
doing that for the `obarray` variable doesn't seem right (and it's not
even clear what the `value` of each symbol in it should be, for
buffer-local symbols)).


        Stefan


diff --git a/lisp/emacs-lisp/cl-generic.el b/lisp/emacs-lisp/cl-generic.el
index 0ccf6a17ff..1739fbcc9f 100644
--- a/lisp/emacs-lisp/cl-generic.el
+++ b/lisp/emacs-lisp/cl-generic.el
@@ -1207,7 +1207,7 @@ cl--generic-typeof-types
     (process atom) (window atom) (subr atom) (compiled-function function atom)
     (buffer atom) (char-table array sequence atom)
     (bool-vector array sequence atom)
-    (frame atom) (hash-table atom) (terminal atom)
+    (frame atom) (hash-table atom) (terminal atom) (obarray atom)
     (thread atom) (mutex atom) (condvar atom)
     (font-spec atom) (font-entity atom) (font-object atom)
     (vector array sequence atom)
diff --git a/lisp/obarray.el b/lisp/obarray.el
index aaffe00a07..db13a17572 100644
--- a/lisp/obarray.el
+++ b/lisp/obarray.el
@@ -32,15 +32,7 @@ obarray-default-size
 
 (defun obarray-make (&optional size)
   "Return a new obarray of size SIZE or `obarray-default-size'."
-  (let ((size (or size obarray-default-size)))
-    (if (< 0 size)
-        (make-vector size 0)
-      (signal 'wrong-type-argument '(size 0)))))
-
-(defun obarrayp (object)
-  "Return t if OBJECT is an obarray."
-  (and (vectorp object)
-       (< 0 (length object))))
+  (make-obarray (or size obarray-default-size)))
 
 ;; Don’t use obarray as a variable name to avoid shadowing.
 (defun obarray-get (ob name)
diff --git a/src/alloc.c b/src/alloc.c
index 03774e60b6..5ace037351 100644
--- a/src/alloc.c
+++ b/src/alloc.c
@@ -3412,6 +3412,23 @@ See also the function `vector'.  */)
   return make_lisp_ptr (p, Lisp_Vectorlike);
 }
 
+DEFUN ("make-obarray", Fmake_obarray, Smake_obarray, 1, 1, 0,
+       doc: /* Return a newly created obarray of size LENGTH.  */)
+  (Lisp_Object length)
+{
+  CHECK_NATNUM (length);
+  EMACS_INT l = XFASTINT (length);
+  if (l >= (1 << PSEUDOVECTOR_SIZE_BITS))
+    error ("Obarray too large");
+  else if (l <= 0)
+    error ("Obarray too small");
+  struct Lisp_Vector *p = allocate_vector (l);
+  for (ptrdiff_t i = 0; i < l; i++)
+    p->contents[i] = make_number (0);
+  XSETPVECTYPE (p, PVEC_OBARRAY);
+  return make_lisp_ptr (p, Lisp_Vectorlike);
+}
+
 DEFUN ("vector", Fvector, Svector, 0, MANY, 0,
        doc: /* Return a newly created vector with specified arguments as elements.
 Any number of arguments, even zero arguments, are allowed.
@@ -7593,6 +7610,7 @@ The time is in seconds as a floating point value.  */);
   defsubr (&Smake_byte_code);
   defsubr (&Smake_list);
   defsubr (&Smake_vector);
+  defsubr (&Smake_obarray);
   defsubr (&Smake_string);
   defsubr (&Smake_bool_vector);
   defsubr (&Smake_symbol);
diff --git a/src/data.c b/src/data.c
index df0c3a92a9..183adeb1ea 100644
--- a/src/data.c
+++ b/src/data.c
@@ -250,6 +250,7 @@ for example, (type-of 1) returns `integer'.  */)
         case PVEC_WINDOW: return Qwindow;
         case PVEC_SUBR: return Qsubr;
         case PVEC_COMPILED: return Qcompiled_function;
+        case PVEC_OBARRAY: return Qobarray;
         case PVEC_BUFFER: return Qbuffer;
         case PVEC_CHAR_TABLE: return Qchar_table;
         case PVEC_BOOL_VECTOR: return Qbool_vector;
@@ -360,6 +361,17 @@ DEFUN ("vectorp", Fvectorp, Svectorp, 1, 1, 0,
   return Qnil;
 }
 
+DEFUN ("obarrayp", Fobarrayp, Sobarrayp, 1, 1, 0,
+       doc: /* Return t if OBJECT is an obarray.  */)
+  (Lisp_Object object)
+{
+  if (OBARRAYP (object))
+    return Qt;
+  if (VECTORP (object) && ASIZE (object) > 0) /* Backward compatibility.  */
+    return Qt;
+  return Qnil;
+}
+
 DEFUN ("stringp", Fstringp, Sstringp, 1, 1, 0,
        doc: /* Return t if OBJECT is a string.  */
        attributes: const)
@@ -3580,6 +3592,7 @@ syms_of_data (void)
   DEFSYM (Qsequencep, "sequencep");
   DEFSYM (Qbufferp, "bufferp");
   DEFSYM (Qvectorp, "vectorp");
+  DEFSYM (Qobarrayp, "obarrayp");
   DEFSYM (Qbool_vector_p, "bool-vector-p");
   DEFSYM (Qchar_or_string_p, "char-or-string-p");
   DEFSYM (Qmarkerp, "markerp");
@@ -3699,6 +3712,7 @@ syms_of_data (void)
 
   DEFSYM (Qdefun, "defun");
 
+  DEFSYM (Qobarray, "obarray");
   DEFSYM (Qfont_spec, "font-spec");
   DEFSYM (Qfont_entity, "font-entity");
   DEFSYM (Qfont_object, "font-object");
@@ -3727,6 +3741,7 @@ syms_of_data (void)
   defsubr (&Smultibyte_string_p);
   defsubr (&Sunibyte_string_p);
   defsubr (&Svectorp);
+  defsubr (&Sobarrayp);
   defsubr (&Schar_table_p);
   defsubr (&Svector_or_char_table_p);
   defsubr (&Sbool_vector_p);
diff --git a/src/lisp.h b/src/lisp.h
index 2f97fb8afa..b9a99523d2 100644
--- a/src/lisp.h
+++ b/src/lisp.h
@@ -883,6 +883,7 @@ enum pvec_type
   PVEC_THREAD,
   PVEC_MUTEX,
   PVEC_CONDVAR,
+  PVEC_OBARRAY,
 
   /* These should be last, check internal_equal to see why.  */
   PVEC_COMPILED,
@@ -2814,6 +2815,26 @@ COMPILEDP (Lisp_Object a)
 }
 
 INLINE bool
+OBARRAYP (Lisp_Object a)
+{
+  return PSEUDOVECTORP (a, PVEC_OBARRAY);
+}
+
+INLINE ptrdiff_t
+OBARRAY_SIZE (Lisp_Object obarray)
+{
+  return (OBARRAYP (obarray)
+          ? ASIZE (obarray) & PSEUDOVECTOR_SIZE_MASK
+          : gc_asize (obarray));
+}
+
+INLINE void
+CHECK_OBARRAY (Lisp_Object x)
+{
+  CHECK_TYPE (OBARRAYP (x), Qobarrayp, x);
+}
+
+INLINE bool
 FRAMEP (Lisp_Object a)
 {
   return PSEUDOVECTORP (a, PVEC_FRAME);
diff --git a/src/lread.c b/src/lread.c
index c8632399f7..1c788e5ce5 100644
--- a/src/lread.c
+++ b/src/lread.c
@@ -3772,12 +3767,13 @@ check_obarray (Lisp_Object obarray)
   /* We don't want to signal a wrong-type-argument error when we are
      shutting down due to a fatal error, and we don't want to hit
      assertions in VECTORP and ASIZE if the fatal error was during GC.  */
-  if (!fatal_error_in_progress
-      && (!VECTORP (obarray) || ASIZE (obarray) == 0))
+  if (!(fatal_error_in_progress
+        || OBARRAYP (obarray)
+        || (VECTORP (obarray) && ASIZE (obarray) > 0)))
     {
       /* If Vobarray is now invalid, force it to be valid.  */
       if (EQ (Vobarray, obarray)) Vobarray = initial_obarray;
-      wrong_type_argument (Qvectorp, obarray);
+      wrong_type_argument (Qobarrayp, obarray);
     }
   return obarray;
 }
@@ -3877,6 +3873,9 @@ it defaults to the value of `obarray'.  */)
 
   obarray = check_obarray (NILP (obarray) ? Vobarray : obarray);
   CHECK_STRING (string);
+  if (VECTORP (obarray)
+      && ASIZE (obarray) < (1 << PSEUDOVECTOR_SIZE_BITS))
+    XSETPVECTYPE (XVECTOR (obarray), PVEC_OBARRAY);
 
   tem = oblookup (obarray, SSDATA (string), SCHARS (string), SBYTES (string));
   if (!SYMBOLP (tem))
@@ -4004,7 +4003,7 @@ oblookup (Lisp_Object obarray, register const char *ptr, ptrdiff_t size, ptrdiff
 
   obarray = check_obarray (obarray);
   /* This is sometimes needed in the middle of GC.  */
-  obsize = gc_asize (obarray);
+  obsize = OBARRAY_SIZE (obarray);
   hash = hash_string (ptr, size_byte) % obsize;
   bucket = AREF (obarray, hash);
   oblookup_last_bucket_number = hash;
@@ -4031,8 +4030,8 @@ map_obarray (Lisp_Object obarray, void (*fn) (Lisp_Object, Lisp_Object), Lisp_Ob
 {
   ptrdiff_t i;
   register Lisp_Object tail;
-  CHECK_VECTOR (obarray);
-  for (i = ASIZE (obarray) - 1; i >= 0; i--)
+  CHECK_OBARRAY (obarray);
+  for (i = OBARRAY_SIZE (obarray) - 1; i >= 0; i--)
     {
       tail = AREF (obarray, i);
       if (SYMBOLP (tail))
@@ -4064,12 +4063,33 @@ OBARRAY defaults to the value of `obarray'.  */)
   return Qnil;
 }
 
-#define OBARRAY_SIZE 15121
+static void
+obarray_count_1 (Lisp_Object sym, Lisp_Object counter)
+{
+  Fsetcar (counter, make_number (1 + XFASTINT (XCAR (counter))));
+}
+
+DEFUN ("obarray-count", Fobarray_count, Sobarray_count, 1, 1, 0,
+       doc: /* Count number of element in OBARRAY.  */)
+  (Lisp_Object obarray)
+{
+  obarray = check_obarray (obarray);
+  Lisp_Object counter = Fcons (make_number (0), Qnil);
+  map_obarray (obarray, obarray_count_1, counter);
+  return XCAR (counter);
+}
+
+/* This was recently bumped to 15121, but now that we use PVEC_OBARRAY
+ * it needs to be smaller than 4096 (aka 1 << PSEUDOVECTOR_SIZE_BITS).
+ * FIXME: We could use a higher value by putting half the size bits
+ * in PSEUDOVECTOR_SIZE and the other alf in PSEUDOVECTOR_REST, or by
+ * moving the PVEC_OBARRAY to PSEUDOVECTOR_FLAG.  */
+#define OBARRAY_SIZE 4093
 
 void
 init_obarray (void)
 {
-  Vobarray = Fmake_vector (make_number (OBARRAY_SIZE), make_number (0));
+  Vobarray = Fmake_obarray (make_number (OBARRAY_SIZE));
   initial_obarray = Vobarray;
   staticpro (&initial_obarray);
 
@@ -4502,6 +4522,7 @@ syms_of_lread (void)
   defsubr (&Sread_event);
   defsubr (&Sget_file_char);
   defsubr (&Smapatoms);
+  defsubr (&Sobarray_count);
   defsubr (&Slocate_file_internal);
 
   DEFVAR_LISP ("obarray", Vobarray,
diff --git a/src/minibuf.c b/src/minibuf.c
index cc8252b068..01ab69cb6d 100644
--- a/src/minibuf.c
+++ b/src/minibuf.c
@@ -1216,7 +1216,7 @@ is used to further constrain the set of candidates.  */)
   ptrdiff_t compare, matchsize;
   enum { function_table, list_table, obarray_table, hash_table}
     type = (HASH_TABLE_P (collection) ? hash_table
-	    : VECTORP (collection) ? obarray_table
+	    : (OBARRAYP (collection) || VECTORP (collection)) ? obarray_table
 	    : ((NILP (collection)
 		|| (CONSP (collection) && !FUNCTIONP (collection)))
 	       ? list_table : function_table));
@@ -1237,7 +1237,7 @@ is used to further constrain the set of candidates.  */)
   if (type == obarray_table)
     {
       collection = check_obarray (collection);
-      obsize = ASIZE (collection);
+      obsize = OBARRAY_SIZE (collection);
       bucket = AREF (collection, idx);
     }
 
@@ -1473,7 +1473,7 @@ with a space are ignored unless STRING itself starts with a space.  */)
   Lisp_Object tail, elt, eltstring;
   Lisp_Object allmatches;
   int type = HASH_TABLE_P (collection) ? 3
-    : VECTORP (collection) ? 2
+    : (OBARRAYP (collection) || VECTORP (collection)) ? 2
     : NILP (collection) || (CONSP (collection) && !FUNCTIONP (collection));
   ptrdiff_t idx = 0, obsize = 0;
   ptrdiff_t bindcount = -1;
@@ -1490,7 +1490,7 @@ with a space are ignored unless STRING itself starts with a space.  */)
   if (type == 2)
     {
       collection = check_obarray (collection);
-      obsize = ASIZE (collection);
+      obsize = OBARRAY_SIZE (collection);
       bucket = AREF (collection, idx);
     }
 
@@ -1513,8 +1513,7 @@ with a space are ignored unless STRING itself starts with a space.  */)
 	{
 	  if (!EQ (bucket, zero))
 	    {
-	      if (!SYMBOLP (bucket))
-		error ("Bad data in guts of obarray");
+	      CHECK_SYMBOL (bucket);
 	      elt = bucket;
 	      eltstring = elt;
 	      if (XSYMBOL (bucket)->next)
@@ -1696,7 +1695,7 @@ the values STRING, PREDICATE and `lambda'.  */)
       if (NILP (tem))
 	return Qnil;
     }
-  else if (VECTORP (collection))
+  else if (OBARRAYP (collection) || VECTORP (collection))
     {
       /* Bypass intern-soft as that loses for nil.  */
       tem = oblookup (collection,
diff --git a/src/print.c b/src/print.c
index 5d4076c896..586e094ced 100644
--- a/src/print.c
+++ b/src/print.c
@@ -1855,6 +1855,19 @@ print_object (Lisp_Object obj, Lisp_Object printcharfun, bool escapeflag)
 	}
         break;
 
+      case PVEC_OBARRAY:
+	{
+          print_c_string ("#<obarray", printcharfun);
+	  print_c_string (" :size ", printcharfun);
+	  print_object (make_number (OBARRAY_SIZE (obj)),
+			printcharfun, escapeflag);
+	  print_c_string (" :count ", printcharfun);
+	  print_object (Fobarray_count (obj),
+			printcharfun, escapeflag);
+	  print_c_string (">", printcharfun);
+        }
+        break;
+
       case PVEC_BUFFER:
 	{
 	  if (!BUFFER_LIVE_P (XBUFFER (obj)))




^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: new `obarray` type
  2017-03-13  1:36 new `obarray` type Stefan Monnier
@ 2017-03-13 15:49 ` Eli Zaretskii
  2017-03-13 17:22   ` Stefan Monnier
  2017-03-13 22:03 ` Alan Mackenzie
  1 sibling, 1 reply; 14+ messages in thread
From: Eli Zaretskii @ 2017-03-13 15:49 UTC (permalink / raw)
  To: Stefan Monnier; +Cc: emacs-devel

> From: Stefan Monnier <monnier@iro.umontreal.ca>
> Date: Sun, 12 Mar 2017 21:36:26 -0400
> 
> The patch below introduces a new type for obarrays, distinct
> from vectors.  Among other things, this makes it possible to print them
> in a more useful way (it doesn't print the contents, only the size, so
> the printed form is not computer-readable, but it's more
> palatable to the user).
> 
> Printing obarrays in a `read`able way seems like something that should
> be under the control of variable, since it's unclear in general what it
> would mean (for abbrev-tables, it would probably mean to print the name
> of every symbol, along with it value, function, and plist slots, but
> doing that for the `obarray` variable doesn't seem right (and it's not
> even clear what the `value` of each symbol in it should be, for
> buffer-local symbols)).

Let me be the devil's advocate: are there any clients of this change
other than abbrev-tables defined during the build time?  Because if
they are the only justification, then it's much easier to define them
in startup.el instead, which will make the problem go away.  Right?



^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: new `obarray` type
  2017-03-13 15:49 ` Eli Zaretskii
@ 2017-03-13 17:22   ` Stefan Monnier
  0 siblings, 0 replies; 14+ messages in thread
From: Stefan Monnier @ 2017-03-13 17:22 UTC (permalink / raw)
  To: Eli Zaretskii; +Cc: emacs-devel

>> The patch below introduces a new type for obarrays, distinct
>> from vectors.  Among other things, this makes it possible to print them
>> in a more useful way (it doesn't print the contents, only the size, so
>> the printed form is not computer-readable, but it's more
>> palatable to the user).
>> Printing obarrays in a `read`able way seems like something that should
>> be under the control of variable, since it's unclear in general what it
>> would mean (for abbrev-tables, it would probably mean to print the name
>> of every symbol, along with it value, function, and plist slots, but
>> doing that for the `obarray` variable doesn't seem right (and it's not
>> even clear what the `value` of each symbol in it should be, for
>> buffer-local symbols)).
> Let me be the devil's advocate: are there any clients of this change
> other than abbrev-tables defined during the build time?

Actually, this change doesn't even solve the problem of dumping&reloading
abbrev-tables.  I think we'd be better off reimplementing abbrev tables
using something like hash-tables.

IOW, while this new obarray type was inspired by the work on dumping
Emacs, it doesn't really attempt to solve that problem.  All this does,
really, instead is to provide a separate type so that cl-defmethod can
dispatch on it, and so we can print it in a human-readable way and using
the usual #<...> notation that makes it clear that the output is not
`read`able.

> Because if they are the only justification, then it's much easier to
> define them in startup.el instead, which will make the problem go
> away.  Right?

Definitely.


        Stefan



^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: new `obarray` type
  2017-03-13  1:36 new `obarray` type Stefan Monnier
  2017-03-13 15:49 ` Eli Zaretskii
@ 2017-03-13 22:03 ` Alan Mackenzie
  2017-03-14  1:46   ` Herring, Davis
  2017-03-14 12:52   ` Stefan Monnier
  1 sibling, 2 replies; 14+ messages in thread
From: Alan Mackenzie @ 2017-03-13 22:03 UTC (permalink / raw)
  To: Stefan Monnier; +Cc: emacs-devel

Hello, Stefan.

On Sun, Mar 12, 2017 at 21:36:26 -0400, Stefan Monnier wrote:
> The patch below introduces a new type for obarrays, distinct
> from vectors.  Among other things, this makes it possible to print them
> in a more useful way (it doesn't print the contents, only the size, so
> the printed form is not computer-readable, but it's more
> palatable to the user).

It's not more palatable to this user.  It sounds more like "dumbing
down".  There are few things more frustrating whilst debugging than
having Emacs obfuscating information "for my own good".

Indeed, why not just print _all_ vectors by printing only their size?
The arguments which apply to obarrays surely apply equally to all
vectors.

Not rarely, particularly in CC Mode, I will be dealing with obarrays
with relatively small numbers of symbols.  Of course I want to see these
symbols' names when I ask for that obarray to be printed.  Being told
its "size" would be utterly useless and infuriating.  I suspect most
obarrays in existence (with the essential exception of Emacs's main
obarray) are relatively small, hence printing them as a vector is the
Right Thing to do.

I'm also not in favour of introducing another vector-like type without a
very good reason.  It feels a bit like XEmacs's introduction of a char
type, distinct from the integer type.  That was very clever, and no
doubt pleased academic computer scientists, but I doubt it brought much
benefit - it just created hassle, e.g. when looping through character
positions.  Would a new obarray type prevent any vector operations being
carried out on it, should any package do such things?  If so, that would
be a Bad Thing.

This change would create hassle in general for many packages, all of
which create obarrays with (make-vector LENGTH 0), and would need
changing to use `make-obarray'.  It would mean having to write yet more
compatibility macros (for the inevitable day when old style obarrays get
removed from Emacs).

This change would hinder debugging and development.  All for what?  

> Printing obarrays in a `read`able way seems like something that should
> be under the control of variable, since it's unclear in general what it
> would mean (for abbrev-tables, it would probably mean to print the name
> of every symbol, along with it value, function, and plist slots, but
> doing that for the `obarray` variable doesn't seem right (and it's not
> even clear what the `value` of each symbol in it should be, for
> buffer-local symbols)).

I'm against this change, at least at the moment.

>         Stefan

[ .... ]

-- 
Alan Mackenzie (Nuremberg, Germany).



^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: new `obarray` type
  2017-03-13 22:03 ` Alan Mackenzie
@ 2017-03-14  1:46   ` Herring, Davis
  2017-03-14 12:52   ` Stefan Monnier
  1 sibling, 0 replies; 14+ messages in thread
From: Herring, Davis @ 2017-03-14  1:46 UTC (permalink / raw)
  To: Alan Mackenzie, Stefan Monnier; +Cc: emacs-devel@gnu.org

> Indeed, why not just print _all_ vectors by printing only their size?
> The arguments which apply to obarrays surely apply equally to all
> vectors.

No -- obarrays are special in that they have hash buckets chained with invisible link pointers.  It is a disservice to the user to have this appear as a normal vector.  (A minor one, though, since most users never look at it at all and some of the rest know the lie.)

> Not rarely, particularly in CC Mode, I will be dealing with obarrays
> with relatively small numbers of symbols.  Of course I want to see these
> symbols' names when I ask for that obarray to be printed.

And if it happens that your symbols suffer a hash collision?  How long will you spend wondering where your Nth expected symbol went?

> I suspect most obarrays in existence (with the essential exception of
> Emacs's main obarray) are relatively small, hence printing them as a
> vector is the Right Thing to do.

I don't think printing an arbitrary subset (dependent on the order of insertion) of the contents of an obarray can be considered the Right Thing.

> Would a new obarray type prevent any vector operations being carried
> out on it, should any package do such things?  If so, that would be a
> Bad Thing.

The Elisp Reference Manual says specifically not to try to edit an obarray (but to use [un]intern); what else would you call -- length (which counts buckets)?  aref (which, given a meaningless index, fetches the same random subset of symbols that would be printed)?

Davis


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: new `obarray` type
  2017-03-13 22:03 ` Alan Mackenzie
  2017-03-14  1:46   ` Herring, Davis
@ 2017-03-14 12:52   ` Stefan Monnier
  2017-03-14 20:14     ` Alan Mackenzie
  1 sibling, 1 reply; 14+ messages in thread
From: Stefan Monnier @ 2017-03-14 12:52 UTC (permalink / raw)
  To: Alan Mackenzie; +Cc: emacs-devel

> It's not more palatable to this user.  It sounds more like "dumbing
> down".  There are few things more frustrating whilst debugging than
> having Emacs obfuscating information "for my own good".

The good thing about having the `obarray` type is that e get to choose
how to print them.

I can easily make it so it prints all the symbols in it, would you
prefer that?

> Indeed, why not just print _all_ vectors by printing only their size?

The reason why the patch I sent only prints the number of entries is
that currently the way obarrays are printed you only get to see some of
the symbols but not all.  But I guess you're right: it would be more
useful to list all the symbols in it.
[ Time passes... Done!  ]

> Not rarely, particularly in CC Mode, I will be dealing with obarrays
> with relatively small numbers of symbols.

Regardless of what we decide to do with obarrays, I strongly recommend
you change cc-mode to use hash-tables instead.  My experience with EIEIO
(where I "recently" moved from obarrays to hash-tables) is that it's
measurably faster and the code tends to be clearer (tho that's clearly
subjective).

> Of course I want to see these symbols' names when I ask for that
> obarray to be printed.

With hash-tables, you'll them see all, properly printed and even `read`able!

> I'm also not in favour of introducing another vector-like type without a
> very good reason.

Obarrays are very weird, currently, because they combine "plain vectors"
and "plain symbols" in a tricky way.
- Have you ever tried to do `aref` on an obarray?
- The printout lists some of the symbols, but not all.  Which ones appear
  is arbitrary, unpredictable.
- Have you ever tried to put something else than 0 in an obarray slot?
- An obarray can lead to unexpected space behavior:

    (let* ((o (obarray-make))
           (s1 (intern "s1" o))
           (s2 (intern "s2" o)))
      (set s1 (make-list 100000 t))
      s2)

  might leave you with a 10000-element list preserved as "reachable" as
  long as `s2` is reachable, even tho it's clearly not reachable any more.

BTW, my patch doesn't address this GC problem yet.

> positions.  Would a new obarray type prevent any vector operations being
> carried out on it, should any package do such things?  If so, that would
> be a Bad Thing.

Currently, I haven't changed `aref` to work on obarrays, no.  I've never
ever seen code try to do that (I guess in theory it could be potentially
useful, tho I can't think of any operation you could implement reliably
using `aref` on obarrays would be `obarray-empty-p`).

I was planning on allowing `aset` in case some package uses it to do the
equivalent of `clrhash`, but I haven't yet found any package doing that,
so I haven't bothered either.

> This change would create hassle in general for many packages, all of
> which create obarrays with (make-vector LENGTH 0), and would need
> changing to use `make-obarray'.

Of course, (make-vector LENGTH 0) still works.  And there's
obarray-make, introduced in Emacs-25, IIRC.  But even if we deprecate
(make-vector LENGTH 0) you won't get any byte-compilation warning for it
since we can't detect whether a (make-vector LENGTH 0) is meant as an
obarray or as a normal array that happens to be filled with zeroes.

> It would mean having to write yet more compatibility macros (for the
> inevitable day when old style obarrays get removed from Emacs).

To the extent that we can't detect when make-vector is used for an
obarray, I expect that it'll take *many* years until we can drop support
for "old-style" obarrays, so I wouldn't worry about it.
I suspect that even cc-mode will have switch to hash-tables before
support for old-style obarrays is dropped.


        Stefan



^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: new `obarray` type
  2017-03-14 12:52   ` Stefan Monnier
@ 2017-03-14 20:14     ` Alan Mackenzie
  2017-03-15 17:25       ` Stefan Monnier
  0 siblings, 1 reply; 14+ messages in thread
From: Alan Mackenzie @ 2017-03-14 20:14 UTC (permalink / raw)
  To: Stefan Monnier; +Cc: emacs-devel

Hello, Stefan.

On Tue, Mar 14, 2017 at 08:52:59 -0400, Stefan Monnier wrote:
> > It's not more palatable to this user.  It sounds more like "dumbing
> > down".  There are few things more frustrating whilst debugging than
> > having Emacs obfuscating information "for my own good".

> The good thing about having the `obarray` type is that e get to choose
> how to print them.

> I can easily make it so it prints all the symbols in it, would you
> prefer that?

Yes, please!

> > Indeed, why not just print _all_ vectors by printing only their size?

> The reason why the patch I sent only prints the number of entries is
> that currently the way obarrays are printed you only get to see some of
> the symbols but not all.  But I guess you're right: it would be more
> useful to list all the symbols in it.
> [ Time passes... Done!  ]

Apologies for that.  I wasn't really aware that (currently) printing an
obarray only displays some of the symbols.

> > Not rarely, particularly in CC Mode, I will be dealing with obarrays
> > with relatively small numbers of symbols.

> Regardless of what we decide to do with obarrays, I strongly recommend
> you change cc-mode to use hash-tables instead.  My experience with EIEIO
> (where I "recently" moved from obarrays to hash-tables) is that it's
> measurably faster and the code tends to be clearer (tho that's clearly
> subjective).

There are currently four uses of (make-vector LENGTH 0) in CC Mode, at
least one of which, possibly two, genuinely deal with Lisp symbols.
Converting those to hash-tables would probably be a net loss, though
converting the ones which just use obarrays as a string look-up would
surely gain.

> > Of course I want to see these symbols' names when I ask for that
> > obarray to be printed.

> With hash-tables, you'll them see all, properly printed and even `read`able!

> > I'm also not in favour of introducing another vector-like type without a
> > very good reason.

> Obarrays are very weird, currently, because they combine "plain vectors"
> and "plain symbols" in a tricky way.
> - Have you ever tried to do `aref` on an obarray?
> - The printout lists some of the symbols, but not all.  Which ones appear
>   is arbitrary, unpredictable.
> - Have you ever tried to put something else than 0 in an obarray slot?
> - An obarray can lead to unexpected space behavior:

>     (let* ((o (obarray-make))
>            (s1 (intern "s1" o))
>            (s2 (intern "s2" o)))
>       (set s1 (make-list 100000 t))
>       s2)

>   might leave you with a 10000-element list preserved as "reachable" as
>   long as `s2` is reachable, even tho it's clearly not reachable any more.

> BTW, my patch doesn't address this GC problem yet.

> > positions.  Would a new obarray type prevent any vector operations being
> > carried out on it, should any package do such things?  If so, that would
> > be a Bad Thing.

> Currently, I haven't changed `aref` to work on obarrays, no.  I've never
> ever seen code try to do that (I guess in theory it could be potentially
> useful, tho I can't think of any operation you could implement reliably
> using `aref` on obarrays would be `obarray-empty-p`).

> I was planning on allowing `aset` in case some package uses it to do the
> equivalent of `clrhash`, but I haven't yet found any package doing that,
> so I haven't bothered either.

> > This change would create hassle in general for many packages, all of
> > which create obarrays with (make-vector LENGTH 0), and would need
> > changing to use `make-obarray'.

> Of course, (make-vector LENGTH 0) still works.  And there's
> obarray-make, introduced in Emacs-25, IIRC.  But even if we deprecate
> (make-vector LENGTH 0) you won't get any byte-compilation warning for it
> since we can't detect whether a (make-vector LENGTH 0) is meant as an
> obarray or as a normal array that happens to be filled with zeroes.

You could prohibit symbol operations on the "wrong" type of vector.

> > It would mean having to write yet more compatibility macros (for the
> > inevitable day when old style obarrays get removed from Emacs).

> To the extent that we can't detect when make-vector is used for an
> obarray, I expect that it'll take *many* years until we can drop support
> for "old-style" obarrays, so I wouldn't worry about it.
> I suspect that even cc-mode will have switch to hash-tables before
> support for old-style obarrays is dropped.

Hee!

Thanks for this reply.  I withdraw my objections to the new obarray
type.

>         Stefan

-- 
Alan Mackenzie (Nuremberg, Germany).



^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: new `obarray` type
  2017-03-14 20:14     ` Alan Mackenzie
@ 2017-03-15 17:25       ` Stefan Monnier
  2017-03-15 18:19         ` Lars Brinkhoff
  2017-07-23 14:03         ` Converting CC Mode's obarrays to hash tables. [Was: new `obarray` type] Alan Mackenzie
  0 siblings, 2 replies; 14+ messages in thread
From: Stefan Monnier @ 2017-03-15 17:25 UTC (permalink / raw)
  To: Alan Mackenzie; +Cc: emacs-devel

> There are currently four uses of (make-vector LENGTH 0) in CC Mode, at
> least one of which, possibly two, genuinely deal with Lisp symbols.
> Converting those to hash-tables would probably be a net loss, though
> converting the ones which just use obarrays as a string look-up would
> surely gain.

I just tried such a conversion on all 4 uses.

The result isn't that bad, but indeed contrary to the EIEIO case, I get
a slight slow down (somewhat lost in the measurement noise, but still
a disappointment for me).  The culprit seems to be the use of cl-structs
instead of symbols in c-lang-constants (I can recover some of the speed
by adding (:type vector) (:named nil) to the defstruct definition).

In case you're interested in extracting the rest, find the patch below
(which was pretty thoroughly *un*tested),


        Stefan


diff --git a/lisp/progmodes/cc-defs.el b/lisp/progmodes/cc-defs.el
index 3fdd56124c..c4798c6108 100644
--- a/lisp/progmodes/cc-defs.el
+++ b/lisp/progmodes/cc-defs.el
@@ -2029,18 +2029,31 @@ c-add-language
     (error "Unknown base mode `%s'" base-mode))
   (put mode 'c-fallback-mode base-mode))
 
-(defvar c-lang-constants (make-vector 151 0))
-;;   Obarray used as a cache to keep track of the language constants.
+(defvar c-lang-constants (make-hash-table))
+;;   Hash-table used as a cache to keep track of the language constants.
 ;; The constants stored are those defined by `c-lang-defconst' and the values
 ;; computed by `c-lang-const'.  It's mostly used at compile time but it's not
 ;; stored in compiled files.
 
-;; The obarray contains all the language constants as symbols.  The
+;; The table contains all the language constants as `c-lang--const'.  The
 ;; value cells hold the evaluated values as alists where each car is
 ;; the mode name symbol and the corresponding cdr is the evaluated
 ;; value in that mode.  The property lists hold the source definitions
-;; and other miscellaneous data.  The obarray might also contain
-;; various other symbols, but those don't have any variable bindings.
+;; and other miscellaneous data.
+(defun c-lang--get-const (var)
+  (or (gethash var c-lang-constants)
+      (puthash var (c-lang--new-const) c-lang-constants)))
+(defun c-lang--forget-const (x) (remhash x c-lang-constants))
+(defun c-lang--map-const (f)
+  (maphash (lambda (sym _) (funcall f sym)) c-lang-constants))
+(cl-defstruct (c-lang--const
+	       ;; (:type vector) (:named nil)
+	       (:constructor c-lang--new-const ()))
+  (values nil :type alist)
+  dependents
+  source
+  documentation)
+
 
 (defvar c-lang-const-expansion nil)
 
@@ -2111,7 +2124,7 @@ c-lang-defconst
 earlier definition will not affect `c-lang-const' on the same
 constant.  A file is identified by its base name."
 
-  (let* ((sym (intern (symbol-name name) c-lang-constants))
+  (let* ((sym (c-lang--get-const name))
 	 ;; Make `c-lang-const' expand to a straightforward call to
 	 ;; `c-get-lang-constant' in `c--macroexpand-all' below.
 	 ;;
@@ -2139,7 +2152,7 @@ c-lang-defconst
       ;; The docstring is hardly used anywhere since there's no normal
       ;; symbol to attach it to.  It's primarily for getting the right
       ;; format in the source.
-      (put sym 'variable-documentation (car args))
+      (setf (c-lang--const-documentation sym) (car args))
       (setq args (cdr args)))
 
     (or args
@@ -2188,7 +2201,7 @@ c-lang-defconst
     ;; definitions for this symbol, to make sure the order in the
     ;; `source' property is correct even when files are loaded out of
     ;; order.
-    (setq pre-files (mapcar 'car (get sym 'source)))
+    (setq pre-files (mapcar #'car (c-lang--const-source sym)))
     (if (memq file pre-files)
 	;; This can happen when the source file (e.g. cc-langs.el) is first
 	;; loaded as source, setting a 'source property entry, and then itself
@@ -2210,8 +2223,8 @@ c-lang-defconst
 (defun c-define-lang-constant (name bindings &optional pre-files)
   ;; Used by `c-lang-defconst'.
 
-  (let* ((sym (intern (symbol-name name) c-lang-constants))
-	 (source (get sym 'source))
+  (let* ((sym (c-lang--get-const name))
+	 (source (c-lang--const-source sym))
 	 (file (intern
 		(or (c-get-current-file)
 		    (error "`c-lang-defconst' must be used in a file"))))
@@ -2228,28 +2241,28 @@ c-define-lang-constant
 	(unless (assq (car pre-files) source)
 	  (setq source (cons (list (car pre-files)) source)))
 	(setq pre-files (cdr pre-files)))
-      (put sym 'source (cons (setq elem (list file)) source)))
+      (setf (c-lang--const-source sym) (cons (setq elem (list file)) source)))
 
     (setcdr elem bindings)
 
-    ;; Bind the symbol as a variable, or clear any earlier evaluated
-    ;; value it has.
-    (set sym nil)
+    ;; Clear any earlier evaluated value it has.
+    (setf (c-lang--const-values sym) nil)
 
     ;; Clear the evaluated values that depend on this source.
-    (let ((agenda (get sym 'dependents))
-	  (visited (make-vector 101 0))
-	  ptr)
+    ;; NOTE: As far as I can tell, this code never does anything in
+    ;; "normal" use.  It's probably only used when loading a CC-mode
+    ;; version into an Emacs that already loaded another CC-mode version.
+    (let ((agenda (c-lang--const-dependents sym))
+	  ;; This is a set, implemented as a hash-table.
+	  ;; FIXME: Wouldn't a simple list be good enough?
+	  (visited (make-hash-table)))
       (while agenda
 	(setq sym (car agenda)
 	      agenda (cdr agenda))
-	(intern (symbol-name sym) visited)
-	(set sym nil)
-	(setq ptr (get sym 'dependents))
-	(while ptr
-	  (setq sym (car ptr)
-		ptr (cdr ptr))
-	  (unless (intern-soft (symbol-name sym) visited)
+	(puthash sym t visited)
+	(setf (c-lang--const-values sym) nil)
+	(dolist (sym (c-lang--const-dependents sym))
+	  (unless (gethash sym visited)
 	    (setq agenda (cons sym agenda))))))
 
     name))
@@ -2267,7 +2280,7 @@ c-lang-const
   (or (symbolp lang)
       (error "Not a symbol: %S" lang))
 
-  (let ((sym (intern (symbol-name name) c-lang-constants))
+  (let ((sym (c-lang--get-const name))
 	(mode (when lang (intern (concat (symbol-name lang) "-mode")))))
 
     (or (get mode 'c-mode-prefix) (null mode)
@@ -2293,7 +2306,7 @@ c-lang-const
 			     (if (eq file (car elem))
 				 nil	; Exclude our own file.
 			       (list (car elem))))
-			   (get sym 'source)))))
+			   (c-lang--const-source sym)))))
 
             ;; Make some effort to do a compact call to
             ;; `c-get-lang-constant' since it will be compiled in.
@@ -2336,8 +2349,8 @@ c-get-lang-constant
       (setq mode c-buffer-is-cc-mode)
       (error "No current language"))
 
-  (let* ((sym (intern (symbol-name name) c-lang-constants))
-	 (source (get sym 'source))
+  (let* ((sym (c-lang--get-const name))
+	 (source (c-lang--const-source sym))
 	 elem
 	 (eval-in-sym (and c-lang-constants-under-evaluation
 			   (caar c-lang-constants-under-evaluation))))
@@ -2345,8 +2358,7 @@ c-get-lang-constant
     ;; Record the dependencies between this symbol and the one we're
     ;; being evaluated in.
     (when eval-in-sym
-      (or (memq eval-in-sym (get sym 'dependents))
-	  (put sym 'dependents (cons eval-in-sym (get sym 'dependents)))))
+      (cl-pushnew eval-in-sym (c-lang--const-dependents sym)))
 
     ;; Make sure the source files have entries on the `source'
     ;; property so that loading will take place when necessary.
@@ -2361,8 +2373,7 @@ c-get-lang-constant
 	(set sym nil))
       (setq source-files (cdr source-files)))
 
-    (if (and (boundp sym)
-	     (setq elem (assq mode (symbol-value sym))))
+    (if (setq elem (assq mode (c-lang--const-values sym)))
 	(cdr elem)
 
       ;; Check if an evaluation of this symbol is already underway.
@@ -2418,10 +2429,10 @@ c-get-lang-constant
 	   ;; some caller higher up.
 	   (message "Eval error in the `c-lang-defconst' for `%S' in %s:"
 		    sym mode)
-	   (makunbound sym)
+	   (c-lang--forget-const name)
 	   (signal (car err) (cdr err))))
 
-	(set sym (cons (cons mode value) (symbol-value sym)))
+	(push (cons mode value) (c-lang--const-values sym))
 	value))))
 
 (defun c-find-assignment-for-mode (source-pos mode match-any-lang _name)
diff --git a/lisp/progmodes/cc-engine.el b/lisp/progmodes/cc-engine.el
index a5ade09791..abddbdf5bf 100644
--- a/lisp/progmodes/cc-engine.el
+++ b/lisp/progmodes/cc-engine.el
@@ -535,14 +535,14 @@ c-keyword-sym
   ;; Return non-nil if the string KEYWORD is a known keyword.  More
   ;; precisely, the value is the symbol for the keyword in
   ;; `c-keywords-obarray'.
-  (intern-soft keyword c-keywords-obarray))
+  (gethash keyword c-keywords-table))
 
 (defsubst c-keyword-member (keyword-sym lang-constant)
   ;; Return non-nil if the symbol KEYWORD-SYM, as returned by
   ;; `c-keyword-sym', is a member of LANG-CONSTANT, which is the name
   ;; of a language constant that ends with "-kwds".  If KEYWORD-SYM is
   ;; nil then the result is nil.
-  (get keyword-sym lang-constant))
+  (plist-get keyword-sym lang-constant))
 
 ;; String syntax chars, suitable for skip-syntax-(forward|backward).
 (defconst c-string-syntax (if (memq 'gen-string-delim c-emacs-features)
@@ -5991,7 +5991,8 @@ c-found-types
 
 (defsubst c-clear-found-types ()
   ;; Clears `c-found-types'.
-  (setq c-found-types (make-vector 53 0)))
+  ;; This is a set, implemented as a hash-table.
+  (setq c-found-types (make-hash-table :test #'equal)))
 
 (defun c-add-type (from to)
   ;; Add the given region as a type in `c-found-types'.  If the region
@@ -6005,31 +6006,30 @@ c-add-type
   ;;
   ;; This function might do hidden buffer changes.
   (let ((type (c-syntactic-content from to c-recognize-<>-arglists)))
-    (unless (intern-soft type c-found-types)
-      (unintern (substring type 0 -1) c-found-types)
-      (intern type c-found-types))))
+    (unless (gethash type c-found-types)
+      (remhash (substring type 0 -1) c-found-types)
+      (puthash type t c-found-types))))
 
 (defun c-unfind-type (name)
   ;; Remove the "NAME" from c-found-types, if present.
-  (unintern name c-found-types))
+  (remhash name c-found-types))
 
 (defsubst c-check-type (from to)
   ;; Return non-nil if the given region contains a type in
   ;; `c-found-types'.
   ;;
   ;; This function might do hidden buffer changes.
-  (intern-soft (c-syntactic-content from to c-recognize-<>-arglists)
-	       c-found-types))
+  (gethash (c-syntactic-content from to c-recognize-<>-arglists)
+	   c-found-types))
 
 (defun c-list-found-types ()
   ;; Return all the types in `c-found-types' as a sorted list of
   ;; strings.
   (let (type-list)
-    (mapatoms (lambda (type)
-		(setq type-list (cons (symbol-name type)
-				      type-list)))
-	      c-found-types)
-    (sort type-list 'string-lessp)))
+    (maphash (lambda (type _)
+		(push type type-list))
+	     c-found-types)
+    (sort type-list #'string-lessp)))
 
 ;; Shut up the byte compiler.
 (defvar c-maybe-stale-found-type)
diff --git a/lisp/progmodes/cc-langs.el b/lisp/progmodes/cc-langs.el
index 3b455fc090..8b6562f7df 100644
--- a/lisp/progmodes/cc-langs.el
+++ b/lisp/progmodes/cc-langs.el
@@ -164,13 +164,12 @@ c-lang-defvar
 	     (eq (car-safe val) 'c-lang-const)
 	     (eq (nth 1 val) var)
 	     (not (nth 2 val)))
-    ;; Special case: If there's no docstring and the value is a
-    ;; simple (c-lang-const foo) where foo is the same name as VAR
-    ;; then take the docstring from the language constant foo.
-    (setq doc (get (intern (symbol-name (nth 1 val)) c-lang-constants)
-		   'variable-documentation)))
-  (or (stringp doc)
-      (setq doc nil))
+      ;; Special case: If there's no docstring and the value is a
+      ;; simple (c-lang-const foo) where foo is the same name as VAR
+      ;; then take the docstring from the language constant foo.
+      (setq doc (c-lang--const-documentation (c-lang--get-const (nth 1 val)))))
+    (or (stringp doc)
+        (setq doc nil))
 
   (let ((elem (assq var (cdr c-lang-variable-inits))))
     (if elem
@@ -2700,13 +2699,12 @@ 'c-opt-op-identitier-prefix
   (defconst c-kwds-lang-consts
     ;; List of all the language constants that contain keyword lists.
     (let (list)
-      (mapatoms (lambda (sym)
-		  (when (and (boundp sym)
-			     (string-match "-kwds\\'" (symbol-name sym)))
-		    ;; Make the list of globally interned symbols
-		    ;; instead of ones interned in `c-lang-constants'.
-		    (setq list (cons (intern (symbol-name sym)) list))))
-		c-lang-constants)
+      (c-lang--map-const
+       (lambda (sym)
+	 (when (string-match "-kwds\\'" (symbol-name sym))
+	   ;; Make the list of globally interned symbols
+	   ;; instead of ones interned in `c-lang-constants'.
+	   (setq list (cons sym list)))))
       list)))
 
 (c-lang-defconst c-keywords
@@ -2749,8 +2747,8 @@ 'c-opt-op-identitier-prefix
 	    (setcdr elem (cons lang-const (cdr elem))))))
       result-alist))
 
-(c-lang-defvar c-keywords-obarray
-  ;; An obarray containing all keywords as symbols.  The property list
+(c-lang-defvar c-keywords-table
+  ;; A hash table containing all keywords as symbols.  The property list
   ;; of each symbol has a non-nil entry for the specific `*-kwds'
   ;; lists it's a member of.
   ;;
@@ -2767,22 +2765,23 @@ 'c-opt-op-identitier-prefix
 
   (let* ((alist (c-lang-const c-keyword-member-alist))
 	 kwd lang-const-list
-	 (obarray (make-vector (* (length alist) 2) 0)))
+	 (table (make-hash-table :test #'equal)))
     (while alist
       (setq kwd (caar alist)
 	    lang-const-list (cdar alist)
 	    alist (cdr alist))
-      (setplist (intern kwd obarray)
-		;; Emacs has an odd bug that causes `mapcan' to fail
-		;; with unintelligible errors.  (XEmacs works.)
-		;; (2015-06-24): This bug has not yet been fixed.
-		;;(mapcan (lambda (lang-const)
-		;;	      (list lang-const t))
-		;;	    lang-const-list)
-		(apply 'nconc (mapcar (lambda (lang-const)
+      (puthash kwd
+	       ;; Emacs has an odd bug that causes `mapcan' to fail
+	       ;; with unintelligible errors.  (XEmacs works.)
+	       ;; (2015-06-24): This bug has not yet been fixed.
+	       ;;(mapcan (lambda (lang-const)
+	       ;;	      (list lang-const t))
+	       ;;	    lang-const-list)
+	       (apply #'nconc (mapcar (lambda (lang-const)
 					(list lang-const t))
-				      lang-const-list))))
-    obarray))
+				      lang-const-list))
+	       table))
+    table))
 
 (c-lang-defconst c-regular-keywords-regexp
   ;; Adorned regexp matching all keywords that should be fontified



^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: new `obarray` type
  2017-03-15 17:25       ` Stefan Monnier
@ 2017-03-15 18:19         ` Lars Brinkhoff
  2017-03-15 19:24           ` (:named nil) in cl-defstruct (was: new `obarray` type) Stefan Monnier
  2017-07-23 14:03         ` Converting CC Mode's obarrays to hash tables. [Was: new `obarray` type] Alan Mackenzie
  1 sibling, 1 reply; 14+ messages in thread
From: Lars Brinkhoff @ 2017-03-15 18:19 UTC (permalink / raw)
  To: emacs-devel

Stefan Monnier <monnier@iro.umontreal.ca> writes:
> I can recover some of the speed by adding (:type vector) (:named nil)
> to the defstruct definition.

Does that work for you?  It seems to me that (:named nil) does the
opposite of what you would think.




^ permalink raw reply	[flat|nested] 14+ messages in thread

* (:named nil) in cl-defstruct (was: new `obarray` type)
  2017-03-15 18:19         ` Lars Brinkhoff
@ 2017-03-15 19:24           ` Stefan Monnier
  2017-03-15 19:39             ` Noam Postavsky
  0 siblings, 1 reply; 14+ messages in thread
From: Stefan Monnier @ 2017-03-15 19:24 UTC (permalink / raw)
  To: emacs-devel

>> I can recover some of the speed by adding (:type vector) (:named nil)
>> to the defstruct definition.
> Does that work for you?  It seems to me that (:named nil) does the
> opposite of what you would think.

Oh, right, indeed.  :named works by "present/absent", so you can use

    (:type vector)
    :named

for "named" and just

    (:type vector)

for "unnamed".

Common-Lisp doesn't allow (:named ...), but cl-macs.el treats it
as :named hence the confusion.  I think we should change this to either
disallow (:named ...) or to treat (:named nil) as a way to say "*not*
named".  The patch below does latter.  Any objection?


        Stefan


diff --git a/lisp/emacs-lisp/cl-macs.el b/lisp/emacs-lisp/cl-macs.el
index ee32c3444f..6e95154daa 100644
--- a/lisp/emacs-lisp/cl-macs.el
+++ b/lisp/emacs-lisp/cl-macs.el
@@ -2675,7 +2675,7 @@ cl-defstruct
 	      ((eq opt :type)
 	       (setq type (car args)))
 	      ((eq opt :named)
-	       (setq named t))
+	       (setq named (if args (car args) t)))
 	      ((eq opt :initial-offset)
 	       (setq descs (nconc (make-list (car args) '(cl-skip-slot))
 				  descs)))




^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: (:named nil) in cl-defstruct (was: new `obarray` type)
  2017-03-15 19:24           ` (:named nil) in cl-defstruct (was: new `obarray` type) Stefan Monnier
@ 2017-03-15 19:39             ` Noam Postavsky
  2017-03-15 20:28               ` (:named nil) in cl-defstruct Stefan Monnier
  0 siblings, 1 reply; 14+ messages in thread
From: Noam Postavsky @ 2017-03-15 19:39 UTC (permalink / raw)
  To: Stefan Monnier; +Cc: Emacs developers

On Wed, Mar 15, 2017 at 3:24 PM, Stefan Monnier
<monnier@iro.umontreal.ca> wrote:
> Common-Lisp doesn't allow (:named ...), but cl-macs.el treats it
> as :named hence the confusion.  I think we should change this to either
> disallow (:named ...) or to treat (:named nil) as a way to say "*not*
> named".  The patch below does latter.  Any objection?

> -              (setq named t))
> +              (setq named (if args (car args) t)))

Hmm, if I read the code below correctly, this would allow (:named
something-else), but the struct wouldn't be named 'something-else'.
Which still seems potentially confusing for users.



^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: (:named nil) in cl-defstruct
  2017-03-15 19:39             ` Noam Postavsky
@ 2017-03-15 20:28               ` Stefan Monnier
  0 siblings, 0 replies; 14+ messages in thread
From: Stefan Monnier @ 2017-03-15 20:28 UTC (permalink / raw)
  To: emacs-devel

>> Common-Lisp doesn't allow (:named ...), but cl-macs.el treats it
>> as :named hence the confusion.  I think we should change this to either
>> disallow (:named ...) or to treat (:named nil) as a way to say "*not*
>> named".  The patch below does latter.  Any objection?

>> -              (setq named t))
>> +              (setq named (if args (car args) t)))

> Hmm, if I read the code below correctly, this would allow (:named
> something-else), but the struct wouldn't be named 'something-else'.
> Which still seems potentially confusing for users.

I think it's OK: it says ":named" not ":name", so it naturally expects
a boolean.  And it's definitely no worse than what we have now.


        Stefan




^ permalink raw reply	[flat|nested] 14+ messages in thread

* Converting CC Mode's obarrays to hash tables.  [Was: new `obarray` type]
  2017-03-15 17:25       ` Stefan Monnier
  2017-03-15 18:19         ` Lars Brinkhoff
@ 2017-07-23 14:03         ` Alan Mackenzie
  2017-07-24 14:06           ` Stefan Monnier
  1 sibling, 1 reply; 14+ messages in thread
From: Alan Mackenzie @ 2017-07-23 14:03 UTC (permalink / raw)
  To: Stefan Monnier; +Cc: emacs-devel

Hello, Stefan.

On Wed, Mar 15, 2017 at 13:25:08 -0400, Stefan Monnier wrote:
> > There are currently four uses of (make-vector LENGTH 0) in CC Mode, at
> > least one of which, possibly two, genuinely deal with Lisp symbols.
> > Converting those to hash-tables would probably be a net loss, though
> > converting the ones which just use obarrays as a string look-up would
> > surely gain.

> I just tried such a conversion on all 4 uses.

> The result isn't that bad, but indeed contrary to the EIEIO case, I get
> a slight slow down (somewhat lost in the measurement noise, but still
> a disappointment for me).  The culprit seems to be the use of cl-structs
> instead of symbols in c-lang-constants (I can recover some of the speed
> by adding (:type vector) (:named nil) to the defstruct definition).

I've just converted c-found-types to use a hash table.  The result was a
slight, but measurable and worthwhile, speed increase, of between 1% and
2%.

As for c-keywords-obarray (an obarray which has C (etc.) keywords as its
symbols, whose property lists contain the various keyword categories the
keywords belong to) - what is the benefit of holding collections of
symbols in hash tables rather than obarrays?  If this makes a program
faster, it suggests that the obarray implementation could be improved to
match the speed of the hash table implementation.  Why don't we store
the main Emacs obarray in a hash table, if this will increase speed?

> In case you're interested in extracting the rest, find the patch below
> (which was pretty thoroughly *un*tested),

Thanks, I read it.

>         Stefan

[ .... ]

-- 
Alan Mackenzie (Nuremberg, Germany).



^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: Converting CC Mode's obarrays to hash tables. [Was: new `obarray` type]
  2017-07-23 14:03         ` Converting CC Mode's obarrays to hash tables. [Was: new `obarray` type] Alan Mackenzie
@ 2017-07-24 14:06           ` Stefan Monnier
  0 siblings, 0 replies; 14+ messages in thread
From: Stefan Monnier @ 2017-07-24 14:06 UTC (permalink / raw)
  To: emacs-devel

> As for c-keywords-obarray (an obarray which has C (etc.) keywords as its
> symbols, whose property lists contain the various keyword categories the
> keywords belong to) - what is the benefit of holding collections of
> symbols in hash tables rather than obarrays?

I'd return the question.  What's the benefit of using an extra
indirection through symbols to hold plists, instead of just using the
plist directly.

For reference, here's the relevant part of the patch:

    diff --git a/lisp/progmodes/cc-engine.el b/lisp/progmodes/cc-engine.el
    index a5ade09791..abddbdf5bf 100644
    --- a/lisp/progmodes/cc-engine.el
    +++ b/lisp/progmodes/cc-engine.el
    @@ -535,14 +535,14 @@ c-keyword-sym
       ;; Return non-nil if the string KEYWORD is a known keyword.  More
       ;; precisely, the value is the symbol for the keyword in
       ;; `c-keywords-obarray'.
    -  (intern-soft keyword c-keywords-obarray))
    +  (gethash keyword c-keywords-table))
     
     (defsubst c-keyword-member (keyword-sym lang-constant)
       ;; Return non-nil if the symbol KEYWORD-SYM, as returned by
       ;; `c-keyword-sym', is a member of LANG-CONSTANT, which is the name
       ;; of a language constant that ends with "-kwds".  If KEYWORD-SYM is
       ;; nil then the result is nil.
    -  (get keyword-sym lang-constant))
    +  (plist-get keyword-sym lang-constant))
     
     ;; String syntax chars, suitable for skip-syntax-(forward|backward).
     (defconst c-string-syntax (if (memq 'gen-string-delim c-emacs-features)
    
    diff --git a/lisp/progmodes/cc-langs.el b/lisp/progmodes/cc-langs.el
    index 3b455fc090..8b6562f7df 100644
    --- a/lisp/progmodes/cc-langs.el
    +++ b/lisp/progmodes/cc-langs.el
    @@ -2749,8 +2747,8 @@ 'c-opt-op-identitier-prefix
     	    (setcdr elem (cons lang-const (cdr elem))))))
           result-alist))
     
    -(c-lang-defvar c-keywords-obarray
    -  ;; An obarray containing all keywords as symbols.  The property list
    +(c-lang-defvar c-keywords-table
    +  ;; A hash table containing all keywords as symbols.  The property list
       ;; of each symbol has a non-nil entry for the specific `*-kwds'
       ;; lists it's a member of.
       ;;
    @@ -2767,22 +2765,23 @@ 'c-opt-op-identitier-prefix
     
       (let* ((alist (c-lang-const c-keyword-member-alist))
     	 kwd lang-const-list
    -	 (obarray (make-vector (* (length alist) 2) 0)))
    +	 (table (make-hash-table :test #'equal)))
         (while alist
           (setq kwd (caar alist)
     	    lang-const-list (cdar alist)
     	    alist (cdr alist))
    -      (setplist (intern kwd obarray)
    -		;; Emacs has an odd bug that causes `mapcan' to fail
    -		;; with unintelligible errors.  (XEmacs works.)
    -		;; (2015-06-24): This bug has not yet been fixed.
    -		;;(mapcan (lambda (lang-const)
    -		;;	      (list lang-const t))
    -		;;	    lang-const-list)
    -		(apply 'nconc (mapcar (lambda (lang-const)
    +      (puthash kwd
    +	       ;; Emacs has an odd bug that causes `mapcan' to fail
    +	       ;; with unintelligible errors.  (XEmacs works.)
    +	       ;; (2015-06-24): This bug has not yet been fixed.
    +	       ;;(mapcan (lambda (lang-const)
    +	       ;;	      (list lang-const t))
    +	       ;;	    lang-const-list)
    +	       (apply #'nconc (mapcar (lambda (lang-const)
     					(list lang-const t))
    -				      lang-const-list))))
    -    obarray))
    +				      lang-const-list))
    +	       table))
    +    table))
     
     (c-lang-defconst c-regular-keywords-regexp
       ;; Adorned regexp matching all keywords that should be fontified

BTW, having re-looked at the code, I see that those plists are really
justs sets, so it could be optimized even further, as in:

    diff --git a/lisp/progmodes/cc-engine.el b/lisp/progmodes/cc-engine.el
    index a5ade09791..abddbdf5bf 100644
    --- a/lisp/progmodes/cc-engine.el
    +++ b/lisp/progmodes/cc-engine.el
    @@ -535,14 +535,14 @@ c-keyword-sym
       ;; Return non-nil if the string KEYWORD is a known keyword.  More
       ;; precisely, the value is the symbol for the keyword in
       ;; `c-keywords-obarray'.
    -  (intern-soft keyword c-keywords-obarray))
    +  (gethash keyword c-keywords-table))
     
     (defsubst c-keyword-member (keyword-sym lang-constant)
       ;; Return non-nil if the symbol KEYWORD-SYM, as returned by
       ;; `c-keyword-sym', is a member of LANG-CONSTANT, which is the name
       ;; of a language constant that ends with "-kwds".  If KEYWORD-SYM is
       ;; nil then the result is nil.
    -  (get keyword-sym lang-constant))
    +  (memq keyword-sym lang-constant))
     
     ;; String syntax chars, suitable for skip-syntax-(forward|backward).
     (defconst c-string-syntax (if (memq 'gen-string-delim c-emacs-features)
    
    diff --git a/lisp/progmodes/cc-langs.el b/lisp/progmodes/cc-langs.el
    index 3b455fc090..8b6562f7df 100644
    --- a/lisp/progmodes/cc-langs.el
    +++ b/lisp/progmodes/cc-langs.el
    @@ -2749,8 +2747,8 @@ 'c-opt-op-identitier-prefix
     	    (setcdr elem (cons lang-const (cdr elem))))))
           result-alist))
     
    -(c-lang-defvar c-keywords-obarray
    -  ;; An obarray containing all keywords as symbols.  The property list
    +(c-lang-defvar c-keywords-table
    +  ;; A hash table containing all keywords as symbols.  The value
       ;; of each keyword is a set of symbols indicating presence in
       ;;    each `*-kwds' lists.
       ;;
    @@ -2767,22 +2765,23 @@ 'c-opt-op-identitier-prefix
     
       (let* ((alist (c-lang-const c-keyword-member-alist))
     	 kwd lang-const-list
    -	 (obarray (make-vector (* (length alist) 2) 0)))
    +	 (table (make-hash-table :test #'equal)))
         (while alist
           (setq kwd (caar alist)
     	    lang-const-list (cdar alist)
     	    alist (cdr alist))
    -      (setplist (intern kwd obarray)
    -		;; Emacs has an odd bug that causes `mapcan' to fail
    -		;; with unintelligible errors.  (XEmacs works.)
    -		;; (2015-06-24): This bug has not yet been fixed.
    -		;;(mapcan (lambda (lang-const)
    -		;;	      (list lang-const t))
    -		;;	    lang-const-list)
    -		(apply 'nconc (mapcar (lambda (lang-const)
    					(list lang-const t))
    -				      lang-const-list))))
    -    obarray))
    +      (puthash kwd (mapcar #'car lang-const-list)
    +	            table))
    +    table))
     
     (c-lang-defconst c-regular-keywords-regexp
       ;; Adorned regexp matching all keywords that should be fontified

An obarray, is just a hash-table indexed by strings where every hash
entry holds a 4-field struct (the fields are: `value`, `function`, and
`plist`, plus the read-only `name` field) with some restrictions (a
given such struct can only be placed in a single obarray; you can
remove such a symbol from an obarray, but you can't put it back in).

It's really rare for a program to need a hash-table where the values
carried by each entry happen to have just the shape of symbols.
It's common for the extra restrictions to be satisfied, but it's rare
for them to be useful.

c-lang-constants is one of the rare cases where you managed to make
fairly good use of all 3 fields.  Of course, rather than cl-structs, we
could use (uninterned) symbols in the hash-table to recover the speed
advantage of symbols (due to the fact that symbol-value,
get, and symbol-function have their own bytecode, and symbol-plist is
also hard-coded in C; so they are faster than cl-lib's struct-field
access which uses aref after manually checking that the object has the
expected type; and although `aref' also has its own bytecode it's made
slower by the need to handle various array types and to check array
bounds).

> If this makes a program faster, it suggests that the obarray
> implementation could be improved to match the speed of the hash
> table implementation.

Indeed.  But in most cases, the source code is made more clear when
using the hash-table API rather than the obarray API.

I think we'd be better off declaring the second arg of `intern' as obsolete.
Or make it accept a hash-table as second argument.

> Why don't we store the main Emacs obarray in a hash table, if this
> will increase speed?

Lack of time/motivation (at least on my part).


        Stefan




^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2017-07-24 14:06 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2017-03-13  1:36 new `obarray` type Stefan Monnier
2017-03-13 15:49 ` Eli Zaretskii
2017-03-13 17:22   ` Stefan Monnier
2017-03-13 22:03 ` Alan Mackenzie
2017-03-14  1:46   ` Herring, Davis
2017-03-14 12:52   ` Stefan Monnier
2017-03-14 20:14     ` Alan Mackenzie
2017-03-15 17:25       ` Stefan Monnier
2017-03-15 18:19         ` Lars Brinkhoff
2017-03-15 19:24           ` (:named nil) in cl-defstruct (was: new `obarray` type) Stefan Monnier
2017-03-15 19:39             ` Noam Postavsky
2017-03-15 20:28               ` (:named nil) in cl-defstruct Stefan Monnier
2017-07-23 14:03         ` Converting CC Mode's obarrays to hash tables. [Was: new `obarray` type] Alan Mackenzie
2017-07-24 14:06           ` Stefan Monnier

Code repositories for project(s) associated with this public inbox

	https://git.savannah.gnu.org/cgit/emacs.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).