unofficial mirror of bug-gnu-emacs@gnu.org 
 help / color / mirror / code / Atom feed
From: Pip Cet <pipcet@gmail.com>
To: Stefan Monnier <monnier@iro.umontreal.ca>
Cc: michael_heerdegen@web.de, npostavs@gmail.com, 36447@debbugs.gnu.org
Subject: bug#36447: 27.0.50; New "Unknown keyword" errors
Date: Fri, 5 Jul 2019 21:52:41 +0000	[thread overview]
Message-ID: <CAOqdjBda8_y1G4hf7y8-L3ec3yTfqYqMcbWjeMzATHfyr8Mz+A@mail.gmail.com> (raw)
In-Reply-To: <jwva7dsi6cw.fsf-monnier+emacs@gnu.org>

[-- Attachment #1: Type: text/plain, Size: 1096 bytes --]

On Fri, Jul 5, 2019 at 8:21 PM Stefan Monnier <monnier@iro.umontreal.ca> wrote:
> >> His patch can/should be made slightly more efficient by only doing the
> >> Fcopy_sequence on those hash-tables that are in purespace.
> >
> > How do we test for that?
>
> With PURE_P?

That only works before we dump, I think?

> > I'm not sure how difficult it would be, but we could dump the ->index,
> > ->next, ->hash vectors as Qnil (so not include the actual vectors in
> > the dump), which would make the dump slightly smaller and give us a
> > better test than h->count < 0:
>
> Except that it can't be done at the time of purecopy but only at the
> time we do the actual dump because the purecopied hashtable may be used
> in-between (which is also why count is kept positive by purecopy and is
> only made negative later).

Indeed. I'm attaching a proof of concept that we can simply freeze the
hash tables when dumping and thaw them when loading a dump, which
includes rehashing. Do you happen to know why it wasn't done that way?
I quite enjoyed removing all the scattered hash_rehash_if_needed()s.

[-- Attachment #2: 0001-rehash-hash-tables-eagerly-after-loading-the-pdumper.patch --]
[-- Type: text/x-patch, Size: 12704 bytes --]

From 83f915b32faf32e98ccd806179e379a13b76618d Mon Sep 17 00:00:00 2001
From: Pip Cet <pipcet@gmail.com>
Date: Fri, 5 Jul 2019 21:50:44 +0000
Subject: [PATCH] rehash hash tables eagerly after loading the pdumper dump.

---
 src/bytecode.c  |   1 -
 src/composite.c |   1 -
 src/emacs.c     |   1 +
 src/fns.c       |  27 +++-------
 src/lisp.h      |  15 ------
 src/minibuf.c   |   3 --
 src/pdumper.c   | 132 +++++++++++++++++++++++++++++++++---------------
 src/pdumper.h   |   1 +
 8 files changed, 102 insertions(+), 79 deletions(-)

diff --git a/src/bytecode.c b/src/bytecode.c
index 29dff44f00..9c72429e0c 100644
--- a/src/bytecode.c
+++ b/src/bytecode.c
@@ -1402,7 +1402,6 @@ #define DEFINE(name, value) LABEL (name) ,
             Lisp_Object v1 = POP;
             ptrdiff_t i;
             struct Lisp_Hash_Table *h = XHASH_TABLE (jmp_table);
-            hash_rehash_if_needed (h);
 
             /* h->count is a faster approximation for HASH_TABLE_SIZE (h)
                here. */
diff --git a/src/composite.c b/src/composite.c
index 183062de46..49a285cff0 100644
--- a/src/composite.c
+++ b/src/composite.c
@@ -654,7 +654,6 @@ gstring_lookup_cache (Lisp_Object header)
 composition_gstring_put_cache (Lisp_Object gstring, ptrdiff_t len)
 {
   struct Lisp_Hash_Table *h = XHASH_TABLE (gstring_hash_table);
-  hash_rehash_if_needed (h);
   Lisp_Object header = LGSTRING_HEADER (gstring);
   EMACS_UINT hash = h->test.hashfn (&h->test, header);
   if (len < 0)
diff --git a/src/emacs.c b/src/emacs.c
index 32bb57e272..a5969a96a3 100644
--- a/src/emacs.c
+++ b/src/emacs.c
@@ -1577,6 +1577,7 @@ main (int argc, char **argv)
   if (!initialized)
     {
       init_alloc_once ();
+      init_pdumper_once ();
       init_obarray_once ();
       init_eval_once ();
       init_charset_once ();
diff --git a/src/fns.c b/src/fns.c
index 2fc000a7f4..8b60311614 100644
--- a/src/fns.c
+++ b/src/fns.c
@@ -4218,6 +4218,9 @@ maybe_resize_hash_table (struct Lisp_Hash_Table *h)
     }
 }
 
+extern void
+hash_table_rehash (struct Lisp_Hash_Table *h);
+
 void
 hash_table_rehash (struct Lisp_Hash_Table *h)
 {
@@ -4226,12 +4229,11 @@ hash_table_rehash (struct Lisp_Hash_Table *h)
   /* Recompute the actual hash codes for each entry in the table.
      Order is still invalid.  */
   for (ptrdiff_t i = 0; i < size; ++i)
-    if (!NILP (HASH_HASH (h, i)))
-      {
-        Lisp_Object key = HASH_KEY (h, i);
-        EMACS_UINT hash_code = h->test.hashfn (&h->test, key);
-        set_hash_hash_slot (h, i, make_fixnum (hash_code));
-      }
+    {
+      Lisp_Object key = HASH_KEY (h, i);
+      EMACS_UINT hash_code = h->test.hashfn (&h->test, key);
+      set_hash_hash_slot (h, i, make_fixnum (hash_code));
+    }
 
   /* Reset the index so that any slot we don't fill below is marked
      invalid.  */
@@ -4247,13 +4249,6 @@ hash_table_rehash (struct Lisp_Hash_Table *h)
         set_hash_index_slot (h, start_of_bucket, i);
         eassert (HASH_NEXT (h, i) != i); /* Stop loops.  */
       }
-
-  /* Finally, mark the hash table as having a valid hash order.
-     Do this last so that if we're interrupted, we retry on next
-     access. */
-  eassert (h->count < 0);
-  h->count = -h->count;
-  eassert (!hash_rehash_needed_p (h));
 }
 
 /* Lookup KEY in hash table H.  If HASH is non-null, return in *HASH
@@ -4266,8 +4261,6 @@ hash_lookup (struct Lisp_Hash_Table *h, Lisp_Object key, EMACS_UINT *hash)
   EMACS_UINT hash_code;
   ptrdiff_t start_of_bucket, i;
 
-  hash_rehash_if_needed (h);
-
   hash_code = h->test.hashfn (&h->test, key);
   eassert ((hash_code & ~INTMASK) == 0);
   if (hash)
@@ -4296,8 +4289,6 @@ hash_put (struct Lisp_Hash_Table *h, Lisp_Object key, Lisp_Object value,
 {
   ptrdiff_t start_of_bucket, i;
 
-  hash_rehash_if_needed (h);
-
   eassert ((hash & ~INTMASK) == 0);
 
   /* Increment count after resizing because resizing may fail.  */
@@ -4331,8 +4322,6 @@ hash_remove_from_table (struct Lisp_Hash_Table *h, Lisp_Object key)
   ptrdiff_t start_of_bucket = hash_code % ASIZE (h->index);
   ptrdiff_t prev = -1;
 
-  hash_rehash_if_needed (h);
-
   for (ptrdiff_t i = HASH_INDEX (h, start_of_bucket);
        0 <= i;
        i = HASH_NEXT (h, i))
diff --git a/src/lisp.h b/src/lisp.h
index 1a1d8ee7e4..7b77dd97f5 100644
--- a/src/lisp.h
+++ b/src/lisp.h
@@ -2370,21 +2370,6 @@ HASH_TABLE_SIZE (const struct Lisp_Hash_Table *h)
   return ASIZE (h->next);
 }
 
-void hash_table_rehash (struct Lisp_Hash_Table *h);
-
-INLINE bool
-hash_rehash_needed_p (const struct Lisp_Hash_Table *h)
-{
-  return h->count < 0;
-}
-
-INLINE void
-hash_rehash_if_needed (struct Lisp_Hash_Table *h)
-{
-  if (hash_rehash_needed_p (h))
-    hash_table_rehash (h);
-}
-
 /* Default size for hash tables if not specified.  */
 
 enum DEFAULT_HASH_SIZE { DEFAULT_HASH_SIZE = 65 };
diff --git a/src/minibuf.c b/src/minibuf.c
index d932dbe8e2..a775d04332 100644
--- a/src/minibuf.c
+++ b/src/minibuf.c
@@ -1203,9 +1203,6 @@ DEFUN ("try-completion", Ftry_completion, Stry_completion, 2, 3, 0,
       bucket = AREF (collection, idx);
     }
 
-  if (HASH_TABLE_P (collection))
-    hash_rehash_if_needed (XHASH_TABLE (collection));
-
   while (1)
     {
       /* Get the next element of the alist, obarray, or hash-table.  */
diff --git a/src/pdumper.c b/src/pdumper.c
index c00f8a0af5..a3480b4b15 100644
--- a/src/pdumper.c
+++ b/src/pdumper.c
@@ -392,6 +392,8 @@ dump_fingerprint (const char *label, unsigned char const *xfingerprint)
      The start of the cold region is always aligned on a page
      boundary.  */
   dump_off cold_start;
+
+  dump_off hash_list;
 };
 
 /* Double-ended singly linked list.  */
@@ -549,6 +551,8 @@ dump_fingerprint (const char *label, unsigned char const *xfingerprint)
      heap objects.  */
   Lisp_Object bignum_data;
 
+  Lisp_Object hash_tables;
+
   unsigned number_hot_relocations;
   unsigned number_discardable_relocations;
 };
@@ -2647,42 +2651,61 @@ dump_hash_table_stable_p (const struct Lisp_Hash_Table *hash)
 
 /* Return a list of (KEY . VALUE) pairs in the given hash table.  */
 static Lisp_Object
-hash_table_contents (Lisp_Object table)
+hash_table_contents (struct Lisp_Hash_Table *h)
 {
   Lisp_Object contents = Qnil;
-  struct Lisp_Hash_Table *h = XHASH_TABLE (table);
   for (ptrdiff_t i = 0; i < HASH_TABLE_SIZE (h); ++i)
     if (!NILP (HASH_HASH (h, i)))
-      dump_push (&contents, Fcons (HASH_KEY (h, i), HASH_VALUE (h, i)));
+      {
+	dump_push (&contents, HASH_KEY (h, i));
+	dump_push (&contents, HASH_VALUE (h, i));
+      }
   return Fnreverse (contents);
 }
 
-/* Copy the given hash table, rehash it, and make sure that we can
-   look up all the values in the original.  */
+static dump_off
+dump_hash_table_list (struct dump_context *ctx)
+{
+  if (CONSP (ctx->hash_tables))
+    return dump_object (ctx, ctx->hash_tables);
+  else
+    return 0;
+}
+
 static void
-check_hash_table_rehash (Lisp_Object table_orig)
-{
-  hash_rehash_if_needed (XHASH_TABLE (table_orig));
-  Lisp_Object table_rehashed = Fcopy_hash_table (table_orig);
-  eassert (XHASH_TABLE (table_rehashed)->count >= 0);
-  XHASH_TABLE (table_rehashed)->count *= -1;
-  eassert (XHASH_TABLE (table_rehashed)->count <= 0);
-  hash_rehash_if_needed (XHASH_TABLE (table_rehashed));
-  eassert (XHASH_TABLE (table_rehashed)->count >= 0);
-  Lisp_Object expected_contents = hash_table_contents (table_orig);
-  while (!NILP (expected_contents))
+hash_table_freeze (struct Lisp_Hash_Table *h)
+{
+  if (h->count > 0)
+    h->count = -h->count;
+  h->key_and_value = hash_table_contents (h);
+  ptrdiff_t nkeys = XFIXNAT (Flength (h->key_and_value)) / 2;
+  if (nkeys == 0)
+    nkeys = 1;
+  h->index = h->next = h->hash = make_fixnum (nkeys);
+  h->count = nkeys;
+}
+
+/* Defined in fns.c.  */
+extern void hash_table_rehash (struct Lisp_Hash_Table *h);
+
+static void
+hash_table_thaw (struct Lisp_Hash_Table *h)
+{
+  Lisp_Object count = h->index;
+  h->index = Fmake_vector (h->index, make_fixnum (-1));
+  h->hash = Fmake_vector (h->hash, Qnil);
+  h->next = Fmake_vector (h->next, make_fixnum (-1));
+  Lisp_Object key_and_value = h->key_and_value;
+  h->key_and_value = Fmake_vector (make_fixnum (2 * XFIXNAT (count)), Qnil);
+  h->next_free = -1;
+  ptrdiff_t i = 0;
+  while (CONSP (key_and_value))
     {
-      Lisp_Object key_value_pair = dump_pop (&expected_contents);
-      Lisp_Object key = XCAR (key_value_pair);
-      Lisp_Object expected_value = XCDR (key_value_pair);
-      Lisp_Object arbitrary = Qdump_emacs_portable__sort_predicate_copied;
-      Lisp_Object found_value = Fgethash (key, table_rehashed, arbitrary);
-      eassert (EQ (expected_value, found_value));
-      Fremhash (key, table_rehashed);
+      ASET (h->key_and_value, i++, XCAR (key_and_value));
+      key_and_value = XCDR (key_and_value);
     }
 
-  eassert (EQ (Fhash_table_count (table_rehashed),
-               make_fixnum (0)));
+  hash_table_rehash (h);
 }
 
 static dump_off
@@ -2707,8 +2730,6 @@ dump_hash_table (struct dump_context *ctx,
         {
 	  eassert (offset == DUMP_OBJECT_ON_NORMAL_QUEUE
 		   || offset == DUMP_OBJECT_NOT_SEEN);
-          /* We still want to dump the actual keys and values now.  */
-          dump_enqueue_object (ctx, hash_in->key_and_value, WEIGHT_NONE);
           /* We'll get to the rest later.  */
           offset = DUMP_OBJECT_ON_HASH_TABLE_QUEUE;
           dump_remember_object (ctx, object, offset);
@@ -2717,22 +2738,11 @@ dump_hash_table (struct dump_context *ctx,
       return offset;
     }
 
-  if (PDUMPER_CHECK_REHASHING)
-    check_hash_table_rehash (make_lisp_ptr ((void *) hash_in, Lisp_Vectorlike));
-
   struct Lisp_Hash_Table hash_munged = *hash_in;
   struct Lisp_Hash_Table *hash = &hash_munged;
 
-  /* Remember to rehash this hash table on first access.  After a
-     dump reload, the hash table values will have changed, so we'll
-     need to rebuild the index.
-
-     TODO: for EQ and EQL hash tables, it should be possible to rehash
-     here using the preferred load address of the dump, eliminating
-     the need to rehash-on-access if we can load the dump where we
-     want.  */
-  if (hash->count > 0 && !is_stable)
-    hash->count = -hash->count;
+  hash_table_freeze (hash);
+  dump_push (&ctx->hash_tables, object);
 
   START_DUMP_PVEC (ctx, &hash->header, struct Lisp_Hash_Table, out);
   dump_pseudovector_lisp_fields (ctx, &out->header, &hash->header);
@@ -4141,6 +4151,20 @@ DEFUN ("dump-emacs-portable",
 	 || !NILP (ctx->deferred_hash_tables)
 	 || !NILP (ctx->deferred_symbols));
 
+  ctx->header.hash_list = ctx->offset;
+  dump_push (&ctx->hash_tables, Qnil);
+  dump_hash_table_list (ctx);
+
+  do
+    {
+      dump_drain_deferred_hash_tables (ctx);
+      dump_drain_deferred_symbols (ctx);
+      dump_drain_normal_queue (ctx);
+    }
+  while (!dump_queue_empty_p (&ctx->dump_queue)
+	 || !NILP (ctx->deferred_hash_tables)
+	 || !NILP (ctx->deferred_symbols));
+
   dump_sort_copied_objects (ctx);
 
   /* While we copy built-in symbols into the Emacs image, these
@@ -5429,6 +5453,13 @@ pdumper_load (const char *dump_filename)
   for (int i = 0; i < ARRAYELTS (sections); ++i)
     dump_mmap_reset (&sections[i]);
 
+  if (header->hash_list)
+    {
+      Lisp_Object hash_tables =
+	((Lisp_Object *)(dump_base + header->hash_list))[1];
+      Vpdumper_hash_tables = hash_tables;
+    }
+
   /* Run the functions Emacs registered for doing post-dump-load
      initialization.  */
   for (int i = 0; i < nr_dump_hooks; ++i)
@@ -5498,12 +5529,33 @@ DEFUN ("pdumper-stats", Fpdumper_stats, Spdumper_stats, 0, 0, 0,
 
 #endif /* HAVE_PDUMPER */
 
+
 \f
 
+static void thaw_hash_tables (void)
+{
+  Lisp_Object hash_tables = Vpdumper_hash_tables;
+  while (CONSP (hash_tables))
+    {
+      hash_table_thaw (XHASH_TABLE (XCAR (hash_tables)));
+      hash_tables = XCDR (hash_tables);
+    }
+}
+
+void
+init_pdumper_once (void)
+{
+  Vpdumper_hash_tables = Qnil;
+  pdumper_do_now_and_after_load (thaw_hash_tables);
+}
+
 void
 syms_of_pdumper (void)
 {
 #ifdef HAVE_PDUMPER
+  DEFVAR_LISP ("pdumper-hash-tables", Vpdumper_hash_tables,
+	       doc: /* A list of hash tables that need to be thawed after loading the pdump.  */);
+  Vpdumper_hash_tables = Qnil;
   defsubr (&Sdump_emacs_portable);
   defsubr (&Sdump_emacs_portable__sort_predicate);
   defsubr (&Sdump_emacs_portable__sort_predicate_copied);
diff --git a/src/pdumper.h b/src/pdumper.h
index ab2f426c1e..cfea06d33d 100644
--- a/src/pdumper.h
+++ b/src/pdumper.h
@@ -248,6 +248,7 @@ pdumper_clear_marks (void)
    file was loaded.  */
 extern void pdumper_record_wd (const char *);
 
+void init_pdumper_once (void);
 void syms_of_pdumper (void);
 
 INLINE_HEADER_END
-- 
2.20.1


  reply	other threads:[~2019-07-05 21:52 UTC|newest]

Thread overview: 53+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-06-30 18:23 bug#36447: 27.0.50; New "Unknown keyword" errors Michael Heerdegen
2019-06-30 18:43 ` Eli Zaretskii
2019-06-30 21:44   ` Michael Heerdegen
2019-07-01 12:25     ` Noam Postavsky
2019-07-01 13:20       ` Pip Cet
2019-07-01 22:04       ` Michael Heerdegen
2019-07-02  1:59         ` Stefan Kangas
2019-07-02 14:17           ` Eli Zaretskii
2019-07-02 13:29         ` Pip Cet
2019-07-02 15:35           ` Michael Heerdegen
2019-07-02 16:20             ` Noam Postavsky
2019-07-02 22:50               ` Pip Cet
2019-07-03 11:57                 ` Pip Cet
2019-07-05  1:59                   ` Michael Heerdegen
2019-07-05  6:35                     ` Pip Cet
2019-07-05  7:50                       ` Eli Zaretskii
2019-07-05  8:12                         ` Pip Cet
2019-07-05  8:25                           ` Eli Zaretskii
2019-07-05  8:36                             ` Pip Cet
2019-07-05  8:41                               ` Eli Zaretskii
2019-07-05  9:09                                 ` Pip Cet
2019-07-05 12:23                                   ` Robert Pluim
2019-07-05 12:33                                   ` Eli Zaretskii
2019-07-05 13:41                                     ` Pip Cet
2019-07-05 18:00                                     ` Stefan Monnier
2019-07-05 18:07                                       ` Eli Zaretskii
2019-07-05 20:16                                         ` Stefan Monnier
2019-07-05 18:57                                       ` Pip Cet
2019-07-05 19:13                                         ` Eli Zaretskii
2019-07-05 20:21                                         ` Stefan Monnier
2019-07-05 21:52                                           ` Pip Cet [this message]
2019-07-05 22:10                                             ` Stefan Monnier
2019-07-06  6:45                                               ` Eli Zaretskii
2019-07-06 15:08                                                 ` Pip Cet
2019-07-09 21:05                                                   ` Stefan Monnier
2019-07-10  2:38                                                     ` Eli Zaretskii
2019-07-10  3:19                                                       ` Daniel Colascione
2019-07-10 15:01                                                         ` Pip Cet
2019-07-10 17:16                                                           ` Daniel Colascione
2019-07-10 20:14                                                             ` Pip Cet
2019-07-06 15:32                                             ` Michael Heerdegen
2019-07-08 17:30                                               ` Lars Ingebrigtsen
2019-07-08 17:58                                                 ` Pip Cet
2019-07-08 22:18                                                   ` Lars Ingebrigtsen
2019-07-08 22:25                                                     ` Noam Postavsky
2019-07-09 14:00                                                       ` Pip Cet
2019-07-10  3:01                                                         ` Daniel Colascione
2019-07-14 14:06                                                           ` Noam Postavsky
2019-07-08 23:22                                                     ` Stefan Monnier
2019-07-08 22:23                                                   ` Michael Heerdegen
2019-07-09 15:43                                                     ` Eli Zaretskii
2019-07-09 20:15                                                   ` Stefan Monnier
2019-07-05  7:55                       ` Katsumi Yamaoka

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://www.gnu.org/software/emacs/

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=CAOqdjBda8_y1G4hf7y8-L3ec3yTfqYqMcbWjeMzATHfyr8Mz+A@mail.gmail.com \
    --to=pipcet@gmail.com \
    --cc=36447@debbugs.gnu.org \
    --cc=michael_heerdegen@web.de \
    --cc=monnier@iro.umontreal.ca \
    --cc=npostavs@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://git.savannah.gnu.org/cgit/emacs.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).