From: Pip Cet <pipcet@gmail.com>
To: Stefan Monnier <monnier@iro.umontreal.ca>
Cc: michael_heerdegen@web.de, npostavs@gmail.com, 36447@debbugs.gnu.org
Subject: bug#36447: 27.0.50; New "Unknown keyword" errors
Date: Fri, 5 Jul 2019 21:52:41 +0000 [thread overview]
Message-ID: <CAOqdjBda8_y1G4hf7y8-L3ec3yTfqYqMcbWjeMzATHfyr8Mz+A@mail.gmail.com> (raw)
In-Reply-To: <jwva7dsi6cw.fsf-monnier+emacs@gnu.org>
[-- Attachment #1: Type: text/plain, Size: 1096 bytes --]
On Fri, Jul 5, 2019 at 8:21 PM Stefan Monnier <monnier@iro.umontreal.ca> wrote:
> >> His patch can/should be made slightly more efficient by only doing the
> >> Fcopy_sequence on those hash-tables that are in purespace.
> >
> > How do we test for that?
>
> With PURE_P?
That only works before we dump, I think?
> > I'm not sure how difficult it would be, but we could dump the ->index,
> > ->next, ->hash vectors as Qnil (so not include the actual vectors in
> > the dump), which would make the dump slightly smaller and give us a
> > better test than h->count < 0:
>
> Except that it can't be done at the time of purecopy but only at the
> time we do the actual dump because the purecopied hashtable may be used
> in-between (which is also why count is kept positive by purecopy and is
> only made negative later).
Indeed. I'm attaching a proof of concept that we can simply freeze the
hash tables when dumping and thaw them when loading a dump, which
includes rehashing. Do you happen to know why it wasn't done that way?
I quite enjoyed removing all the scattered hash_rehash_if_needed()s.
[-- Attachment #2: 0001-rehash-hash-tables-eagerly-after-loading-the-pdumper.patch --]
[-- Type: text/x-patch, Size: 12704 bytes --]
From 83f915b32faf32e98ccd806179e379a13b76618d Mon Sep 17 00:00:00 2001
From: Pip Cet <pipcet@gmail.com>
Date: Fri, 5 Jul 2019 21:50:44 +0000
Subject: [PATCH] rehash hash tables eagerly after loading the pdumper dump.
---
src/bytecode.c | 1 -
src/composite.c | 1 -
src/emacs.c | 1 +
src/fns.c | 27 +++-------
src/lisp.h | 15 ------
src/minibuf.c | 3 --
src/pdumper.c | 132 +++++++++++++++++++++++++++++++++---------------
src/pdumper.h | 1 +
8 files changed, 102 insertions(+), 79 deletions(-)
diff --git a/src/bytecode.c b/src/bytecode.c
index 29dff44f00..9c72429e0c 100644
--- a/src/bytecode.c
+++ b/src/bytecode.c
@@ -1402,7 +1402,6 @@ #define DEFINE(name, value) LABEL (name) ,
Lisp_Object v1 = POP;
ptrdiff_t i;
struct Lisp_Hash_Table *h = XHASH_TABLE (jmp_table);
- hash_rehash_if_needed (h);
/* h->count is a faster approximation for HASH_TABLE_SIZE (h)
here. */
diff --git a/src/composite.c b/src/composite.c
index 183062de46..49a285cff0 100644
--- a/src/composite.c
+++ b/src/composite.c
@@ -654,7 +654,6 @@ gstring_lookup_cache (Lisp_Object header)
composition_gstring_put_cache (Lisp_Object gstring, ptrdiff_t len)
{
struct Lisp_Hash_Table *h = XHASH_TABLE (gstring_hash_table);
- hash_rehash_if_needed (h);
Lisp_Object header = LGSTRING_HEADER (gstring);
EMACS_UINT hash = h->test.hashfn (&h->test, header);
if (len < 0)
diff --git a/src/emacs.c b/src/emacs.c
index 32bb57e272..a5969a96a3 100644
--- a/src/emacs.c
+++ b/src/emacs.c
@@ -1577,6 +1577,7 @@ main (int argc, char **argv)
if (!initialized)
{
init_alloc_once ();
+ init_pdumper_once ();
init_obarray_once ();
init_eval_once ();
init_charset_once ();
diff --git a/src/fns.c b/src/fns.c
index 2fc000a7f4..8b60311614 100644
--- a/src/fns.c
+++ b/src/fns.c
@@ -4218,6 +4218,9 @@ maybe_resize_hash_table (struct Lisp_Hash_Table *h)
}
}
+extern void
+hash_table_rehash (struct Lisp_Hash_Table *h);
+
void
hash_table_rehash (struct Lisp_Hash_Table *h)
{
@@ -4226,12 +4229,11 @@ hash_table_rehash (struct Lisp_Hash_Table *h)
/* Recompute the actual hash codes for each entry in the table.
Order is still invalid. */
for (ptrdiff_t i = 0; i < size; ++i)
- if (!NILP (HASH_HASH (h, i)))
- {
- Lisp_Object key = HASH_KEY (h, i);
- EMACS_UINT hash_code = h->test.hashfn (&h->test, key);
- set_hash_hash_slot (h, i, make_fixnum (hash_code));
- }
+ {
+ Lisp_Object key = HASH_KEY (h, i);
+ EMACS_UINT hash_code = h->test.hashfn (&h->test, key);
+ set_hash_hash_slot (h, i, make_fixnum (hash_code));
+ }
/* Reset the index so that any slot we don't fill below is marked
invalid. */
@@ -4247,13 +4249,6 @@ hash_table_rehash (struct Lisp_Hash_Table *h)
set_hash_index_slot (h, start_of_bucket, i);
eassert (HASH_NEXT (h, i) != i); /* Stop loops. */
}
-
- /* Finally, mark the hash table as having a valid hash order.
- Do this last so that if we're interrupted, we retry on next
- access. */
- eassert (h->count < 0);
- h->count = -h->count;
- eassert (!hash_rehash_needed_p (h));
}
/* Lookup KEY in hash table H. If HASH is non-null, return in *HASH
@@ -4266,8 +4261,6 @@ hash_lookup (struct Lisp_Hash_Table *h, Lisp_Object key, EMACS_UINT *hash)
EMACS_UINT hash_code;
ptrdiff_t start_of_bucket, i;
- hash_rehash_if_needed (h);
-
hash_code = h->test.hashfn (&h->test, key);
eassert ((hash_code & ~INTMASK) == 0);
if (hash)
@@ -4296,8 +4289,6 @@ hash_put (struct Lisp_Hash_Table *h, Lisp_Object key, Lisp_Object value,
{
ptrdiff_t start_of_bucket, i;
- hash_rehash_if_needed (h);
-
eassert ((hash & ~INTMASK) == 0);
/* Increment count after resizing because resizing may fail. */
@@ -4331,8 +4322,6 @@ hash_remove_from_table (struct Lisp_Hash_Table *h, Lisp_Object key)
ptrdiff_t start_of_bucket = hash_code % ASIZE (h->index);
ptrdiff_t prev = -1;
- hash_rehash_if_needed (h);
-
for (ptrdiff_t i = HASH_INDEX (h, start_of_bucket);
0 <= i;
i = HASH_NEXT (h, i))
diff --git a/src/lisp.h b/src/lisp.h
index 1a1d8ee7e4..7b77dd97f5 100644
--- a/src/lisp.h
+++ b/src/lisp.h
@@ -2370,21 +2370,6 @@ HASH_TABLE_SIZE (const struct Lisp_Hash_Table *h)
return ASIZE (h->next);
}
-void hash_table_rehash (struct Lisp_Hash_Table *h);
-
-INLINE bool
-hash_rehash_needed_p (const struct Lisp_Hash_Table *h)
-{
- return h->count < 0;
-}
-
-INLINE void
-hash_rehash_if_needed (struct Lisp_Hash_Table *h)
-{
- if (hash_rehash_needed_p (h))
- hash_table_rehash (h);
-}
-
/* Default size for hash tables if not specified. */
enum DEFAULT_HASH_SIZE { DEFAULT_HASH_SIZE = 65 };
diff --git a/src/minibuf.c b/src/minibuf.c
index d932dbe8e2..a775d04332 100644
--- a/src/minibuf.c
+++ b/src/minibuf.c
@@ -1203,9 +1203,6 @@ DEFUN ("try-completion", Ftry_completion, Stry_completion, 2, 3, 0,
bucket = AREF (collection, idx);
}
- if (HASH_TABLE_P (collection))
- hash_rehash_if_needed (XHASH_TABLE (collection));
-
while (1)
{
/* Get the next element of the alist, obarray, or hash-table. */
diff --git a/src/pdumper.c b/src/pdumper.c
index c00f8a0af5..a3480b4b15 100644
--- a/src/pdumper.c
+++ b/src/pdumper.c
@@ -392,6 +392,8 @@ dump_fingerprint (const char *label, unsigned char const *xfingerprint)
The start of the cold region is always aligned on a page
boundary. */
dump_off cold_start;
+
+ dump_off hash_list;
};
/* Double-ended singly linked list. */
@@ -549,6 +551,8 @@ dump_fingerprint (const char *label, unsigned char const *xfingerprint)
heap objects. */
Lisp_Object bignum_data;
+ Lisp_Object hash_tables;
+
unsigned number_hot_relocations;
unsigned number_discardable_relocations;
};
@@ -2647,42 +2651,61 @@ dump_hash_table_stable_p (const struct Lisp_Hash_Table *hash)
/* Return a list of (KEY . VALUE) pairs in the given hash table. */
static Lisp_Object
-hash_table_contents (Lisp_Object table)
+hash_table_contents (struct Lisp_Hash_Table *h)
{
Lisp_Object contents = Qnil;
- struct Lisp_Hash_Table *h = XHASH_TABLE (table);
for (ptrdiff_t i = 0; i < HASH_TABLE_SIZE (h); ++i)
if (!NILP (HASH_HASH (h, i)))
- dump_push (&contents, Fcons (HASH_KEY (h, i), HASH_VALUE (h, i)));
+ {
+ dump_push (&contents, HASH_KEY (h, i));
+ dump_push (&contents, HASH_VALUE (h, i));
+ }
return Fnreverse (contents);
}
-/* Copy the given hash table, rehash it, and make sure that we can
- look up all the values in the original. */
+static dump_off
+dump_hash_table_list (struct dump_context *ctx)
+{
+ if (CONSP (ctx->hash_tables))
+ return dump_object (ctx, ctx->hash_tables);
+ else
+ return 0;
+}
+
static void
-check_hash_table_rehash (Lisp_Object table_orig)
-{
- hash_rehash_if_needed (XHASH_TABLE (table_orig));
- Lisp_Object table_rehashed = Fcopy_hash_table (table_orig);
- eassert (XHASH_TABLE (table_rehashed)->count >= 0);
- XHASH_TABLE (table_rehashed)->count *= -1;
- eassert (XHASH_TABLE (table_rehashed)->count <= 0);
- hash_rehash_if_needed (XHASH_TABLE (table_rehashed));
- eassert (XHASH_TABLE (table_rehashed)->count >= 0);
- Lisp_Object expected_contents = hash_table_contents (table_orig);
- while (!NILP (expected_contents))
+hash_table_freeze (struct Lisp_Hash_Table *h)
+{
+ if (h->count > 0)
+ h->count = -h->count;
+ h->key_and_value = hash_table_contents (h);
+ ptrdiff_t nkeys = XFIXNAT (Flength (h->key_and_value)) / 2;
+ if (nkeys == 0)
+ nkeys = 1;
+ h->index = h->next = h->hash = make_fixnum (nkeys);
+ h->count = nkeys;
+}
+
+/* Defined in fns.c. */
+extern void hash_table_rehash (struct Lisp_Hash_Table *h);
+
+static void
+hash_table_thaw (struct Lisp_Hash_Table *h)
+{
+ Lisp_Object count = h->index;
+ h->index = Fmake_vector (h->index, make_fixnum (-1));
+ h->hash = Fmake_vector (h->hash, Qnil);
+ h->next = Fmake_vector (h->next, make_fixnum (-1));
+ Lisp_Object key_and_value = h->key_and_value;
+ h->key_and_value = Fmake_vector (make_fixnum (2 * XFIXNAT (count)), Qnil);
+ h->next_free = -1;
+ ptrdiff_t i = 0;
+ while (CONSP (key_and_value))
{
- Lisp_Object key_value_pair = dump_pop (&expected_contents);
- Lisp_Object key = XCAR (key_value_pair);
- Lisp_Object expected_value = XCDR (key_value_pair);
- Lisp_Object arbitrary = Qdump_emacs_portable__sort_predicate_copied;
- Lisp_Object found_value = Fgethash (key, table_rehashed, arbitrary);
- eassert (EQ (expected_value, found_value));
- Fremhash (key, table_rehashed);
+ ASET (h->key_and_value, i++, XCAR (key_and_value));
+ key_and_value = XCDR (key_and_value);
}
- eassert (EQ (Fhash_table_count (table_rehashed),
- make_fixnum (0)));
+ hash_table_rehash (h);
}
static dump_off
@@ -2707,8 +2730,6 @@ dump_hash_table (struct dump_context *ctx,
{
eassert (offset == DUMP_OBJECT_ON_NORMAL_QUEUE
|| offset == DUMP_OBJECT_NOT_SEEN);
- /* We still want to dump the actual keys and values now. */
- dump_enqueue_object (ctx, hash_in->key_and_value, WEIGHT_NONE);
/* We'll get to the rest later. */
offset = DUMP_OBJECT_ON_HASH_TABLE_QUEUE;
dump_remember_object (ctx, object, offset);
@@ -2717,22 +2738,11 @@ dump_hash_table (struct dump_context *ctx,
return offset;
}
- if (PDUMPER_CHECK_REHASHING)
- check_hash_table_rehash (make_lisp_ptr ((void *) hash_in, Lisp_Vectorlike));
-
struct Lisp_Hash_Table hash_munged = *hash_in;
struct Lisp_Hash_Table *hash = &hash_munged;
- /* Remember to rehash this hash table on first access. After a
- dump reload, the hash table values will have changed, so we'll
- need to rebuild the index.
-
- TODO: for EQ and EQL hash tables, it should be possible to rehash
- here using the preferred load address of the dump, eliminating
- the need to rehash-on-access if we can load the dump where we
- want. */
- if (hash->count > 0 && !is_stable)
- hash->count = -hash->count;
+ hash_table_freeze (hash);
+ dump_push (&ctx->hash_tables, object);
START_DUMP_PVEC (ctx, &hash->header, struct Lisp_Hash_Table, out);
dump_pseudovector_lisp_fields (ctx, &out->header, &hash->header);
@@ -4141,6 +4151,20 @@ DEFUN ("dump-emacs-portable",
|| !NILP (ctx->deferred_hash_tables)
|| !NILP (ctx->deferred_symbols));
+ ctx->header.hash_list = ctx->offset;
+ dump_push (&ctx->hash_tables, Qnil);
+ dump_hash_table_list (ctx);
+
+ do
+ {
+ dump_drain_deferred_hash_tables (ctx);
+ dump_drain_deferred_symbols (ctx);
+ dump_drain_normal_queue (ctx);
+ }
+ while (!dump_queue_empty_p (&ctx->dump_queue)
+ || !NILP (ctx->deferred_hash_tables)
+ || !NILP (ctx->deferred_symbols));
+
dump_sort_copied_objects (ctx);
/* While we copy built-in symbols into the Emacs image, these
@@ -5429,6 +5453,13 @@ pdumper_load (const char *dump_filename)
for (int i = 0; i < ARRAYELTS (sections); ++i)
dump_mmap_reset (§ions[i]);
+ if (header->hash_list)
+ {
+ Lisp_Object hash_tables =
+ ((Lisp_Object *)(dump_base + header->hash_list))[1];
+ Vpdumper_hash_tables = hash_tables;
+ }
+
/* Run the functions Emacs registered for doing post-dump-load
initialization. */
for (int i = 0; i < nr_dump_hooks; ++i)
@@ -5498,12 +5529,33 @@ DEFUN ("pdumper-stats", Fpdumper_stats, Spdumper_stats, 0, 0, 0,
#endif /* HAVE_PDUMPER */
+
\f
+static void thaw_hash_tables (void)
+{
+ Lisp_Object hash_tables = Vpdumper_hash_tables;
+ while (CONSP (hash_tables))
+ {
+ hash_table_thaw (XHASH_TABLE (XCAR (hash_tables)));
+ hash_tables = XCDR (hash_tables);
+ }
+}
+
+void
+init_pdumper_once (void)
+{
+ Vpdumper_hash_tables = Qnil;
+ pdumper_do_now_and_after_load (thaw_hash_tables);
+}
+
void
syms_of_pdumper (void)
{
#ifdef HAVE_PDUMPER
+ DEFVAR_LISP ("pdumper-hash-tables", Vpdumper_hash_tables,
+ doc: /* A list of hash tables that need to be thawed after loading the pdump. */);
+ Vpdumper_hash_tables = Qnil;
defsubr (&Sdump_emacs_portable);
defsubr (&Sdump_emacs_portable__sort_predicate);
defsubr (&Sdump_emacs_portable__sort_predicate_copied);
diff --git a/src/pdumper.h b/src/pdumper.h
index ab2f426c1e..cfea06d33d 100644
--- a/src/pdumper.h
+++ b/src/pdumper.h
@@ -248,6 +248,7 @@ pdumper_clear_marks (void)
file was loaded. */
extern void pdumper_record_wd (const char *);
+void init_pdumper_once (void);
void syms_of_pdumper (void);
INLINE_HEADER_END
--
2.20.1
next prev parent reply other threads:[~2019-07-05 21:52 UTC|newest]
Thread overview: 53+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-06-30 18:23 bug#36447: 27.0.50; New "Unknown keyword" errors Michael Heerdegen
2019-06-30 18:43 ` Eli Zaretskii
2019-06-30 21:44 ` Michael Heerdegen
2019-07-01 12:25 ` Noam Postavsky
2019-07-01 13:20 ` Pip Cet
2019-07-01 22:04 ` Michael Heerdegen
2019-07-02 1:59 ` Stefan Kangas
2019-07-02 14:17 ` Eli Zaretskii
2019-07-02 13:29 ` Pip Cet
2019-07-02 15:35 ` Michael Heerdegen
2019-07-02 16:20 ` Noam Postavsky
2019-07-02 22:50 ` Pip Cet
2019-07-03 11:57 ` Pip Cet
2019-07-05 1:59 ` Michael Heerdegen
2019-07-05 6:35 ` Pip Cet
2019-07-05 7:50 ` Eli Zaretskii
2019-07-05 8:12 ` Pip Cet
2019-07-05 8:25 ` Eli Zaretskii
2019-07-05 8:36 ` Pip Cet
2019-07-05 8:41 ` Eli Zaretskii
2019-07-05 9:09 ` Pip Cet
2019-07-05 12:23 ` Robert Pluim
2019-07-05 12:33 ` Eli Zaretskii
2019-07-05 13:41 ` Pip Cet
2019-07-05 18:00 ` Stefan Monnier
2019-07-05 18:07 ` Eli Zaretskii
2019-07-05 20:16 ` Stefan Monnier
2019-07-05 18:57 ` Pip Cet
2019-07-05 19:13 ` Eli Zaretskii
2019-07-05 20:21 ` Stefan Monnier
2019-07-05 21:52 ` Pip Cet [this message]
2019-07-05 22:10 ` Stefan Monnier
2019-07-06 6:45 ` Eli Zaretskii
2019-07-06 15:08 ` Pip Cet
2019-07-09 21:05 ` Stefan Monnier
2019-07-10 2:38 ` Eli Zaretskii
2019-07-10 3:19 ` Daniel Colascione
2019-07-10 15:01 ` Pip Cet
2019-07-10 17:16 ` Daniel Colascione
2019-07-10 20:14 ` Pip Cet
2019-07-06 15:32 ` Michael Heerdegen
2019-07-08 17:30 ` Lars Ingebrigtsen
2019-07-08 17:58 ` Pip Cet
2019-07-08 22:18 ` Lars Ingebrigtsen
2019-07-08 22:25 ` Noam Postavsky
2019-07-09 14:00 ` Pip Cet
2019-07-10 3:01 ` Daniel Colascione
2019-07-14 14:06 ` Noam Postavsky
2019-07-08 23:22 ` Stefan Monnier
2019-07-08 22:23 ` Michael Heerdegen
2019-07-09 15:43 ` Eli Zaretskii
2019-07-09 20:15 ` Stefan Monnier
2019-07-05 7:55 ` Katsumi Yamaoka
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: https://www.gnu.org/software/emacs/
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=CAOqdjBda8_y1G4hf7y8-L3ec3yTfqYqMcbWjeMzATHfyr8Mz+A@mail.gmail.com \
--to=pipcet@gmail.com \
--cc=36447@debbugs.gnu.org \
--cc=michael_heerdegen@web.de \
--cc=monnier@iro.umontreal.ca \
--cc=npostavs@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://git.savannah.gnu.org/cgit/emacs.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).