unofficial mirror of emacs-devel@gnu.org 
 help / color / mirror / code / Atom feed
From: Helmut Eller <eller.helmut@gmail.com>
To: Stefan Monnier <monnier@iro.umontreal.ca>
Cc: Stefan Kangas <stefankangas@gmail.com>,
	 emacs-devel@gnu.org,  Pip Cet <pipcet@protonmail.com>
Subject: Re: Merging scratch/no-purespace to remove unexec and purespace
Date: Sat, 21 Dec 2024 18:41:12 +0100	[thread overview]
Message-ID: <87o71553yf.fsf@gmail.com> (raw)
In-Reply-To: <87seql7a3o.fsf@gmail.com> (Helmut Eller's message of "Wed, 18 Dec 2024 07:56:27 +0100")

[-- Attachment #1: Type: text/plain, Size: 889 bytes --]

On Wed, Dec 18 2024, Helmut Eller wrote:

[...]
> So the pdumper copies objects from purespace to the dump like normal
> objects; when loading the dump, purespace stays empty.
>
> I had (wrongly) assumed that the pdumper creates a separate section for
> pure objects.  Creating such a section sounds easy enough (hmm, maybe
> not so easy because of hashtables).  Still not sure if it would be worth
> the effort.

Out of curiosity, I implemented such a section with the attached patch.
It seems that it would save ~2ms per collection cycle.

To measure this, I compared the output of 

 ./src/emacs -Q --batch --eval \
 '(let* ((stats (benchmark-run 1000 (garbage-collect)))) 
     (message "%s" (/ (elt stats 2) (elt stats 1))))'

between the versions with and without the patch.  The results was:

   without-pure-section: 0.006251480181
      with-pure-section: 0.003986384857

Helmut


[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: 0001-WIP-create-pure-space-in-dump.patch --]
[-- Type: text/x-diff, Size: 10755 bytes --]

From ad64bcdfa2d91e59e64adbcf896ddf66bba725cb Mon Sep 17 00:00:00 2001
From: Helmut Eller <eller.helmut@gmail.com>
Date: Sat, 21 Dec 2024 16:06:45 +0100
Subject: [PATCH] WIP create pure space in dump

---
 src/alloc.c    |   9 +++++
 src/frame.c    |   7 +++-
 src/pdumper.c  | 108 +++++++++++++++++++++++++++++++++++++++++++------
 src/puresize.h |   8 +++-
 4 files changed, 118 insertions(+), 14 deletions(-)

diff --git a/src/alloc.c b/src/alloc.c
index 4fab0d54248..6f57563d80c 100644
--- a/src/alloc.c
+++ b/src/alloc.c
@@ -386,7 +386,12 @@ static char *spare_memory[7];
    remapping on more recent systems because this is less important
    nowadays than in the days of small memories and timesharing.  */
 
+#if PDUMPER_PURE == 1
+EMACS_INT *pure;
+#else
 EMACS_INT pure[(PURESIZE + sizeof (EMACS_INT) - 1) / sizeof (EMACS_INT)] = {1,};
+#endif
+
 #define PUREBEG (char *) pure
 
 /* Pointer to the pure area, and its size.  */
@@ -8111,6 +8116,10 @@ init_alloc_once (void)
 static void
 init_alloc_once_for_pdumper (void)
 {
+#ifdef PDUMPER_PURE
+  pure = xzalloc ((PURESIZE + sizeof (EMACS_INT) - 1));
+  pure[0] = 2;
+#endif
   purebeg = PUREBEG;
   pure_size = PURESIZE;
   mem_init ();
diff --git a/src/frame.c b/src/frame.c
index f22bd501a8d..0bef8ba8661 100644
--- a/src/frame.c
+++ b/src/frame.c
@@ -53,6 +53,7 @@ along with GNU Emacs.  If not, see <https://www.gnu.org/licenses/>.  */
 #include "widget.h"
 #endif
 #include "pdumper.h"
+#include "puresize.h"
 
 /* The currently selected frame.  */
 Lisp_Object selected_frame;
@@ -1201,7 +1202,11 @@ make_initial_frame (void)
   Vframe_list = Fcons (frame, Vframe_list);
 
   tty_frame_count = 1;
-  fset_name (f, build_pure_c_string ("F1"));
+
+  if (PDUMPER_PURE)
+    fset_name (f, build_string ("F1"));
+  else
+    fset_name (f, build_pure_c_string ("F1"));
 
   SET_FRAME_VISIBLE (f, 1);
 
diff --git a/src/pdumper.c b/src/pdumper.c
index c8baa311854..47fa979da8c 100644
--- a/src/pdumper.c
+++ b/src/pdumper.c
@@ -45,6 +45,7 @@ along with GNU Emacs.  If not, see <https://www.gnu.org/licenses/>.  */
 #include "thread.h"
 #include "bignum.h"
 #include "treesit.h"
+#include "puresize.h"
 
 #ifdef CHECK_STRUCTS
 # include "dmpstruct.h"
@@ -399,6 +400,9 @@ struct dump_header
 
   /* Offset of a vector of the dumped hash tables.  */
   dump_off hash_list;
+
+  dump_off pure_start;
+  dump_off pure_end;
 };
 
 /* Double-ended singly linked list.  */
@@ -480,6 +484,7 @@ struct dump_flags
   bool_bf defer_cold_objects : 1;
   /* Punt on copied objects: defer them to ctx->copied_queue.  */
   bool_bf defer_copied_objects : 1;
+  bool_bf defer_pure_objects : 1;
 };
 
 /* Information we use while we dump.  Note that we're not the garbage
@@ -546,6 +551,7 @@ struct dump_context
   Lisp_Object copied_queue;
   /* Queue of cold objects to dump.  */
   Lisp_Object cold_queue;
+  Lisp_Object pure_queue;
 
   /* Relocations in the dump.  */
   Lisp_Object dump_relocs[RELOC_NUM_PHASES];
@@ -576,7 +582,8 @@ struct dump_context
    are physical dump offsets.  */
 enum dump_object_special_offset
   {
-   DUMP_OBJECT_IS_RUNTIME_MAGIC = -6,
+   DUMP_OBJECT_IS_RUNTIME_MAGIC = -7,
+   DUMP_OBJECT_ON_PURE_QUEUE = -6,
    DUMP_OBJECT_ON_COPIED_QUEUE = -5,
    DUMP_OBJECT_ON_HASH_TABLE_QUEUE = -4,
    DUMP_OBJECT_ON_SYMBOL_QUEUE = -3,
@@ -2620,15 +2627,18 @@ dump_vectorlike_generic (struct dump_context *ctx,
       skip = 0;
     }
 
-  /* We may have written a non-Lisp vector prefix above.  If we have,
-     pad to the lisp content start with zero, and make sure we didn't
-     scribble beyond that start.  */
-  dump_off prefix_size = ctx->offset - prefix_start_offset;
-  eassert (prefix_size > 0);
-  dump_off skip_start = ptrdiff_t_to_dump_off ((char *) &v->contents[skip]
-					       - (char *) v);
-  eassert (skip_start >= prefix_size);
-  dump_write_zero (ctx, skip_start - prefix_size);
+  if (ctx->flags.dump_object_contents)
+    {
+      /* We may have written a non-Lisp vector prefix above.  If we have,
+	 pad to the lisp content start with zero, and make sure we didn't
+	 scribble beyond that start.  */
+      dump_off prefix_size = ctx->offset - prefix_start_offset;
+      eassert (prefix_size > 0);
+      dump_off skip_start = ptrdiff_t_to_dump_off ((char *) &v->contents[skip]
+						   - (char *) v);
+      eassert (skip_start >= prefix_size);
+      dump_write_zero (ctx, skip_start - prefix_size);
+    }
 
   /* dump_object_start isn't what records conservative-GC object
      starts --- dump_object_1 does --- so the hack below of using
@@ -3148,6 +3158,24 @@ dump_vectorlike (struct dump_context *ctx,
   error_unsupported_dump_object (ctx, lv, msg);
 }
 
+static bool
+is_pure_object (Lisp_Object x)
+{
+  switch (XTYPE (x))
+    {
+    case Lisp_Symbol: return PURE_P (XSYMBOL (x));
+    case Lisp_String: return PURE_P (XSTRING (x));
+    case Lisp_Cons: return PURE_P (XCONS (x));
+    case Lisp_Vectorlike: return PURE_P (XVECTOR (x));
+      //case Lisp_Float: return PURE_P (XFLOAT (x));
+    case Lisp_Float: return false;
+    case Lisp_Int0: return false;
+    case Lisp_Int1: return false;
+    case Lisp_Type_Unused0: emacs_abort ();
+    }
+  emacs_abort ();
+}
+
 /* Add an object to the dump.
 
    CTX is the dump context; OBJECT is the object to add.  Normally,
@@ -3218,6 +3246,25 @@ dump_object (struct dump_context *ctx, Lisp_Object object)
       return offset;
     }
 
+  if (is_pure_object (object) && ctx->flags.defer_pure_objects)
+    {
+      if (offset != DUMP_OBJECT_ON_PURE_QUEUE)
+        {
+	  eassert (offset == DUMP_OBJECT_ON_NORMAL_QUEUE
+		   || offset == DUMP_OBJECT_NOT_SEEN);
+          dump_push (&ctx->pure_queue, object);
+	  offset = DUMP_OBJECT_ON_PURE_QUEUE;
+          dump_remember_object (ctx, object, offset);
+
+	  struct dump_flags old_flags = ctx->flags;
+	  ctx->flags.dump_object_contents = false;
+          ctx->flags.defer_pure_objects = false;
+          dump_object (ctx, object);
+          ctx->flags = old_flags;
+	}
+      return offset;
+    }
+
   /* Object needs to be dumped.  */
   if (dump_set_referrer (ctx))
     ctx->current_referrer = object;
@@ -3606,6 +3653,25 @@ dump_drain_cold_data (struct dump_context *ctx)
   ctx->flags = old_flags;
 }
 
+static void
+dump_drain_pure_data (struct dump_context *ctx)
+{
+  Lisp_Object pure_queue = Fnreverse (ctx->pure_queue);
+  ctx->pure_queue = Qnil;
+
+  struct dump_flags old_flags = ctx->flags;
+
+  /* Actually dump pure objects instead of deferring them.  */
+  ctx->flags.defer_pure_objects = false;
+
+  while (!NILP (pure_queue))
+    {
+      Lisp_Object item = dump_pop (&pure_queue);
+      dump_object (ctx, item);
+    }
+  ctx->flags = old_flags;
+}
+
 static void
 read_ptr_raw_and_lv (const void *mem,
                      enum Lisp_Type type,
@@ -4195,6 +4261,7 @@ types.  */)
   ctx->symbol_aux = Qnil;
   ctx->copied_queue = Qnil;
   ctx->cold_queue = Qnil;
+  ctx->pure_queue = Qnil;
   for (int i = 0; i < RELOC_NUM_PHASES; ++i)
     ctx->dump_relocs[i] = Qnil;
   ctx->object_starts = Qnil;
@@ -4216,6 +4283,7 @@ types.  */)
   /* These objects go into special sections.  */
   ctx->flags.defer_cold_objects = true;
   ctx->flags.defer_copied_objects = true;
+  ctx->flags.defer_pure_objects = true;
 
   ctx->current_referrer = Qnil;
   if (!NILP (track_referrers))
@@ -4297,6 +4365,11 @@ types.  */)
   ctx->header.hash_list = ctx->offset;
   dump_hash_table_list (ctx);
 
+  ctx->header.pure_start = ctx->offset;
+  dump_drain_pure_data (ctx);
+  ctx->header.pure_end = ctx->offset;
+  dump_write_zero (ctx, PURESIZE - (ctx->offset - ctx->header.pure_start));
+
   /* dump_hash_table_list just adds a new vector to the dump but all
      its content should already have been in the dump, so it doesn't
      add anything to any queue.  */
@@ -4378,6 +4451,7 @@ types.  */)
   eassert (dump_queue_empty_p (&ctx->dump_queue));
   eassert (NILP (ctx->copied_queue));
   eassert (NILP (ctx->cold_queue));
+  eassert (NILP (ctx->pure_queue));
   eassert (NILP (ctx->deferred_symbols));
   eassert (NILP (ctx->deferred_hash_tables));
   eassert (NILP (ctx->fixups));
@@ -4401,13 +4475,16 @@ types.  */)
     header_bytes = header_end - header_start,
     hot_bytes = hot_end - hot_start,
     discardable_bytes = discardable_end - ctx->header.discardable_start,
-    cold_bytes = cold_end - ctx->header.cold_start;
+    cold_bytes = cold_end - ctx->header.cold_start,
+    pure_bytes = ctx->header.pure_end - ctx->header.pure_start;
   fprintf (stderr,
 	   ("Dump complete\n"
 	    "Byte counts: header=%"PRIdDUMP_OFF" hot=%"PRIdDUMP_OFF
 	    " discardable=%"PRIdDUMP_OFF" cold=%"PRIdDUMP_OFF"\n"
+	    " pure=%"PRIdDUMP_OFF"\n"
 	    "Reloc counts: hot=%"PRIdDUMP_OFF" discardable=%"PRIdDUMP_OFF"\n"),
 	   header_bytes, hot_bytes, discardable_bytes, cold_bytes,
+	   pure_bytes,
            number_hot_relocations,
            number_discardable_relocations);
 
@@ -5270,7 +5347,8 @@ pdumper_find_object_type_impl (const void *obj)
     return PDUMPER_NO_OBJECT;
   ptrdiff_t bitno = offset / DUMP_ALIGNMENT;
   if (offset < dump_private.header.discardable_start
-      && !dump_bitset_bit_set_p (&dump_private.last_mark_bits, bitno))
+      && !dump_bitset_bit_set_p (&dump_private.last_mark_bits, bitno)
+      && !PURE_P (obj))
     return PDUMPER_NO_OBJECT;
   const struct dump_reloc *reloc =
     dump_find_relocation (&dump_private.header.object_starts, offset);
@@ -5814,6 +5892,12 @@ pdumper_load (const char *dump_filename, char *argv0)
   dump_do_all_dump_reloc_for_phase (header, dump_base, LATE_RELOCS);
   dump_do_all_dump_reloc_for_phase (header, dump_base, VERY_LATE_RELOCS);
 
+  if (PDUMPER_PURE)
+    {
+      xfree (pure);
+      pure = (EMACS_INT *)(dump_base + header->pure_start);
+    }
+
   /* Run the functions Emacs registered for doing post-dump-load
      initialization.  */
   for (int i = 0; i < nr_dump_late_hooks; ++i)
diff --git a/src/puresize.h b/src/puresize.h
index d7d8f0b4eec..1c906536eec 100644
--- a/src/puresize.h
+++ b/src/puresize.h
@@ -23,6 +23,8 @@ along with GNU Emacs.  If not, see <https://www.gnu.org/licenses/>.  */
 
 INLINE_HEADER_BEGIN
 
+#define PDUMPER_PURE 1
+
 /* Define PURESIZE, the number of bytes of pure Lisp code to leave space for.
 
    At one point, this was defined in config.h, meaning that changing
@@ -79,13 +81,17 @@ INLINE_HEADER_BEGIN
 
 extern AVOID pure_write_error (Lisp_Object);
 
+#if PDUMPER_PURE == 1
+extern EMACS_INT *pure;
+#else
 extern EMACS_INT pure[];
+#endif
 
 /* The puresize_h_* macros are private to this include file.  */
 
 /* True if PTR is pure.  */
 
-#define puresize_h_PURE_P(ptr) \
+#define puresize_h_PURE_P(ptr)				\
   ((uintptr_t) (ptr) - (uintptr_t) pure <= PURESIZE)
 
 INLINE bool
-- 
2.39.5


  reply	other threads:[~2024-12-21 17:41 UTC|newest]

Thread overview: 52+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-12-17 10:47 Merging scratch/no-purespace to remove unexec and purespace Stefan Kangas
2024-12-17 13:12 ` Gerd Möllmann
2024-12-17 14:20   ` Gerd Möllmann
2024-12-17 14:30     ` Gerd Möllmann
2024-12-17 17:56       ` Gerd Möllmann
2024-12-17 18:50         ` Eli Zaretskii
2024-12-17 18:56           ` Gerd Möllmann
2024-12-18 12:55             ` Andrea Corallo
2024-12-18 14:03               ` Gerd Möllmann
2024-12-18 16:05                 ` Pip Cet via Emacs development discussions.
2024-12-18 16:30                   ` Gerd Möllmann
2024-12-18 16:25                 ` Pip Cet via Emacs development discussions.
2024-12-18 22:27                   ` Andrea Corallo
2024-12-19  9:28                     ` Pip Cet via Emacs development discussions.
2024-12-19 10:38                       ` Andrea Corallo
2024-12-19 10:50                       ` Stefan Kangas
2024-12-19 12:08                         ` Pip Cet via Emacs development discussions.
2024-12-19 17:55                           ` Stefan Kangas
2024-12-19 20:13                             ` Pip Cet via Emacs development discussions.
2024-12-20 15:59                               ` Stefan Monnier
2024-12-20 16:22                                 ` Pip Cet via Emacs development discussions.
2024-12-20 17:25                                   ` Gerd Möllmann
2024-12-20 20:35                                     ` Andrea Corallo
2024-12-20 20:39                                       ` Pip Cet via Emacs development discussions.
2024-12-21  6:33                                         ` Gerd Möllmann
2024-12-21  6:56                                         ` Andrea Corallo
2024-12-20 20:38                                     ` Pip Cet via Emacs development discussions.
2024-12-20 20:57                                       ` Gerd Möllmann
2024-12-20  8:42                             ` Pip Cet via Emacs development discussions.
2024-12-18  0:18         ` Stefan Kangas
2024-12-17 19:30 ` Helmut Eller
2024-12-17 20:47   ` Stefan Monnier
2024-12-18  2:15     ` Stefan Kangas
2024-12-18  7:11       ` Helmut Eller
2024-12-18 13:35         ` Pip Cet via Emacs development discussions.
2024-12-18  6:56     ` Helmut Eller
2024-12-21 17:41       ` Helmut Eller [this message]
2024-12-21 18:32         ` Gerd Möllmann
2024-12-21 22:19         ` Pip Cet via Emacs development discussions.
2024-12-22  1:28         ` Stefan Kangas
2024-12-22 11:12           ` Pip Cet via Emacs development discussions.
2024-12-22 13:07             ` Eli Zaretskii
2024-12-22 14:12               ` Pip Cet via Emacs development discussions.
2024-12-22 15:51             ` Stefan Monnier
2024-12-22 17:09               ` Gerd Möllmann
2024-12-22 17:10               ` Pip Cet via Emacs development discussions.
2024-12-22 13:13           ` Pip Cet via Emacs development discussions.
2024-12-22 14:16           ` Helmut Eller
2024-12-18  9:30   ` Pip Cet via Emacs development discussions.
2024-12-18  0:50 ` Po Lu
2024-12-18  2:12   ` Stefan Kangas
2024-12-18 21:26   ` Stefan Monnier

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://www.gnu.org/software/emacs/

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=87o71553yf.fsf@gmail.com \
    --to=eller.helmut@gmail.com \
    --cc=emacs-devel@gnu.org \
    --cc=monnier@iro.umontreal.ca \
    --cc=pipcet@protonmail.com \
    --cc=stefankangas@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://git.savannah.gnu.org/cgit/emacs.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).