unofficial mirror of bug-gnu-emacs@gnu.org 
 help / color / mirror / code / Atom feed
From: Daniel Colascione <dancol@dancol.org>
To: Stefan Monnier <monnier@iro.umontreal.ca>
Cc: Dmitry Antipov <dmantipov@yandex.ru>, 17168@debbugs.gnu.org
Subject: bug#17168: 24.3.50; Segfault at mark_object
Date: Sat, 05 Apr 2014 15:37:37 -0700	[thread overview]
Message-ID: <534085B1.9070307@dancol.org> (raw)
In-Reply-To: <533DB4F0.20706@dancol.org>

[-- Attachment #1: Type: text/plain, Size: 16519 bytes --]

On 04/03/2014 12:22 PM, Daniel Colascione wrote:
> On 04/03/2014 12:21 PM, Stefan Monnier wrote:
>>> Sure; I don't think it's too late to take pure storage out of 24.4
>>
>> It is definitely too late for that.
> 
> Okay. Let's try your proposed solution then. I'll see whether I can code
> something up today.

The patch came out more complicated than I'd hoped. Basically, we define
a new variable Vpure_reachable, accessible only from C. Early in
startup, we make it a plain list and cons reachable but non-pure objects
from Fpurecopy onto it. Once we have hash tables available, we turn it
into a hash table. At the end of loadup, instead of just setting
purify-flag to nil, we call a new subr finalize-pure-storage.

finalize-pure-storage sets purify-flag to nil by side effect and, as new
behavior, makes purify-flag constant so that it can never again become
non-nil. Before returning, finalize-pure-storage also turns
Vpure_reachable into a vector *in pure storage* of objects we need to
keep around. Fgarbage_collect knows how to mark objects in
Vpure_reachable and understands that if Vpure_reachable is a vector, its
contents should be marked, not the vector itself.

This scheme works and passes Dmitry's test, but the resulting
Vpure_reachable vector has over 8,000 items. Most of these items are
ordinary interned symbols. As an optimization, when we build the final
vector form of Fpure_reachable, we see whether each item is a symbol
interned in the initial obarray. If it is, then instead of adding it to
the vector, we mark the symbol as un-uninternable, and add code to
Funintern to look for this new flag. After this optimization,
Vpure-reachable only has 251 elements.

Please review.

=== modified file 'lisp/loadup.el'
--- lisp/loadup.el	2014-02-10 01:34:22 +0000
+++ lisp/loadup.el	2014-04-05 22:24:34 +0000
@@ -56,7 +56,7 @@
 	  t))
     (let ((dir (car load-path)))
       ;; We'll probably overflow the pure space.
-      (setq purify-flag nil)
+      (finalize-pure-storage)
       (setq load-path (list (expand-file-name "." dir)
 			    (expand-file-name "emacs-lisp" dir)
 			    (expand-file-name "language" dir)
@@ -389,12 +389,11 @@
     (message "Pure-hashed: %d strings, %d vectors, %d conses, %d
bytecodes, %d others"
              strings vectors conses bytecodes others)))

-;; Avoid error if user loads some more libraries now and make sure the
-;; hash-consing hash table is GC'd.
-(setq purify-flag nil)
-
-(if (null (garbage-collect))
-    (setq pure-space-overflow t))
+;; Runs garbage-collect and sets purify-flag to nil by side effect.
+(when (and purify-flag
+           (progn (finalize-pure-storage)
+                  (not (garbage-collect))))
+  (setq pure-space-overflow t))

 (if (or (member (nth 3 command-line-args) '("dump" "bootstrap"))
 	(member (nth 4 command-line-args) '("dump" "bootstrap")))

=== modified file 'src/alloc.c'
--- src/alloc.c	2014-04-03 09:50:58 +0000
+++ src/alloc.c	2014-04-05 22:30:18 +0000
@@ -173,6 +173,14 @@
 static char *purebeg;
 static ptrdiff_t pure_size;

+/* Data structure holding non-pure objects reachable from objects in
+   pure storage.  Initially a list, since we need this data structure
+   before we've initialized enough of Emacs to make hash tables.  We
+   transform it into a hash table when hash tables become available.
+   In `finalize-pure-storage', we turn Vpure_reachable into a vector in
+   pure storage.  */
+static Lisp_Object Vpure_reachable;
+
 /* Number of bytes of pure storage used before pure storage overflowed.
    If this is non-zero, this implies that an overflow occurred.  */

@@ -196,6 +204,8 @@

 const char *pending_malloc_warning;

+static Lisp_Object purecopy_1 (Lisp_Object obj, bool top_level);
+
 #if 0 /* Normally, pointer sanity only on request... */
 #ifdef ENABLE_CHECKING
 #define SUSPICIOUS_OBJECT_CHECKING 1
@@ -5228,8 +5238,8 @@
   Lisp_Object new;
   struct Lisp_Cons *p = pure_alloc (sizeof *p, Lisp_Cons);
   XSETCONS (new, p);
-  XSETCAR (new, Fpurecopy (car));
-  XSETCDR (new, Fpurecopy (cdr));
+  XSETCAR (new, purecopy_1 (car, false));
+  XSETCDR (new, purecopy_1 (cdr, false));
   return new;
 }

@@ -5261,12 +5271,8 @@
   return new;
 }

-
-DEFUN ("purecopy", Fpurecopy, Spurecopy, 1, 1, 0,
-       doc: /* Make a copy of object OBJ in pure storage.
-Recursively copies contents of vectors and cons cells.
-Does not copy symbols.  Copies strings without text properties.  */)
-  (register Lisp_Object obj)
+static Lisp_Object
+purecopy_1 (Lisp_Object obj, bool top_level)
 {
   if (NILP (Vpurify_flag))
     return obj;
@@ -5300,7 +5306,7 @@
 	size &= PSEUDOVECTOR_SIZE_MASK;
       vec = XVECTOR (make_pure_vector (size));
       for (i = 0; i < size; i++)
-	vec->contents[i] = Fpurecopy (AREF (obj, i));
+	vec->contents[i] = purecopy_1 (AREF (obj, i), false);
       if (COMPILEDP (obj))
 	{
 	  XSETPVECTYPE (vec, PVEC_COMPILED);
@@ -5311,9 +5317,20 @@
     }
   else if (MARKERP (obj))
     error ("Attempt to copy a marker to pure storage");
-  else
+  else if (top_level)
     /* Not purified, don't hash-cons.  */
     return obj;
+  else if (!INTEGERP (obj) && !EQ (obj, Qt) && !EQ (obj, Qnil))
+    {
+      /* Object is reachable from a pure object, so we need remember
+         it as a GC root: we don't mark pure objects themselves.  */
+      if (NILP (Vpure_reachable) || CONSP (Vpure_reachable))
+        Vpure_reachable = Fcons (obj, Vpure_reachable);
+      else
+        Fputhash (obj, Qnil, Vpure_reachable);
+
+      return obj;
+    }

   if (HASH_TABLE_P (Vpurify_flag)) /* Hash consing.  */
     Fputhash (obj, obj, Vpurify_flag);
@@ -5322,6 +5339,73 @@
 }


+DEFUN ("purecopy", Fpurecopy, Spurecopy, 1, 1, 0,
+       doc: /* Make a copy of object OBJ in pure storage.
+Recursively copies contents of vectors and cons cells.
+Does not copy symbols.  Copies strings without text properties.  */)
+  (register Lisp_Object obj)
+{
+  return purecopy_1 (obj, true);
+}
+
+DEFUN ("finalize-pure-storage", Ffinalize_pure_storage,
+       Sfinalize_pure_storage, 0, 0, 0,
+       doc: /* Finishes building pure storage.
+May be called only once, with purify-flag non-nil.  */)
+     (void)
+{
+  struct Lisp_Hash_Table *h;
+  ptrdiff_t nr_reachable;
+  Lisp_Object new_pure_reachable;
+  Lisp_Object reachable_object;
+  ptrdiff_t i;
+  Lisp_Object reachable_objects;
+
+  if (NILP (Vpurify_flag))
+    error ("Purification not started");
+
+  eassert (HASH_TABLE_P (Vpure_reachable));
+  h = XHASH_TABLE (Vpure_reachable);
+
+  reachable_objects = Qnil;
+  nr_reachable = 0;
+
+  for (i = 0; i < HASH_TABLE_SIZE (h); ++i)
+    if (!NILP (HASH_HASH (h, i)))
+      {
+        reachable_object = HASH_KEY (h, i);
+        if (SYMBOLP (reachable_object))
+          {
+            if (SYMBOL_INTERNED_IN_INITIAL_OBARRAY_P (reachable_object))
+              XSYMBOL (reachable_object)->interned =
+                SYMBOL_INTERNED_IN_INITIAL_OBARRAY_CANNOT_UNINTERN;
+
+            if (XSYMBOL (reachable_object)->interned
+                == SYMBOL_INTERNED_IN_INITIAL_OBARRAY_CANNOT_UNINTERN)
+              {
+                /* No need to remember this object, since it's already
+                   on the main obarray and won't be uninterned.  */
+                continue;
+              }
+          }
+
+        nr_reachable += 1;
+        reachable_objects = Fcons (reachable_object, reachable_objects);
+      }
+
+  new_pure_reachable = make_pure_vector (nr_reachable);
+  for (i = 0; CONSP (reachable_objects); ++i)
+    {
+      XVECTOR (new_pure_reachable)->contents[i] = XCAR (reachable_objects);
+      reachable_objects = XCDR (reachable_objects);
+    }
+
+  XSYMBOL (intern_c_string ("purify-flag"))->constant = 1;
+  Vpurify_flag = Qnil;
+  Vpure_reachable = new_pure_reachable;
+  return Qnil;
+}
+
 \f
 /***********************************************************************
 			  Protection from GC
@@ -5578,6 +5662,19 @@
   for (i = 0; i < staticidx; i++)
     mark_object (*staticvec[i]);

+  if (VECTORP (Vpure_reachable))
+    {
+      /* Vpure_reachable is a pure-allocated vector of objects
+         reachable from pure storage.  We can't mark it, but we can
+         mark its contents.  */
+      struct Lisp_Vector* pv = XVECTOR (Vpure_reachable);
+      eassert (PURE_POINTER_P (pv));
+      for (i = 0; i < pv->header.size; ++i)
+        mark_object (pv->contents[i]);
+    }
+  else
+    mark_object (Vpure_reachable);
+
   mark_specpdl ();
   mark_terminals ();
   mark_kboards ();
@@ -6581,12 +6678,7 @@

       for (; sym < end; ++sym)
         {
-          /* Check if the symbol was created during loadup.  In such a case
-             it might be pointed to by pure bytecode which we don't trace,
-             so we conservatively assume that it is live.  */
-          bool pure_p = PURE_POINTER_P (XSTRING (sym->s.name));
-
-          if (!sym->s.gcmarkbit && !pure_p)
+          if (!sym->s.gcmarkbit)
             {
               if (sym->s.redirect == SYMBOL_LOCALIZED)
                 xfree (SYMBOL_BLV (&sym->s));
@@ -6600,8 +6692,6 @@
           else
             {
               ++num_used;
-              if (!pure_p)
-                eassert (!STRING_MARKED_P (XSTRING (sym->s.name)));
               sym->s.gcmarkbit = 0;
               /* Attempt to catch bogus objects.  */
               eassert (valid_lisp_object_p (sym->s.function) >= 1);
@@ -6922,6 +7012,9 @@
   /* Used to do Vpurify_flag = Qt here, but Qt isn't set up yet!  */
   purebeg = PUREBEG;
   pure_size = PURESIZE;
+#ifdef ENABLE_CHECKING
+  Vpure_reachable = make_number (-1);
+#endif

 #if GC_MARK_STACK || defined GC_MALLOC_CHECK
   mem_init ();
@@ -6941,6 +7034,39 @@
 }

 void
+init_alloc_once_post_obarray (void)
+{
+  /* This function is called after Qnil and Qt make sense.  Qt is
+  correct even if CANNOT_DUMP.  loadup.el will set to nil at end.  */
+  Vpurify_flag = Qt;
+  Vpure_reachable = Qnil;
+  /* We don't need to staticpro Vpure_reachable as we mark is specially
+     in Fgarbage_collect.  */
+}
+
+void
+init_alloc_once_post_hash_tables (void)
+{
+  /* This function is called after hash tables become available.  Make
+     Vpure_reachable a hash table for more efficiency.  */
+  Lisp_Object reachable_list = Vpure_reachable;
+  Lisp_Object new_pure_reachable =
+    make_hash_table (hashtest_eq,
+                     make_number (DEFAULT_HASH_SIZE),
+                     make_float (DEFAULT_REHASH_SIZE),
+                     make_float (DEFAULT_REHASH_THRESHOLD),
+                     Qnil);
+
+  while (CONSP (reachable_list))
+    {
+      Fputhash (XCAR (reachable_list), Qnil, new_pure_reachable);
+      reachable_list = XCDR (reachable_list);
+    }
+
+  Vpure_reachable = new_pure_reachable;
+}
+
+void
 init_alloc (void)
 {
   gcprolist = 0;
@@ -7068,6 +7194,7 @@
   defsubr (&Smake_symbol);
   defsubr (&Smake_marker);
   defsubr (&Spurecopy);
+  defsubr (&Sfinalize_pure_storage);
   defsubr (&Sgarbage_collect);
   defsubr (&Smemory_limit);
   defsubr (&Smemory_use_counts);

=== modified file 'src/emacs.c'
--- src/emacs.c	2014-04-03 07:14:02 +0000
+++ src/emacs.c	2014-04-05 20:33:09 +0000
@@ -1171,6 +1171,7 @@
     {
       init_alloc_once ();
       init_obarray ();
+      init_alloc_once_post_obarray ();
       init_eval_once ();
       init_charset_once ();
       init_coding_once ();
@@ -1198,6 +1199,7 @@
       /* Called before syms_of_fileio, because it sets up
Qerror_condition.  */
       syms_of_data ();
       syms_of_fns ();	   /* Before syms_of_charset which uses
hashtables.  */
+      init_alloc_once_post_hash_tables ();
       syms_of_fileio ();
       /* Before syms_of_coding to initialize Vgc_cons_threshold.  */
       syms_of_alloc ();
@@ -2078,7 +2080,6 @@
 You must run Emacs in batch mode in order to dump it.  */)
   (Lisp_Object filename, Lisp_Object symfile)
 {
-  Lisp_Object tem;
   Lisp_Object symbol;
   ptrdiff_t count = SPECPDL_INDEX ();

@@ -2090,6 +2091,9 @@
   if (!might_dump)
     error ("Emacs can be dumped only once");

+  if (!NILP (Vpurify_flag))
+    error ("Purification must have completed before dumping");
+
 #ifdef GNU_LINUX

   /* Warn if the gap between BSS end and heap start is larger than
this.  */
@@ -2127,9 +2131,6 @@
 	}
     }

-  tem = Vpurify_flag;
-  Vpurify_flag = Qnil;
-
 #ifdef HAVE_TZSET
   set_time_zone_rule (dump_tz);
 #ifndef LOCALTIME_CACHE
@@ -2173,8 +2174,6 @@
   reset_image_types ();
 #endif

-  Vpurify_flag = tem;
-
   return unbind_to (count, Qnil);
 }


=== modified file 'src/fns.c'
--- src/fns.c	2014-04-01 20:18:12 +0000
+++ src/fns.c	2014-04-05 21:39:19 +0000
@@ -3483,8 +3483,9 @@
 			 Low-level Functions
  ***********************************************************************/

-static struct hash_table_test hashtest_eq;
-struct hash_table_test hashtest_eql, hashtest_equal;
+struct hash_table_test hashtest_eq;
+struct hash_table_test hashtest_eql;
+struct hash_table_test hashtest_equal;

 /* Compare KEY1 which has hash code HASH1 and KEY2 with hash code
    HASH2 in hash table H using `eql'.  Value is true if KEY1 and

=== modified file 'src/lisp.h'
--- src/lisp.h	2014-04-03 00:18:08 +0000
+++ src/lisp.h	2014-04-05 22:13:57 +0000
@@ -1537,7 +1537,8 @@
 {
   SYMBOL_UNINTERNED = 0,
   SYMBOL_INTERNED = 1,
-  SYMBOL_INTERNED_IN_INITIAL_OBARRAY = 2
+  SYMBOL_INTERNED_IN_INITIAL_OBARRAY = 2,
+  SYMBOL_INTERNED_IN_INITIAL_OBARRAY_CANNOT_UNINTERN = 3
 };

 enum symbol_redirect
@@ -1658,7 +1659,14 @@
 INLINE bool
 SYMBOL_INTERNED_IN_INITIAL_OBARRAY_P (Lisp_Object sym)
 {
-  return XSYMBOL (sym)->interned == SYMBOL_INTERNED_IN_INITIAL_OBARRAY;
+  return XSYMBOL (sym)->interned >= SYMBOL_INTERNED_IN_INITIAL_OBARRAY;
+}
+
+INLINE bool
+SYMBOL_CANNOT_UNINTERN_P (Lisp_Object sym)
+{
+  return XSYMBOL (sym)->interned ==
+    SYMBOL_INTERNED_IN_INITIAL_OBARRAY_CANNOT_UNINTERN;
 }

 /* Value is non-zero if symbol is considered a constant, i.e. its
@@ -3450,7 +3458,7 @@
 ptrdiff_t hash_lookup (struct Lisp_Hash_Table *, Lisp_Object,
EMACS_UINT *);
 ptrdiff_t hash_put (struct Lisp_Hash_Table *, Lisp_Object, Lisp_Object,
 		    EMACS_UINT);
-extern struct hash_table_test hashtest_eql, hashtest_equal;
+extern struct hash_table_test hashtest_eq, hashtest_eql, hashtest_equal;

 extern Lisp_Object substring_both (Lisp_Object, ptrdiff_t, ptrdiff_t,
 				   ptrdiff_t, ptrdiff_t);
@@ -3741,6 +3749,8 @@
 extern void free_marker (Lisp_Object);
 extern void free_cons (struct Lisp_Cons *);
 extern void init_alloc_once (void);
+extern void init_alloc_once_post_obarray (void);
+extern void init_alloc_once_post_hash_tables (void);
 extern void init_alloc (void);
 extern void syms_of_alloc (void);
 extern struct buffer * allocate_buffer (void);

=== modified file 'src/lread.c'
--- src/lread.c	2014-02-25 22:51:34 +0000
+++ src/lread.c	2014-04-05 22:11:09 +0000
@@ -3895,10 +3895,17 @@
   if (SYMBOLP (name) && !EQ (name, tem))
     return Qnil;

-  /* There are plenty of other symbols which will screw up the Emacs
-     session if we unintern them, as well as even more ways to use
-     `setq' or `fset' or whatnot to make the Emacs session
-     unusable.  Let's not go down this silly road.  --Stef  */
+  if (XSYMBOL (tem)->interned
+      == SYMBOL_INTERNED_IN_INITIAL_OBARRAY_CANNOT_UNINTERN)
+    {
+      /* We can't unintern this symbol because pure storage might
+         refer to it.  If we were to allow uninterning, we'd have to
+         remember these symbols as GC roots elsewhere, and if the user
+         later re-interned them, the core functionality would refer to
+         symbols with a different name.  */
+      error ("Attempt to unintern symbol in Emacs core");
+    }
+
   /* if (EQ (tem, Qnil) || EQ (tem, Qt))
        error ("Attempt to unintern t or nil"); */

@@ -4052,9 +4059,6 @@
   XSYMBOL (Qnil)->declared_special = 1;
   XSYMBOL (Qt)->constant = 1;

-  /* Qt is correct even if CANNOT_DUMP.  loadup.el will set to nil at
end.  */
-  Vpurify_flag = Qt;
-
   DEFSYM (Qvariable_documentation, "variable-documentation");

   read_buffer = xmalloc (size);



[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 901 bytes --]

  reply	other threads:[~2014-04-05 22:37 UTC|newest]

Thread overview: 59+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-04-02  7:44 bug#17168: 24.3.50; Segfault at mark_object Nicolas Richard
2014-04-02 15:53 ` Daniel Colascione
2014-04-02 17:59   ` Nicolas Richard
2014-04-02 16:29 ` Dmitry Antipov
2014-04-02 19:46   ` Daniel Colascione
2014-04-02 20:33     ` Daniel Colascione
2014-04-02 20:57       ` Nicolas Richard
2014-04-02 21:50         ` Daniel Colascione
2014-04-02 23:24           ` Stefan Monnier
2014-04-03  0:28             ` Daniel Colascione
2014-04-02 20:37     ` Eli Zaretskii
2014-04-02 20:40       ` Daniel Colascione
2014-04-02 20:55         ` Eli Zaretskii
2014-04-03  6:59         ` Dmitry Antipov
2014-04-03  7:04           ` Dmitry Antipov
2014-04-03  7:55             ` Daniel Colascione
2014-04-03  9:08               ` Daniel Colascione
2014-04-03 14:03                 ` Dmitry Antipov
2014-04-03 15:42                   ` Stefan Monnier
2014-04-03 16:47                     ` Daniel Colascione
2014-04-03 17:49                       ` Dmitry Antipov
2014-04-03 17:51                         ` Daniel Colascione
2014-04-03 19:21                           ` Stefan Monnier
2014-04-03 19:22                             ` Daniel Colascione
2014-04-05 22:37                               ` Daniel Colascione [this message]
2014-04-06  5:05                                 ` Dmitry Antipov
2014-04-06  5:11                                   ` Daniel Colascione
2014-04-06 18:00                                     ` Richard Stallman
2014-04-06 18:10                                       ` Daniel Colascione
2014-04-06 19:06                                         ` Eli Zaretskii
2014-04-07  7:49                                         ` martin rudalics
2014-04-07  8:18                                           ` Dmitry Antipov
2014-04-07  9:20                                             ` martin rudalics
2014-04-06 12:36                                 ` Stefan Monnier
2014-04-06 15:06                                   ` Eli Zaretskii
2014-04-06 15:59                                     ` Daniel Colascione
2014-04-06 16:19                                       ` Eli Zaretskii
2014-04-06 16:24                                         ` Daniel Colascione
2014-04-06 16:29                                           ` Eli Zaretskii
2014-04-06 16:37                                             ` Daniel Colascione
2014-04-06 16:59                                               ` Eli Zaretskii
2014-04-06 17:11                                                 ` Daniel Colascione
2014-04-06 19:44                                                 ` Stefan Monnier
2014-04-06 19:42                                     ` Stefan Monnier
2014-04-06 15:46                                   ` Daniel Colascione
2014-04-06 19:58                                     ` Stefan Monnier
2014-04-06 20:13                                       ` Daniel Colascione
2014-04-06 20:53                                         ` Daniel Colascione
2014-04-06 21:08                                         ` Stefan Monnier
2014-04-06 21:37                                           ` Daniel Colascione
2014-04-07 16:28                                             ` Stefan Monnier
2014-04-07 19:06                                               ` Daniel Colascione
2014-04-07 20:42                                                 ` Stefan Monnier
2014-04-08  7:14                                                 ` martin rudalics
2014-04-08  8:47                                                   ` Daniel Colascione
2014-04-06 18:01                                   ` Richard Stallman
2014-04-06 19:58                                     ` Stefan Monnier
2014-04-07 16:56                                       ` Richard Stallman
2014-04-02 20:49     ` Nicolas Richard

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://www.gnu.org/software/emacs/

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=534085B1.9070307@dancol.org \
    --to=dancol@dancol.org \
    --cc=17168@debbugs.gnu.org \
    --cc=dmantipov@yandex.ru \
    --cc=monnier@iro.umontreal.ca \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://git.savannah.gnu.org/cgit/emacs.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).