On 04/03/2014 12:22 PM, Daniel Colascione wrote: > On 04/03/2014 12:21 PM, Stefan Monnier wrote: >>> Sure; I don't think it's too late to take pure storage out of 24.4 >> >> It is definitely too late for that. > > Okay. Let's try your proposed solution then. I'll see whether I can code > something up today. The patch came out more complicated than I'd hoped. Basically, we define a new variable Vpure_reachable, accessible only from C. Early in startup, we make it a plain list and cons reachable but non-pure objects from Fpurecopy onto it. Once we have hash tables available, we turn it into a hash table. At the end of loadup, instead of just setting purify-flag to nil, we call a new subr finalize-pure-storage. finalize-pure-storage sets purify-flag to nil by side effect and, as new behavior, makes purify-flag constant so that it can never again become non-nil. Before returning, finalize-pure-storage also turns Vpure_reachable into a vector *in pure storage* of objects we need to keep around. Fgarbage_collect knows how to mark objects in Vpure_reachable and understands that if Vpure_reachable is a vector, its contents should be marked, not the vector itself. This scheme works and passes Dmitry's test, but the resulting Vpure_reachable vector has over 8,000 items. Most of these items are ordinary interned symbols. As an optimization, when we build the final vector form of Fpure_reachable, we see whether each item is a symbol interned in the initial obarray. If it is, then instead of adding it to the vector, we mark the symbol as un-uninternable, and add code to Funintern to look for this new flag. After this optimization, Vpure-reachable only has 251 elements. Please review. === modified file 'lisp/loadup.el' --- lisp/loadup.el 2014-02-10 01:34:22 +0000 +++ lisp/loadup.el 2014-04-05 22:24:34 +0000 @@ -56,7 +56,7 @@ t)) (let ((dir (car load-path))) ;; We'll probably overflow the pure space. - (setq purify-flag nil) + (finalize-pure-storage) (setq load-path (list (expand-file-name "." dir) (expand-file-name "emacs-lisp" dir) (expand-file-name "language" dir) @@ -389,12 +389,11 @@ (message "Pure-hashed: %d strings, %d vectors, %d conses, %d bytecodes, %d others" strings vectors conses bytecodes others))) -;; Avoid error if user loads some more libraries now and make sure the -;; hash-consing hash table is GC'd. -(setq purify-flag nil) - -(if (null (garbage-collect)) - (setq pure-space-overflow t)) +;; Runs garbage-collect and sets purify-flag to nil by side effect. +(when (and purify-flag + (progn (finalize-pure-storage) + (not (garbage-collect)))) + (setq pure-space-overflow t)) (if (or (member (nth 3 command-line-args) '("dump" "bootstrap")) (member (nth 4 command-line-args) '("dump" "bootstrap"))) === modified file 'src/alloc.c' --- src/alloc.c 2014-04-03 09:50:58 +0000 +++ src/alloc.c 2014-04-05 22:30:18 +0000 @@ -173,6 +173,14 @@ static char *purebeg; static ptrdiff_t pure_size; +/* Data structure holding non-pure objects reachable from objects in + pure storage. Initially a list, since we need this data structure + before we've initialized enough of Emacs to make hash tables. We + transform it into a hash table when hash tables become available. + In `finalize-pure-storage', we turn Vpure_reachable into a vector in + pure storage. */ +static Lisp_Object Vpure_reachable; + /* Number of bytes of pure storage used before pure storage overflowed. If this is non-zero, this implies that an overflow occurred. */ @@ -196,6 +204,8 @@ const char *pending_malloc_warning; +static Lisp_Object purecopy_1 (Lisp_Object obj, bool top_level); + #if 0 /* Normally, pointer sanity only on request... */ #ifdef ENABLE_CHECKING #define SUSPICIOUS_OBJECT_CHECKING 1 @@ -5228,8 +5238,8 @@ Lisp_Object new; struct Lisp_Cons *p = pure_alloc (sizeof *p, Lisp_Cons); XSETCONS (new, p); - XSETCAR (new, Fpurecopy (car)); - XSETCDR (new, Fpurecopy (cdr)); + XSETCAR (new, purecopy_1 (car, false)); + XSETCDR (new, purecopy_1 (cdr, false)); return new; } @@ -5261,12 +5271,8 @@ return new; } - -DEFUN ("purecopy", Fpurecopy, Spurecopy, 1, 1, 0, - doc: /* Make a copy of object OBJ in pure storage. -Recursively copies contents of vectors and cons cells. -Does not copy symbols. Copies strings without text properties. */) - (register Lisp_Object obj) +static Lisp_Object +purecopy_1 (Lisp_Object obj, bool top_level) { if (NILP (Vpurify_flag)) return obj; @@ -5300,7 +5306,7 @@ size &= PSEUDOVECTOR_SIZE_MASK; vec = XVECTOR (make_pure_vector (size)); for (i = 0; i < size; i++) - vec->contents[i] = Fpurecopy (AREF (obj, i)); + vec->contents[i] = purecopy_1 (AREF (obj, i), false); if (COMPILEDP (obj)) { XSETPVECTYPE (vec, PVEC_COMPILED); @@ -5311,9 +5317,20 @@ } else if (MARKERP (obj)) error ("Attempt to copy a marker to pure storage"); - else + else if (top_level) /* Not purified, don't hash-cons. */ return obj; + else if (!INTEGERP (obj) && !EQ (obj, Qt) && !EQ (obj, Qnil)) + { + /* Object is reachable from a pure object, so we need remember + it as a GC root: we don't mark pure objects themselves. */ + if (NILP (Vpure_reachable) || CONSP (Vpure_reachable)) + Vpure_reachable = Fcons (obj, Vpure_reachable); + else + Fputhash (obj, Qnil, Vpure_reachable); + + return obj; + } if (HASH_TABLE_P (Vpurify_flag)) /* Hash consing. */ Fputhash (obj, obj, Vpurify_flag); @@ -5322,6 +5339,73 @@ } +DEFUN ("purecopy", Fpurecopy, Spurecopy, 1, 1, 0, + doc: /* Make a copy of object OBJ in pure storage. +Recursively copies contents of vectors and cons cells. +Does not copy symbols. Copies strings without text properties. */) + (register Lisp_Object obj) +{ + return purecopy_1 (obj, true); +} + +DEFUN ("finalize-pure-storage", Ffinalize_pure_storage, + Sfinalize_pure_storage, 0, 0, 0, + doc: /* Finishes building pure storage. +May be called only once, with purify-flag non-nil. */) + (void) +{ + struct Lisp_Hash_Table *h; + ptrdiff_t nr_reachable; + Lisp_Object new_pure_reachable; + Lisp_Object reachable_object; + ptrdiff_t i; + Lisp_Object reachable_objects; + + if (NILP (Vpurify_flag)) + error ("Purification not started"); + + eassert (HASH_TABLE_P (Vpure_reachable)); + h = XHASH_TABLE (Vpure_reachable); + + reachable_objects = Qnil; + nr_reachable = 0; + + for (i = 0; i < HASH_TABLE_SIZE (h); ++i) + if (!NILP (HASH_HASH (h, i))) + { + reachable_object = HASH_KEY (h, i); + if (SYMBOLP (reachable_object)) + { + if (SYMBOL_INTERNED_IN_INITIAL_OBARRAY_P (reachable_object)) + XSYMBOL (reachable_object)->interned = + SYMBOL_INTERNED_IN_INITIAL_OBARRAY_CANNOT_UNINTERN; + + if (XSYMBOL (reachable_object)->interned + == SYMBOL_INTERNED_IN_INITIAL_OBARRAY_CANNOT_UNINTERN) + { + /* No need to remember this object, since it's already + on the main obarray and won't be uninterned. */ + continue; + } + } + + nr_reachable += 1; + reachable_objects = Fcons (reachable_object, reachable_objects); + } + + new_pure_reachable = make_pure_vector (nr_reachable); + for (i = 0; CONSP (reachable_objects); ++i) + { + XVECTOR (new_pure_reachable)->contents[i] = XCAR (reachable_objects); + reachable_objects = XCDR (reachable_objects); + } + + XSYMBOL (intern_c_string ("purify-flag"))->constant = 1; + Vpurify_flag = Qnil; + Vpure_reachable = new_pure_reachable; + return Qnil; +} + /*********************************************************************** Protection from GC @@ -5578,6 +5662,19 @@ for (i = 0; i < staticidx; i++) mark_object (*staticvec[i]); + if (VECTORP (Vpure_reachable)) + { + /* Vpure_reachable is a pure-allocated vector of objects + reachable from pure storage. We can't mark it, but we can + mark its contents. */ + struct Lisp_Vector* pv = XVECTOR (Vpure_reachable); + eassert (PURE_POINTER_P (pv)); + for (i = 0; i < pv->header.size; ++i) + mark_object (pv->contents[i]); + } + else + mark_object (Vpure_reachable); + mark_specpdl (); mark_terminals (); mark_kboards (); @@ -6581,12 +6678,7 @@ for (; sym < end; ++sym) { - /* Check if the symbol was created during loadup. In such a case - it might be pointed to by pure bytecode which we don't trace, - so we conservatively assume that it is live. */ - bool pure_p = PURE_POINTER_P (XSTRING (sym->s.name)); - - if (!sym->s.gcmarkbit && !pure_p) + if (!sym->s.gcmarkbit) { if (sym->s.redirect == SYMBOL_LOCALIZED) xfree (SYMBOL_BLV (&sym->s)); @@ -6600,8 +6692,6 @@ else { ++num_used; - if (!pure_p) - eassert (!STRING_MARKED_P (XSTRING (sym->s.name))); sym->s.gcmarkbit = 0; /* Attempt to catch bogus objects. */ eassert (valid_lisp_object_p (sym->s.function) >= 1); @@ -6922,6 +7012,9 @@ /* Used to do Vpurify_flag = Qt here, but Qt isn't set up yet! */ purebeg = PUREBEG; pure_size = PURESIZE; +#ifdef ENABLE_CHECKING + Vpure_reachable = make_number (-1); +#endif #if GC_MARK_STACK || defined GC_MALLOC_CHECK mem_init (); @@ -6941,6 +7034,39 @@ } void +init_alloc_once_post_obarray (void) +{ + /* This function is called after Qnil and Qt make sense. Qt is + correct even if CANNOT_DUMP. loadup.el will set to nil at end. */ + Vpurify_flag = Qt; + Vpure_reachable = Qnil; + /* We don't need to staticpro Vpure_reachable as we mark is specially + in Fgarbage_collect. */ +} + +void +init_alloc_once_post_hash_tables (void) +{ + /* This function is called after hash tables become available. Make + Vpure_reachable a hash table for more efficiency. */ + Lisp_Object reachable_list = Vpure_reachable; + Lisp_Object new_pure_reachable = + make_hash_table (hashtest_eq, + make_number (DEFAULT_HASH_SIZE), + make_float (DEFAULT_REHASH_SIZE), + make_float (DEFAULT_REHASH_THRESHOLD), + Qnil); + + while (CONSP (reachable_list)) + { + Fputhash (XCAR (reachable_list), Qnil, new_pure_reachable); + reachable_list = XCDR (reachable_list); + } + + Vpure_reachable = new_pure_reachable; +} + +void init_alloc (void) { gcprolist = 0; @@ -7068,6 +7194,7 @@ defsubr (&Smake_symbol); defsubr (&Smake_marker); defsubr (&Spurecopy); + defsubr (&Sfinalize_pure_storage); defsubr (&Sgarbage_collect); defsubr (&Smemory_limit); defsubr (&Smemory_use_counts); === modified file 'src/emacs.c' --- src/emacs.c 2014-04-03 07:14:02 +0000 +++ src/emacs.c 2014-04-05 20:33:09 +0000 @@ -1171,6 +1171,7 @@ { init_alloc_once (); init_obarray (); + init_alloc_once_post_obarray (); init_eval_once (); init_charset_once (); init_coding_once (); @@ -1198,6 +1199,7 @@ /* Called before syms_of_fileio, because it sets up Qerror_condition. */ syms_of_data (); syms_of_fns (); /* Before syms_of_charset which uses hashtables. */ + init_alloc_once_post_hash_tables (); syms_of_fileio (); /* Before syms_of_coding to initialize Vgc_cons_threshold. */ syms_of_alloc (); @@ -2078,7 +2080,6 @@ You must run Emacs in batch mode in order to dump it. */) (Lisp_Object filename, Lisp_Object symfile) { - Lisp_Object tem; Lisp_Object symbol; ptrdiff_t count = SPECPDL_INDEX (); @@ -2090,6 +2091,9 @@ if (!might_dump) error ("Emacs can be dumped only once"); + if (!NILP (Vpurify_flag)) + error ("Purification must have completed before dumping"); + #ifdef GNU_LINUX /* Warn if the gap between BSS end and heap start is larger than this. */ @@ -2127,9 +2131,6 @@ } } - tem = Vpurify_flag; - Vpurify_flag = Qnil; - #ifdef HAVE_TZSET set_time_zone_rule (dump_tz); #ifndef LOCALTIME_CACHE @@ -2173,8 +2174,6 @@ reset_image_types (); #endif - Vpurify_flag = tem; - return unbind_to (count, Qnil); } === modified file 'src/fns.c' --- src/fns.c 2014-04-01 20:18:12 +0000 +++ src/fns.c 2014-04-05 21:39:19 +0000 @@ -3483,8 +3483,9 @@ Low-level Functions ***********************************************************************/ -static struct hash_table_test hashtest_eq; -struct hash_table_test hashtest_eql, hashtest_equal; +struct hash_table_test hashtest_eq; +struct hash_table_test hashtest_eql; +struct hash_table_test hashtest_equal; /* Compare KEY1 which has hash code HASH1 and KEY2 with hash code HASH2 in hash table H using `eql'. Value is true if KEY1 and === modified file 'src/lisp.h' --- src/lisp.h 2014-04-03 00:18:08 +0000 +++ src/lisp.h 2014-04-05 22:13:57 +0000 @@ -1537,7 +1537,8 @@ { SYMBOL_UNINTERNED = 0, SYMBOL_INTERNED = 1, - SYMBOL_INTERNED_IN_INITIAL_OBARRAY = 2 + SYMBOL_INTERNED_IN_INITIAL_OBARRAY = 2, + SYMBOL_INTERNED_IN_INITIAL_OBARRAY_CANNOT_UNINTERN = 3 }; enum symbol_redirect @@ -1658,7 +1659,14 @@ INLINE bool SYMBOL_INTERNED_IN_INITIAL_OBARRAY_P (Lisp_Object sym) { - return XSYMBOL (sym)->interned == SYMBOL_INTERNED_IN_INITIAL_OBARRAY; + return XSYMBOL (sym)->interned >= SYMBOL_INTERNED_IN_INITIAL_OBARRAY; +} + +INLINE bool +SYMBOL_CANNOT_UNINTERN_P (Lisp_Object sym) +{ + return XSYMBOL (sym)->interned == + SYMBOL_INTERNED_IN_INITIAL_OBARRAY_CANNOT_UNINTERN; } /* Value is non-zero if symbol is considered a constant, i.e. its @@ -3450,7 +3458,7 @@ ptrdiff_t hash_lookup (struct Lisp_Hash_Table *, Lisp_Object, EMACS_UINT *); ptrdiff_t hash_put (struct Lisp_Hash_Table *, Lisp_Object, Lisp_Object, EMACS_UINT); -extern struct hash_table_test hashtest_eql, hashtest_equal; +extern struct hash_table_test hashtest_eq, hashtest_eql, hashtest_equal; extern Lisp_Object substring_both (Lisp_Object, ptrdiff_t, ptrdiff_t, ptrdiff_t, ptrdiff_t); @@ -3741,6 +3749,8 @@ extern void free_marker (Lisp_Object); extern void free_cons (struct Lisp_Cons *); extern void init_alloc_once (void); +extern void init_alloc_once_post_obarray (void); +extern void init_alloc_once_post_hash_tables (void); extern void init_alloc (void); extern void syms_of_alloc (void); extern struct buffer * allocate_buffer (void); === modified file 'src/lread.c' --- src/lread.c 2014-02-25 22:51:34 +0000 +++ src/lread.c 2014-04-05 22:11:09 +0000 @@ -3895,10 +3895,17 @@ if (SYMBOLP (name) && !EQ (name, tem)) return Qnil; - /* There are plenty of other symbols which will screw up the Emacs - session if we unintern them, as well as even more ways to use - `setq' or `fset' or whatnot to make the Emacs session - unusable. Let's not go down this silly road. --Stef */ + if (XSYMBOL (tem)->interned + == SYMBOL_INTERNED_IN_INITIAL_OBARRAY_CANNOT_UNINTERN) + { + /* We can't unintern this symbol because pure storage might + refer to it. If we were to allow uninterning, we'd have to + remember these symbols as GC roots elsewhere, and if the user + later re-interned them, the core functionality would refer to + symbols with a different name. */ + error ("Attempt to unintern symbol in Emacs core"); + } + /* if (EQ (tem, Qnil) || EQ (tem, Qt)) error ("Attempt to unintern t or nil"); */ @@ -4052,9 +4059,6 @@ XSYMBOL (Qnil)->declared_special = 1; XSYMBOL (Qt)->constant = 1; - /* Qt is correct even if CANNOT_DUMP. loadup.el will set to nil at end. */ - Vpurify_flag = Qt; - DEFSYM (Qvariable_documentation, "variable-documentation"); read_buffer = xmalloc (size);