unofficial mirror of emacs-devel@gnu.org 
 help / color / mirror / code / Atom feed
* [PATCH] shrink struct vectorlike_header
@ 2012-10-11  6:41 Dmitry Antipov
  2012-10-11 12:57 ` Stefan Monnier
  2012-10-11 16:42 ` [PATCH] shrink struct vectorlike_header Eli Zaretskii
  0 siblings, 2 replies; 19+ messages in thread
From: Dmitry Antipov @ 2012-10-11  6:41 UTC (permalink / raw)
  To: Emacs development discussions

[-- Attachment #1: Type: text/plain, Size: 292 bytes --]

This is an attempt to shrink struct vectorlike_header to the only
'size' field. For pseudovectors, 'size' field is rearranged to have
one more bitfield to record the size of non-Lisp_Object area, thus
giving the possibility to calculate the memory footprint of any
vectorlike object.

Dmitry

[-- Attachment #2: shrink_vectorlike_header.patch --]
[-- Type: text/plain, Size: 20545 bytes --]

=== modified file 'src/alloc.c'
--- src/alloc.c	2012-10-10 15:31:21 +0000
+++ src/alloc.c	2012-10-11 06:09:56 +0000
@@ -2040,7 +2040,7 @@
   val = Fmake_vector (make_number (length_in_elts + extra_bool_elts), Qnil);
 
   /* No Lisp_Object to trace in there.  */
-  XSETPVECTYPESIZE (XVECTOR (val), PVEC_BOOL_VECTOR, 0);
+  XSETPVECTYPESIZE (XVECTOR (val), PVEC_BOOL_VECTOR, 0, 0);
 
   p = XBOOL_VECTOR (val);
   p->size = XFASTINT (length);
@@ -2619,15 +2619,37 @@
 
 #define VINDEX(nbytes) (((nbytes) - VBLOCK_BYTES_MIN) / roundup_size)
 
+/* When V is on the free list, V->contents[0] is used as a pointer to
+   next vector on the free list.  */
+
+#define NEXT_IN_FREE_LIST(v)				\
+  (*(struct Lisp_Vector **)((char *) v + header_size))
+
+/* For the large vector V, previous word is used as a pointer
+   to next large vector.  */
+
+#define NEXT_LARGE_VECTOR(v)					\
+  (*(struct Lisp_Vector **)((char *) v - sizeof (void *)))
+
+/* Convert from memory block used for large vector to the vector itself.  */
+
+#define LARGE_VECTOR_START(mem)					\
+  (struct Lisp_Vector *)((char *) mem + sizeof (void *))
+
+/* Convert from large vector to it's memory block.  */
+
+#define LARGE_VECTOR_MEM(v) (void *)((char *) v - sizeof (void *))
+
 /* Common shortcut to setup vector on a free list.  */
 
 #define SETUP_ON_FREE_LIST(v, nbytes, index)			\
   do {								\
-    XSETPVECTYPESIZE (v, PVEC_FREE, nbytes);			\
+    XSETPVECTYPESIZE (v, PVEC_FREE, 0,				\
+		      ((nbytes) - header_size) / word_size);	\
     eassert ((nbytes) % roundup_size == 0);			\
     (index) = VINDEX (nbytes);					\
     eassert ((index) < VECTOR_MAX_FREE_LIST_INDEX);		\
-    (v)->header.next.vector = vector_free_lists[index];		\
+    NEXT_IN_FREE_LIST (v) = vector_free_lists[index];		\
     vector_free_lists[index] = (v);				\
     total_free_vector_slots += (nbytes) / word_size;		\
   } while (0)
@@ -2706,8 +2728,7 @@
   if (vector_free_lists[index])
     {
       vector = vector_free_lists[index];
-      vector_free_lists[index] = vector->header.next.vector;
-      vector->header.next.nbytes = nbytes;
+      vector_free_lists[index] = NEXT_IN_FREE_LIST (vector);
       total_free_vector_slots -= nbytes / word_size;
       return vector;
     }
@@ -2721,8 +2742,7 @@
       {
 	/* This vector is larger than requested.  */
 	vector = vector_free_lists[index];
-	vector_free_lists[index] = vector->header.next.vector;
-	vector->header.next.nbytes = nbytes;
+	vector_free_lists[index] = NEXT_IN_FREE_LIST (vector);
 	total_free_vector_slots -= nbytes / word_size;
 
 	/* Excess bytes are used for the smaller vector,
@@ -2739,7 +2759,6 @@
 
   /* New vector will be at the beginning of this block.  */
   vector = (struct Lisp_Vector *) block->data;
-  vector->header.next.nbytes = nbytes;
 
   /* If the rest of space from this block is large enough
      for one-slot vector at least, set up it on a free list.  */
@@ -2759,15 +2778,27 @@
   ((char *) (vector) <= (block)->data		\
    + VECTOR_BLOCK_BYTES - VBLOCK_BYTES_MIN)
 
-/* Number of bytes used by vector-block-allocated object.  This is the only
-   place where we actually use the `nbytes' field of the vector-header.
-   I.e. we could get rid of the `nbytes' field by computing it based on the
-   vector-type.  */
-
-#define PSEUDOVECTOR_NBYTES(vector) \
-  (PSEUDOVECTOR_TYPEP (&vector->header, PVEC_FREE)	\
-   ? vector->header.size & PSEUDOVECTOR_SIZE_MASK	\
-   : vector->header.next.nbytes)
+/* Return the memory footprint of V in bytes.  */
+
+static ptrdiff_t
+vector_nbytes (struct Lisp_Vector *v)
+{
+  ptrdiff_t size = v->header.size & ~ARRAY_MARK_FLAG;
+
+  if (size & PSEUDOVECTOR_FLAG)
+    {
+      if (PSEUDOVECTOR_TYPEP (&v->header, PVEC_BOOL_VECTOR))
+	return (bool_header_size
+		+ (((struct Lisp_Bool_Vector *) v)->size
+		   + BOOL_VECTOR_BITS_PER_CHAR - 1)
+		/ BOOL_VECTOR_BITS_PER_CHAR);
+      return (header_size
+	      + ((size & PSEUDOVECTOR_SIZE_MASK)
+		 + ((size & PSEUDOVECTOR_REST_MASK)
+		    >> PSEUDOVECTOR_SIZE_BITS)) * word_size);
+    }
+  return header_size + size * word_size;
+}
 
 /* Reclaim space used by unmarked vectors.  */
 
@@ -2785,6 +2816,7 @@
   for (block = vector_blocks; block; block = *bprev)
     {
       bool free_this_block = 0;
+      ptrdiff_t nbytes;
 
       for (vector = (struct Lisp_Vector *) block->data;
 	   VECTOR_IN_BLOCK (vector, block); vector = next)
@@ -2793,14 +2825,16 @@
 	    {
 	      VECTOR_UNMARK (vector);
 	      total_vectors++;
-	      total_vector_slots += vector->header.next.nbytes / word_size;
-	      next = ADVANCE (vector, vector->header.next.nbytes);
+	      nbytes = vector_nbytes (vector);
+	      total_vector_slots += nbytes / word_size;
+	      next = ADVANCE (vector, nbytes);
 	    }
 	  else
 	    {
-	      ptrdiff_t nbytes = PSEUDOVECTOR_NBYTES (vector);
-	      ptrdiff_t total_bytes = nbytes;
+	      ptrdiff_t total_bytes;
 
+	      nbytes = vector_nbytes (vector);
+	      total_bytes = nbytes;
 	      next = ADVANCE (vector, nbytes);
 
 	      /* While NEXT is not marked, try to coalesce with VECTOR,
@@ -2810,7 +2844,7 @@
 		{
 		  if (VECTOR_MARKED_P (next))
 		    break;
-		  nbytes = PSEUDOVECTOR_NBYTES (next);
+		  nbytes = vector_nbytes (next);
 		  total_bytes += nbytes;
 		  next = ADVANCE (next, nbytes);
 		}
@@ -2867,12 +2901,15 @@
 	  else
 	    total_vector_slots
 	      += header_size / word_size + vector->header.size;
-	  vprev = &vector->header.next.vector;
+	  vprev = &NEXT_LARGE_VECTOR (vector);
 	}
       else
 	{
-	  *vprev = vector->header.next.vector;
-	  lisp_free (vector);
+	  *vprev = NEXT_LARGE_VECTOR (vector);
+#if GC_MARK_STACK && !defined GC_MALLOC_CHECK
+	  mem_delete (mem_find (vector));
+#endif
+	  xfree (LARGE_VECTOR_MEM (vector));
 	}
     }
 }
@@ -2904,8 +2941,15 @@
 	p = allocate_vector_from_block (vroundup (nbytes));
       else
 	{
-	  p = lisp_malloc (nbytes, MEM_TYPE_VECTORLIKE);
-	  p->header.next.vector = large_vectors;
+	  void *mem;
+	  /* Can't use lisp_malloc for P because
+	     NEXT_LARGE_VECTOR (P) is outside of P.  */
+	  mem = xmalloc (nbytes + sizeof (void *));
+	  p = LARGE_VECTOR_START (mem);
+#if GC_MARK_STACK && !defined GC_MALLOC_CHECK
+	  mem_insert (p, (char *) p + nbytes, MEM_TYPE_VECTORLIKE);
+#endif
+	  NEXT_LARGE_VECTOR (p) = large_vectors;
 	  large_vectors = p;
 	}
 
@@ -2952,7 +2996,7 @@
   for (i = 0; i < lisplen; ++i)
     v->contents[i] = Qnil;
 
-  XSETPVECTYPESIZE (v, tag, lisplen);
+  XSETPVECTYPESIZE (v, tag, lisplen, memlen - lisplen);
   return v;
 }
 
@@ -2961,10 +3005,9 @@
 {
   struct buffer *b = lisp_malloc (sizeof *b, MEM_TYPE_BUFFER);
 
-  XSETPVECTYPESIZE (b, PVEC_BUFFER, (offsetof (struct buffer, own_text)
-				     - header_size) / word_size);
+  BUFFER_PVEC_INIT (b);
   /* Put B on the chain of all buffers including killed ones.  */
-  b->header.next.buffer = all_buffers;
+  b->next = all_buffers;
   all_buffers = b;
   /* Note that the rest fields of B are not initialized.  */
   return b;
@@ -4068,13 +4111,10 @@
       while (VECTOR_IN_BLOCK (vector, block)
 	     && vector <= (struct Lisp_Vector *) p)
 	{
-	  if (PSEUDOVECTOR_TYPEP (&vector->header, PVEC_FREE))
-	    vector = ADVANCE (vector, (vector->header.size
-				       & PSEUDOVECTOR_SIZE_MASK));
-	  else if (vector == p)
+	  if (!PSEUDOVECTOR_TYPEP (&vector->header, PVEC_FREE) && vector == p)
 	    return 1;
 	  else
-	    vector = ADVANCE (vector, vector->header.next.nbytes);
+	    vector = ADVANCE (vector, vector_nbytes (vector));
 	}
     }
   else if (m->type == MEM_TYPE_VECTORLIKE && p == m->start)
@@ -5687,7 +5727,7 @@
 
 	if (ptr->header.size & PSEUDOVECTOR_FLAG)
 	  pvectype = ((ptr->header.size & PVEC_TYPE_MASK)
-		      >> PSEUDOVECTOR_SIZE_BITS);
+		      >> PSEUDOVECTOR_AREA_BITS);
 	else
 	  pvectype = PVEC_NORMAL_VECTOR;
 
@@ -6317,7 +6357,7 @@
     for (buffer = all_buffers; buffer; buffer = *bprev)
       if (!VECTOR_MARKED_P (buffer))
 	{
-	  *bprev = buffer->header.next.buffer;
+	  *bprev = buffer->next;
 	  lisp_free (buffer);
 	}
       else
@@ -6326,7 +6366,7 @@
 	  /* Do not use buffer_(set|get)_intervals here.  */
 	  buffer->text->intervals = balance_intervals (buffer->text->intervals);
 	  total_buffers++;
-	  bprev = &buffer->header.next.buffer;
+	  bprev = &buffer->next;
 	}
   }
 

=== modified file 'src/buffer.c'
--- src/buffer.c	2012-10-02 02:43:53 +0000
+++ src/buffer.c	2012-10-11 05:57:04 +0000
@@ -5107,11 +5107,6 @@
 init_buffer_once (void)
 {
   int idx;
-  /* If you add, remove, or reorder Lisp_Objects in a struct buffer, make
-     sure that this is still correct.  Otherwise, mark_vectorlike may not
-     trace all Lisp_Objects in buffer_defaults and buffer_local_symbols.  */
-  const int pvecsize
-    = (offsetof (struct buffer, own_text) - header_size) / word_size;
 
   memset (buffer_permanent_local_flags, 0, sizeof buffer_permanent_local_flags);
 
@@ -5134,8 +5129,8 @@
   /* This is not strictly necessary, but let's make them initialized.  */
   bset_name (&buffer_defaults, build_pure_c_string (" *buffer-defaults*"));
   bset_name (&buffer_local_symbols, build_pure_c_string (" *buffer-local-symbols*"));
-  XSETPVECTYPESIZE (&buffer_defaults, PVEC_BUFFER, pvecsize);
-  XSETPVECTYPESIZE (&buffer_local_symbols, PVEC_BUFFER, pvecsize);
+  BUFFER_PVEC_INIT (&buffer_defaults);
+  BUFFER_PVEC_INIT (&buffer_local_symbols);
 
   /* Set up the default values of various buffer slots.  */
   /* Must do these before making the first buffer! */

=== modified file 'src/buffer.h'
--- src/buffer.h	2012-09-11 04:22:03 +0000
+++ src/buffer.h	2012-10-11 05:56:43 +0000
@@ -482,11 +482,6 @@
 
 struct buffer
 {
-  /* HEADER.NEXT is the next buffer, in chain of all buffers, including killed
-     buffers.  This chain, starting from all_buffers, is used only for garbage
-     collection, in order to collect killed buffers properly.  Note that large
-     vectors and large pseudo-vector objects are all on another chain starting
-     from large_vectors.  */
   struct vectorlike_header header;
 
   /* The name of this buffer.  */
@@ -750,6 +745,9 @@
      In an indirect buffer, this is the own_text field of another buffer.  */
   struct buffer_text *text;
 
+  /* Next buffer, in chain of all buffers, including killed ones.  */
+  struct buffer *next;
+
   /* Char position of point in buffer.  */
   ptrdiff_t pt;
 
@@ -959,6 +957,26 @@
   b->INTERNAL_FIELD (width_table) = val;
 }
 
+/* Number of Lisp_Objects at the beginning of struct buffer.
+   If you add, remove, or reorder Lisp_Objects within buffer
+   structure, make sure that this is still correct.  */
+
+#define BUFFER_LISP_SIZE                                               \
+  ((offsetof (struct buffer, own_text) - header_size) / word_size)
+
+/* Size of the struct buffer part beyond leading
+   Lisp_Objects, in word_size units.  */
+
+#define BUFFER_REST_SIZE                                               \
+  ((sizeof (struct buffer) - header_size) / word_size - BUFFER_LISP_SIZE)
+
+/* Initialize the pseudovector header of buffer object.  BUFFER_LISP_SIZE
+   is required for GC, but BUFFER_REST_SIZE is set up just to be consistent
+   with other pseudovectors.  */
+
+#define BUFFER_PVEC_INIT(b)                                    \
+  XSETPVECTYPESIZE (b, PVEC_BUFFER, BUFFER_LISP_SIZE, BUFFER_REST_SIZE)
+
 /* Convenient check whether buffer B is live.  */
 
 #define BUFFER_LIVE_P(b) (!NILP (BVAR (b, name)))
@@ -970,7 +988,7 @@
 /* Used to iterate over the chain above.  */
 
 #define FOR_EACH_BUFFER(b) \
-  for ((b) = all_buffers; (b); (b) = (b)->header.next.buffer)
+  for ((b) = all_buffers; (b); (b) = (b)->next)
 
 /* This points to the current buffer.  */
 

=== modified file 'src/fns.c'
--- src/fns.c	2012-10-01 18:59:52 +0000
+++ src/fns.c	2012-10-11 05:55:01 +0000
@@ -2076,9 +2076,8 @@
 	   are sensible to compare, so eliminate the others now.  */
 	if (size & PSEUDOVECTOR_FLAG)
 	  {
-	    if (!(size & ((PVEC_COMPILED | PVEC_CHAR_TABLE
-			   | PVEC_SUB_CHAR_TABLE | PVEC_FONT)
-			  << PSEUDOVECTOR_SIZE_BITS)))
+	    if (((size & PVEC_TYPE_MASK) >> PSEUDOVECTOR_AREA_BITS)
+		< PVEC_COMPILED)
 	      return 0;
 	    size &= PSEUDOVECTOR_SIZE_MASK;
 	  }
@@ -3661,12 +3660,9 @@
 {
   Lisp_Object table;
   struct Lisp_Hash_Table *h2;
-  struct Lisp_Vector *next;
 
   h2 = allocate_hash_table ();
-  next = h2->header.next.vector;
   *h2 = *h1;
-  h2->header.next.vector = next;
   h2->key_and_value = Fcopy_sequence (h1->key_and_value);
   h2->hash = Fcopy_sequence (h1->hash);
   h2->next = Fcopy_sequence (h1->next);

=== modified file 'src/lisp.h'
--- src/lisp.h	2012-10-10 20:09:47 +0000
+++ src/lisp.h	2012-10-11 06:24:07 +0000
@@ -361,14 +361,11 @@
   PVEC_WINDOW_CONFIGURATION,
   PVEC_SUBR,
   PVEC_OTHER,
-  /* These last 4 are special because we OR them in fns.c:internal_equal,
-     so they have to use a disjoint bit pattern:
-     if (!(size & (PVEC_COMPILED | PVEC_CHAR_TABLE
-                   | PVEC_SUB_CHAR_TABLE | PVEC_FONT))) */
-  PVEC_COMPILED			= 0x10,
-  PVEC_CHAR_TABLE		= 0x20,
-  PVEC_SUB_CHAR_TABLE		= 0x30,
-  PVEC_FONT			= 0x40
+  /* These should be last, check internal_equal to see why.  */
+  PVEC_COMPILED,
+  PVEC_CHAR_TABLE,
+  PVEC_SUB_CHAR_TABLE,
+  PVEC_FONT
 };
 
 /* DATA_SEG_BITS forces extra bits to be or'd in with any pointers
@@ -388,9 +385,18 @@
        only the number of Lisp_Object fields (that need to be traced by GC).
        The distinction is used, e.g., by Lisp_Process, which places extra
        non-Lisp_Object fields at the end of the structure.  */
-    PSEUDOVECTOR_SIZE_BITS = 16,
+    PSEUDOVECTOR_SIZE_BITS = 12,
     PSEUDOVECTOR_SIZE_MASK = (1 << PSEUDOVECTOR_SIZE_BITS) - 1,
-    PVEC_TYPE_MASK = 0x0fff << PSEUDOVECTOR_SIZE_BITS,
+
+    /* To calculate the memory footprint of the pseudovector, it's useful
+       to store the size of non-Lisp area in word_size units here.  */
+    PSEUDOVECTOR_REST_BITS = 12,
+    PSEUDOVECTOR_REST_MASK = (((1 << PSEUDOVECTOR_REST_BITS) - 1) 
+			      << PSEUDOVECTOR_SIZE_BITS),
+
+    /* Used to extract pseudovector subtype information.  */
+    PSEUDOVECTOR_AREA_BITS = PSEUDOVECTOR_SIZE_BITS + PSEUDOVECTOR_REST_BITS,
+    PVEC_TYPE_MASK = 0x3f << PSEUDOVECTOR_AREA_BITS,
 
     /* Number of bits to put in each character in the internal representation
        of bool vectors.  This should not vary across implementations.  */
@@ -566,13 +572,13 @@
 
 /* Pseudovector types.  */
 
-#define XSETPVECTYPE(v, code) XSETTYPED_PVECTYPE (v, header.size, code)
-#define XSETTYPED_PVECTYPE(v, size_member, code) \
-  ((v)->size_member |= PSEUDOVECTOR_FLAG | ((code) << PSEUDOVECTOR_SIZE_BITS))
-#define XSETPVECTYPESIZE(v, code, sizeval) \
+#define XSETPVECTYPE(v, code)						\
+  ((v)->header.size |= PSEUDOVECTOR_FLAG | ((code) << PSEUDOVECTOR_AREA_BITS))
+#define XSETPVECTYPESIZE(v, code, lispsize, restsize)		\
   ((v)->header.size = (PSEUDOVECTOR_FLAG			\
-		       | ((code) << PSEUDOVECTOR_SIZE_BITS)	\
-		       | (sizeval)))
+		       | ((code) << PSEUDOVECTOR_AREA_BITS)	\
+		       | ((restsize) << PSEUDOVECTOR_SIZE_BITS) \
+		       | (lispsize)))
 
 /* The cast to struct vectorlike_header * avoids aliasing issues.  */
 #define XSETPSEUDOVECTOR(a, b, code) \
@@ -584,16 +590,14 @@
 #define XSETTYPED_PSEUDOVECTOR(a, b, size, code)			\
   (XSETVECTOR (a, b),							\
    eassert ((size & (PSEUDOVECTOR_FLAG | PVEC_TYPE_MASK))		\
-	    == (PSEUDOVECTOR_FLAG | (code << PSEUDOVECTOR_SIZE_BITS))))
+	    == (PSEUDOVECTOR_FLAG | (code << PSEUDOVECTOR_AREA_BITS))))
 
 #define XSETWINDOW_CONFIGURATION(a, b) \
   (XSETPSEUDOVECTOR (a, b, PVEC_WINDOW_CONFIGURATION))
 #define XSETPROCESS(a, b) (XSETPSEUDOVECTOR (a, b, PVEC_PROCESS))
 #define XSETWINDOW(a, b) (XSETPSEUDOVECTOR (a, b, PVEC_WINDOW))
 #define XSETTERMINAL(a, b) (XSETPSEUDOVECTOR (a, b, PVEC_TERMINAL))
-/* XSETSUBR is special since Lisp_Subr lacks struct vectorlike_header.  */
-#define XSETSUBR(a, b) \
-  XSETTYPED_PSEUDOVECTOR (a, b, XSUBR (a)->size, PVEC_SUBR)
+#define XSETSUBR(a, b) (XSETPSEUDOVECTOR (a, b, PVEC_SUBR))
 #define XSETCOMPILED(a, b) (XSETPSEUDOVECTOR (a, b, PVEC_COMPILED))
 #define XSETBUFFER(a, b) (XSETPSEUDOVECTOR (a, b, PVEC_BUFFER))
 #define XSETCHAR_TABLE(a, b) (XSETPSEUDOVECTOR (a, b, PVEC_CHAR_TABLE))
@@ -783,28 +787,6 @@
          vector on a free-list and PSEUDOVECTOR_SIZE_BITS indicates its size
          in bytes.  */
     ptrdiff_t size;
-
-    /* When the vector is allocated from a vector block, NBYTES is used
-       if the vector is not on a free list, and VECTOR is used otherwise.
-       For large vector-like objects, BUFFER or VECTOR is used as a pointer
-       to the next vector-like object.  It is generally a buffer or a
-       Lisp_Vector alias, so for convenience it is a union instead of a
-       pointer: this way, one can write P->next.vector instead of ((struct
-       Lisp_Vector *) P->next).  */
-    union {
-      /* This is only needed for small vectors that are not free because the
-	 `size' field only gives us the number of Lisp_Object slots, whereas we
-	 need to know the total size, including non-Lisp_Object data.
-	 FIXME: figure out a way to store this info elsewhere so we can
-	 finally get rid of this extra word of overhead.  */
-      ptrdiff_t nbytes;
-      struct buffer *buffer;
-      /* FIXME: This can be removed: For large vectors, this field could be
-	 placed *before* the vector itself.  And for small vectors on a free
-	 list, this field could be stored in the vector's bytes, since the
-	 empty vector is handled specially anyway.  */
-      struct Lisp_Vector *vector;
-    } next;
   };
 
 /* Regular vector is just a header plus array of Lisp_Objects.  */
@@ -978,15 +960,11 @@
 
 /* This structure describes a built-in function.
    It is generated by the DEFUN macro only.
-   defsubr makes it into a Lisp object.
-
-   This type is treated in most respects as a pseudovector,
-   but since we never dynamically allocate or free them,
-   we don't need a struct vectorlike_header and its 'next' field.  */
+   defsubr makes it into a Lisp object.  */
 
 struct Lisp_Subr
   {
-    ptrdiff_t size;
+    struct vectorlike_header header;
     union {
       Lisp_Object (*a0) (void);
       Lisp_Object (*a1) (Lisp_Object);
@@ -1667,7 +1645,7 @@
 
 #define PSEUDOVECTOR_TYPEP(v, code)					\
   (((v)->size & (PSEUDOVECTOR_FLAG | PVEC_TYPE_MASK))			\
-   == (PSEUDOVECTOR_FLAG | ((code) << PSEUDOVECTOR_SIZE_BITS)))
+   == (PSEUDOVECTOR_FLAG | ((code) << PSEUDOVECTOR_AREA_BITS)))
 
 /* True if object X, with internal type struct T *, is a pseudovector whose
    code is CODE.  */
@@ -1680,8 +1658,7 @@
 #define PROCESSP(x) PSEUDOVECTORP (x, PVEC_PROCESS)
 #define WINDOWP(x) PSEUDOVECTORP (x, PVEC_WINDOW)
 #define TERMINALP(x) PSEUDOVECTORP (x, PVEC_TERMINAL)
-/* SUBRP is special since Lisp_Subr lacks struct vectorlike_header.  */
-#define SUBRP(x) TYPED_PSEUDOVECTORP (x, Lisp_Subr, PVEC_SUBR)
+#define SUBRP(x) PSEUDOVECTORP (x, PVEC_SUBR)
 #define COMPILEDP(x) PSEUDOVECTORP (x, PVEC_COMPILED)
 #define BUFFERP(x) PSEUDOVECTORP (x, PVEC_BUFFER)
 #define CHAR_TABLE_P(x) PSEUDOVECTORP (x, PVEC_CHAR_TABLE)
@@ -1870,8 +1847,8 @@
 #define DEFUN(lname, fnname, sname, minargs, maxargs, intspec, doc)	\
    Lisp_Object fnname DEFUN_ARGS_ ## maxargs ;				\
    static struct Lisp_Subr alignas (GCALIGNMENT) sname =		\
-   { (PVEC_SUBR << PSEUDOVECTOR_SIZE_BITS)				\
-     | (sizeof (struct Lisp_Subr) / sizeof (EMACS_INT)),		\
+   { { (PVEC_SUBR << PSEUDOVECTOR_AREA_BITS)				\
+       | (sizeof (struct Lisp_Subr) / sizeof (EMACS_INT)) },		\
       { (Lisp_Object (__cdecl *)(void))fnname },                        \
        minargs, maxargs, lname, intspec, 0};				\
    Lisp_Object fnname
@@ -1879,8 +1856,8 @@
 #define DEFUN(lname, fnname, sname, minargs, maxargs, intspec, doc)	\
    Lisp_Object fnname DEFUN_ARGS_ ## maxargs ;				\
    static struct Lisp_Subr alignas (GCALIGNMENT) sname =		\
-     { PVEC_SUBR << PSEUDOVECTOR_SIZE_BITS,				\
-      { .a ## maxargs = fnname },					\
+     { { PVEC_SUBR << PSEUDOVECTOR_AREA_BITS },				\
+       { .a ## maxargs = fnname },					\
        minargs, maxargs, lname, intspec, 0};				\
    Lisp_Object fnname
 #endif

=== modified file 'src/lread.c'
--- src/lread.c	2012-10-05 05:57:24 +0000
+++ src/lread.c	2012-10-11 06:01:19 +0000
@@ -3978,7 +3978,7 @@
 {
   Lisp_Object sym, tem;
   sym = intern_c_string (sname->symbol_name);
-  XSETTYPED_PVECTYPE (sname, size, PVEC_SUBR);
+  XSETPVECTYPE (sname, PVEC_SUBR);
   XSETSUBR (tem, sname);
   set_symbol_function (sym, tem);
 }


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] shrink struct vectorlike_header
  2012-10-11  6:41 [PATCH] shrink struct vectorlike_header Dmitry Antipov
@ 2012-10-11 12:57 ` Stefan Monnier
  2012-11-06 17:09   ` [RFC, PATCH] shrink struct vectorlike_header #2 Dmitry Antipov
  2012-10-11 16:42 ` [PATCH] shrink struct vectorlike_header Eli Zaretskii
  1 sibling, 1 reply; 19+ messages in thread
From: Stefan Monnier @ 2012-10-11 12:57 UTC (permalink / raw)
  To: Dmitry Antipov; +Cc: Emacs development discussions

> This is an attempt to shrink struct vectorlike_header to the only
> 'size' field. For pseudovectors, 'size' field is rearranged to have
> one more bitfield to record the size of non-Lisp_Object area, thus
> giving the possibility to calculate the memory footprint of any
> vectorlike object.

Thanks, this is a very welcomed change (which will have to wait for the
freeze to end, of course).

> +/* When V is on the free list, V->contents[0] is used as a pointer to
> +   next vector on the free list.  */
> +
> +#define NEXT_IN_FREE_LIST(v)				\
> +  (*(struct Lisp_Vector **)((char *) v + header_size))

Please make the code match the comment, e.g:

     (*(struct Lisp_Vector **)&(v->contents[0]))

> +/* For the large vector V, previous word is used as a pointer
> +   to next large vector.  */
> +
> +#define NEXT_LARGE_VECTOR(v)					\
> +  (*(struct Lisp_Vector **)((char *) v - sizeof (void *)))

You could also use the simpler:

     (((struct Lisp_Vector **)v)[-1])

> +/* Convert from memory block used for large vector to the vector itself.  */
> +
> +#define LARGE_VECTOR_START(mem)					\
> +  (struct Lisp_Vector *)((char *) mem + sizeof (void *))
> +
> +/* Convert from large vector to it's memory block.  */
> +
> +#define LARGE_VECTOR_MEM(v) (void *)((char *) v - sizeof (void *))

Why not define a struct large_vector, so that NEXT_LARGE_VECTOR
can be (VECTOR_TO_LARGE (v)->next), LARGE_VECTOR_START (renamed to
LARGE_TO_VECTOR) can be (&(mem->vector)), and LARGE_VECTOR_MEM (renamed
to VECTOR_TO_LARGE) is defined maybe in terms of offsetof(vector)?

> +static ptrdiff_t
> +vector_nbytes (struct Lisp_Vector *v)
> +{
> +  ptrdiff_t size = v->header.size & ~ARRAY_MARK_FLAG;
> +
> +  if (size & PSEUDOVECTOR_FLAG)
> +    {
> +      if (PSEUDOVECTOR_TYPEP (&v->header, PVEC_BOOL_VECTOR))
> +	return (bool_header_size
> +		+ (((struct Lisp_Bool_Vector *) v)->size
> +		   + BOOL_VECTOR_BITS_PER_CHAR - 1)
> +		/ BOOL_VECTOR_BITS_PER_CHAR);
> +      return (header_size
> +	      + ((size & PSEUDOVECTOR_SIZE_MASK)
> +		 + ((size & PSEUDOVECTOR_REST_MASK)
> +		    >> PSEUDOVECTOR_SIZE_BITS)) * word_size);
> +    }
> +  return header_size + size * word_size;

I'd put an "else" before this last "return", so that if you forget a
"return" somewhere in the first branch of the "if", GCC will tell you.

> +    PVEC_TYPE_MASK = 0x3f << PSEUDOVECTOR_AREA_BITS,

Please add a note where we define all the PVEC_foo types that the max
number of such types is limited by this 0x3f.

        Stefan



^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] shrink struct vectorlike_header
  2012-10-11  6:41 [PATCH] shrink struct vectorlike_header Dmitry Antipov
  2012-10-11 12:57 ` Stefan Monnier
@ 2012-10-11 16:42 ` Eli Zaretskii
  1 sibling, 0 replies; 19+ messages in thread
From: Eli Zaretskii @ 2012-10-11 16:42 UTC (permalink / raw)
  To: Dmitry Antipov; +Cc: emacs-devel

> Date: Thu, 11 Oct 2012 10:41:48 +0400
> From: Dmitry Antipov <dmantipov@yandex.ru>
> 
> This is an attempt to shrink struct vectorlike_header to the only
> 'size' field. For pseudovectors, 'size' field is rearranged to have
> one more bitfield to record the size of non-Lisp_Object area, thus
> giving the possibility to calculate the memory footprint of any
> vectorlike object.

This will probably need an update for the related x* commands defined
by src/.gdbinit.



^ permalink raw reply	[flat|nested] 19+ messages in thread

* [RFC, PATCH] shrink struct vectorlike_header #2
  2012-10-11 12:57 ` Stefan Monnier
@ 2012-11-06 17:09   ` Dmitry Antipov
  2012-11-06 18:17     ` Stefan Monnier
  2012-11-06 20:53     ` Paul Eggert
  0 siblings, 2 replies; 19+ messages in thread
From: Dmitry Antipov @ 2012-11-06 17:09 UTC (permalink / raw)
  To: Emacs development discussions; +Cc: Stefan Monnier

[-- Attachment #1: Type: text/plain, Size: 134 bytes --]

Round #2 of http://lists.gnu.org/archive/html/emacs-devel/2012-10/msg00483.html,
re-applied on top of trunk revision 110817.

Dmitry


[-- Attachment #2: shrink_vectorlike_header_2.patch --]
[-- Type: text/plain, Size: 25070 bytes --]

=== modified file 'src/alloc.c'
--- src/alloc.c	2012-10-19 06:43:12 +0000
+++ src/alloc.c	2012-11-06 17:04:48 +0000
@@ -2040,7 +2040,7 @@
   val = Fmake_vector (make_number (length_in_elts + extra_bool_elts), Qnil);
 
   /* No Lisp_Object to trace in there.  */
-  XSETPVECTYPESIZE (XVECTOR (val), PVEC_BOOL_VECTOR, 0);
+  XSETPVECTYPESIZE (XVECTOR (val), PVEC_BOOL_VECTOR, 0, 0);
 
   p = XBOOL_VECTOR (val);
   p->size = XFASTINT (length);
@@ -2619,19 +2619,44 @@
 
 #define VINDEX(nbytes) (((nbytes) - VBLOCK_BYTES_MIN) / roundup_size)
 
+/* When V is on the free list, first word after header is
+   used as a pointer to next vector on the free list.  */
+
+#define NEXT_IN_FREE_LIST(v)				\
+  (*(struct Lisp_Vector **)((char *) v + header_size))
+
 /* Common shortcut to setup vector on a free list.  */
 
-#define SETUP_ON_FREE_LIST(v, nbytes, index)			\
-  do {								\
-    XSETPVECTYPESIZE (v, PVEC_FREE, nbytes);			\
-    eassert ((nbytes) % roundup_size == 0);			\
-    (index) = VINDEX (nbytes);					\
-    eassert ((index) < VECTOR_MAX_FREE_LIST_INDEX);		\
-    (v)->header.next.vector = vector_free_lists[index];		\
-    vector_free_lists[index] = (v);				\
-    total_free_vector_slots += (nbytes) / word_size;		\
+#define SETUP_ON_FREE_LIST(v, nbytes, tmp)		\
+  do {							\
+    (tmp) = ((nbytes - header_size) / word_size);	\
+    XSETPVECTYPESIZE (v, PVEC_FREE, 0, (tmp));		\
+    eassert ((nbytes) % roundup_size == 0);		\
+    (tmp) = VINDEX (nbytes);				\
+    eassert ((tmp) < VECTOR_MAX_FREE_LIST_INDEX);	\
+    NEXT_IN_FREE_LIST (v) = vector_free_lists[tmp];	\
+    vector_free_lists[tmp] = (v);			\
+    total_free_vector_slots += (nbytes) / word_size;	\
   } while (0)
 
+/* This internal type is used to maintain the list of large vectors
+   which are allocated at their own, e.g. outside of vector blocks.  */
+
+struct large_vector
+{
+  union {
+    struct large_vector *vector;
+#if USE_LSB_TAG
+    /* We need to maintain ROUNDUP_SIZE alignment for the vector member.  */
+    unsigned char c[vroundup (sizeof (void *))];
+#endif
+  } next;
+  struct Lisp_Vector v;
+};
+
+/* This internal type is used to maintain an underlying storage
+   for small vectors.  */
+
 struct vector_block
 {
   char data[VECTOR_BLOCK_BYTES];
@@ -2649,7 +2674,7 @@
 
 /* Singly-linked list of large vectors.  */
 
-static struct Lisp_Vector *large_vectors;
+static struct large_vector *large_vectors;
 
 /* The only vector with 0 slots, allocated from pure space.  */
 
@@ -2706,8 +2731,7 @@
   if (vector_free_lists[index])
     {
       vector = vector_free_lists[index];
-      vector_free_lists[index] = vector->header.next.vector;
-      vector->header.next.nbytes = nbytes;
+      vector_free_lists[index] = NEXT_IN_FREE_LIST (vector);
       total_free_vector_slots -= nbytes / word_size;
       return vector;
     }
@@ -2721,8 +2745,7 @@
       {
 	/* This vector is larger than requested.  */
 	vector = vector_free_lists[index];
-	vector_free_lists[index] = vector->header.next.vector;
-	vector->header.next.nbytes = nbytes;
+	vector_free_lists[index] = NEXT_IN_FREE_LIST (vector);
 	total_free_vector_slots -= nbytes / word_size;
 
 	/* Excess bytes are used for the smaller vector,
@@ -2739,7 +2762,6 @@
 
   /* New vector will be at the beginning of this block.  */
   vector = (struct Lisp_Vector *) block->data;
-  vector->header.next.nbytes = nbytes;
 
   /* If the rest of space from this block is large enough
      for one-slot vector at least, set up it on a free list.  */
@@ -2759,15 +2781,30 @@
   ((char *) (vector) <= (block)->data		\
    + VECTOR_BLOCK_BYTES - VBLOCK_BYTES_MIN)
 
-/* Number of bytes used by vector-block-allocated object.  This is the only
-   place where we actually use the `nbytes' field of the vector-header.
-   I.e. we could get rid of the `nbytes' field by computing it based on the
-   vector-type.  */
-
-#define PSEUDOVECTOR_NBYTES(vector) \
-  (PSEUDOVECTOR_TYPEP (&vector->header, PVEC_FREE)	\
-   ? vector->header.size & PSEUDOVECTOR_SIZE_MASK	\
-   : vector->header.next.nbytes)
+/* Return the memory footprint of V in bytes.  */
+
+static ptrdiff_t
+vector_nbytes (struct Lisp_Vector *v)
+{
+  ptrdiff_t size = v->header.size & ~ARRAY_MARK_FLAG;
+
+  if (size & PSEUDOVECTOR_FLAG)
+    {
+      if (PSEUDOVECTOR_TYPEP (&v->header, PVEC_BOOL_VECTOR))
+	size = (bool_header_size
+		+ (((struct Lisp_Bool_Vector *) v)->size
+		   + BOOL_VECTOR_BITS_PER_CHAR - 1)
+		/ BOOL_VECTOR_BITS_PER_CHAR);
+      else
+	size = (header_size
+		+ ((size & PSEUDOVECTOR_SIZE_MASK)
+		   + ((size & PSEUDOVECTOR_REST_MASK)
+		      >> PSEUDOVECTOR_SIZE_BITS)) * word_size);
+    }
+  else
+    size = header_size + size * word_size;
+  return vroundup (size);
+}
 
 /* Reclaim space used by unmarked vectors.  */
 
@@ -2775,7 +2812,8 @@
 sweep_vectors (void)
 {
   struct vector_block *block = vector_blocks, **bprev = &vector_blocks;
-  struct Lisp_Vector *vector, *next, **vprev = &large_vectors;
+  struct large_vector *lv, **lvprev = &large_vectors;
+  struct Lisp_Vector *vector, *next;
 
   total_vectors = total_vector_slots = total_free_vector_slots = 0;
   memset (vector_free_lists, 0, sizeof (vector_free_lists));
@@ -2785,6 +2823,7 @@
   for (block = vector_blocks; block; block = *bprev)
     {
       bool free_this_block = 0;
+      ptrdiff_t nbytes;
 
       for (vector = (struct Lisp_Vector *) block->data;
 	   VECTOR_IN_BLOCK (vector, block); vector = next)
@@ -2793,14 +2832,16 @@
 	    {
 	      VECTOR_UNMARK (vector);
 	      total_vectors++;
-	      total_vector_slots += vector->header.next.nbytes / word_size;
-	      next = ADVANCE (vector, vector->header.next.nbytes);
+	      nbytes = vector_nbytes (vector);
+	      total_vector_slots += nbytes / word_size;
+	      next = ADVANCE (vector, nbytes);
 	    }
 	  else
 	    {
-	      ptrdiff_t nbytes = PSEUDOVECTOR_NBYTES (vector);
-	      ptrdiff_t total_bytes = nbytes;
+	      ptrdiff_t total_bytes;
 
+	      nbytes = vector_nbytes (vector);
+	      total_bytes = nbytes;
 	      next = ADVANCE (vector, nbytes);
 
 	      /* While NEXT is not marked, try to coalesce with VECTOR,
@@ -2810,7 +2851,7 @@
 		{
 		  if (VECTOR_MARKED_P (next))
 		    break;
-		  nbytes = PSEUDOVECTOR_NBYTES (next);
+		  nbytes = vector_nbytes (next);
 		  total_bytes += nbytes;
 		  next = ADVANCE (next, nbytes);
 		}
@@ -2844,8 +2885,9 @@
 
   /* Sweep large vectors.  */
 
-  for (vector = large_vectors; vector; vector = *vprev)
+  for (lv = large_vectors; lv; lv = *lvprev)
     {
+      vector = &lv->v;
       if (VECTOR_MARKED_P (vector))
 	{
 	  VECTOR_UNMARK (vector);
@@ -2867,12 +2909,12 @@
 	  else
 	    total_vector_slots
 	      += header_size / word_size + vector->header.size;
-	  vprev = &vector->header.next.vector;
+	  lvprev = &lv->next.vector;
 	}
       else
 	{
-	  *vprev = vector->header.next.vector;
-	  lisp_free (vector);
+	  *lvprev = lv->next.vector;
+	  lisp_free (lv);
 	}
     }
 }
@@ -2904,9 +2946,12 @@
 	p = allocate_vector_from_block (vroundup (nbytes));
       else
 	{
-	  p = lisp_malloc (nbytes, MEM_TYPE_VECTORLIKE);
-	  p->header.next.vector = large_vectors;
-	  large_vectors = p;
+	  struct large_vector *lv
+	    = lisp_malloc (sizeof (*lv) + (len - 1) * word_size,
+			   MEM_TYPE_VECTORLIKE);
+	  lv->next.vector = large_vectors;
+	  large_vectors = lv;
+	  p = &lv->v;
 	}
 
 #ifdef DOUG_LEA_MALLOC
@@ -2943,16 +2988,21 @@
 /* Allocate other vector-like structures.  */
 
 struct Lisp_Vector *
-allocate_pseudovector (int memlen, int lisplen, int tag)
+allocate_pseudovector (int memlen, int lisplen, enum pvec_type tag)
 {
   struct Lisp_Vector *v = allocate_vectorlike (memlen);
   int i;
 
+  /* Catch bogus values.  */
+  eassert (tag <= PVEC_FONT);
+  eassert (memlen - lisplen <= (1 << PSEUDOVECTOR_REST_BITS) - 1);
+  eassert (lisplen <= (1 << PSEUDOVECTOR_SIZE_BITS) - 1);
+
   /* Only the first lisplen slots will be traced normally by the GC.  */
   for (i = 0; i < lisplen; ++i)
     v->contents[i] = Qnil;
 
-  XSETPVECTYPESIZE (v, tag, lisplen);
+  XSETPVECTYPESIZE (v, tag, lisplen, memlen - lisplen);
   return v;
 }
 
@@ -2961,10 +3011,9 @@
 {
   struct buffer *b = lisp_malloc (sizeof *b, MEM_TYPE_BUFFER);
 
-  XSETPVECTYPESIZE (b, PVEC_BUFFER, (offsetof (struct buffer, own_text)
-				     - header_size) / word_size);
+  BUFFER_PVEC_INIT (b);
   /* Put B on the chain of all buffers including killed ones.  */
-  b->header.next.buffer = all_buffers;
+  b->next = all_buffers;
   all_buffers = b;
   /* Note that the rest fields of B are not initialized.  */
   return b;
@@ -4068,16 +4117,15 @@
       while (VECTOR_IN_BLOCK (vector, block)
 	     && vector <= (struct Lisp_Vector *) p)
 	{
-	  if (PSEUDOVECTOR_TYPEP (&vector->header, PVEC_FREE))
-	    vector = ADVANCE (vector, (vector->header.size
-				       & PSEUDOVECTOR_SIZE_MASK));
-	  else if (vector == p)
+	  if (!PSEUDOVECTOR_TYPEP (&vector->header, PVEC_FREE) && vector == p)
 	    return 1;
 	  else
-	    vector = ADVANCE (vector, vector->header.next.nbytes);
+	    vector = ADVANCE (vector, vector_nbytes (vector));
 	}
     }
-  else if (m->type == MEM_TYPE_VECTORLIKE && p == m->start)
+  else if (m->type == MEM_TYPE_VECTORLIKE
+	   && (char *) p == ((char *) m->start
+			     + offsetof (struct large_vector, v)))
     /* This memory node corresponds to a large vector.  */
     return 1;
   return 0;
@@ -5687,7 +5735,7 @@
 
 	if (ptr->header.size & PSEUDOVECTOR_FLAG)
 	  pvectype = ((ptr->header.size & PVEC_TYPE_MASK)
-		      >> PSEUDOVECTOR_SIZE_BITS);
+		      >> PSEUDOVECTOR_AREA_BITS);
 	else
 	  pvectype = PVEC_NORMAL_VECTOR;
 
@@ -6317,7 +6365,7 @@
     for (buffer = all_buffers; buffer; buffer = *bprev)
       if (!VECTOR_MARKED_P (buffer))
 	{
-	  *bprev = buffer->header.next.buffer;
+	  *bprev = buffer->next;
 	  lisp_free (buffer);
 	}
       else
@@ -6326,7 +6374,7 @@
 	  /* Do not use buffer_(set|get)_intervals here.  */
 	  buffer->text->intervals = balance_intervals (buffer->text->intervals);
 	  total_buffers++;
-	  bprev = &buffer->header.next.buffer;
+	  bprev = &buffer->next;
 	}
   }
 

=== modified file 'src/buffer.c'
--- src/buffer.c	2012-11-06 13:26:20 +0000
+++ src/buffer.c	2012-11-06 17:04:48 +0000
@@ -5106,11 +5106,6 @@
 init_buffer_once (void)
 {
   int idx;
-  /* If you add, remove, or reorder Lisp_Objects in a struct buffer, make
-     sure that this is still correct.  Otherwise, mark_vectorlike may not
-     trace all Lisp_Objects in buffer_defaults and buffer_local_symbols.  */
-  const int pvecsize
-    = (offsetof (struct buffer, own_text) - header_size) / word_size;
 
   memset (buffer_permanent_local_flags, 0, sizeof buffer_permanent_local_flags);
 
@@ -5133,8 +5128,8 @@
   /* This is not strictly necessary, but let's make them initialized.  */
   bset_name (&buffer_defaults, build_pure_c_string (" *buffer-defaults*"));
   bset_name (&buffer_local_symbols, build_pure_c_string (" *buffer-local-symbols*"));
-  XSETPVECTYPESIZE (&buffer_defaults, PVEC_BUFFER, pvecsize);
-  XSETPVECTYPESIZE (&buffer_local_symbols, PVEC_BUFFER, pvecsize);
+  BUFFER_PVEC_INIT (&buffer_defaults);
+  BUFFER_PVEC_INIT (&buffer_local_symbols);
 
   /* Set up the default values of various buffer slots.  */
   /* Must do these before making the first buffer! */

=== modified file 'src/buffer.h'
--- src/buffer.h	2012-10-17 04:58:15 +0000
+++ src/buffer.h	2012-11-06 17:04:48 +0000
@@ -482,11 +482,6 @@
 
 struct buffer
 {
-  /* HEADER.NEXT is the next buffer, in chain of all buffers, including killed
-     buffers.  This chain, starting from all_buffers, is used only for garbage
-     collection, in order to collect killed buffers properly.  Note that large
-     vectors and large pseudo-vector objects are all on another chain starting
-     from large_vectors.  */
   struct vectorlike_header header;
 
   /* The name of this buffer.  */
@@ -750,6 +745,9 @@
      In an indirect buffer, this is the own_text field of another buffer.  */
   struct buffer_text *text;
 
+  /* Next buffer, in chain of all buffers, including killed ones.  */
+  struct buffer *next;
+
   /* Char position of point in buffer.  */
   ptrdiff_t pt;
 
@@ -959,6 +957,26 @@
   b->INTERNAL_FIELD (width_table) = val;
 }
 
+/* Number of Lisp_Objects at the beginning of struct buffer.
+   If you add, remove, or reorder Lisp_Objects within buffer
+   structure, make sure that this is still correct.  */
+
+#define BUFFER_LISP_SIZE                                               \
+  ((offsetof (struct buffer, own_text) - header_size) / word_size)
+
+/* Size of the struct buffer part beyond leading
+   Lisp_Objects, in word_size units.  */
+
+#define BUFFER_REST_SIZE                                               \
+  ((sizeof (struct buffer) - header_size) / word_size - BUFFER_LISP_SIZE)
+
+/* Initialize the pseudovector header of buffer object.  BUFFER_LISP_SIZE
+   is required for GC, but BUFFER_REST_SIZE is set up just to be consistent
+   with other pseudovectors.  */
+
+#define BUFFER_PVEC_INIT(b)                                    \
+  XSETPVECTYPESIZE (b, PVEC_BUFFER, BUFFER_LISP_SIZE, BUFFER_REST_SIZE)
+
 /* Convenient check whether buffer B is live.  */
 
 #define BUFFER_LIVE_P(b) (!NILP (BVAR (b, name)))
@@ -986,7 +1004,7 @@
 /* Used to iterate over the chain above.  */
 
 #define FOR_EACH_BUFFER(b) \
-  for ((b) = all_buffers; (b); (b) = (b)->header.next.buffer)
+  for ((b) = all_buffers; (b); (b) = (b)->next)
 
 /* This points to the current buffer.  */
 

=== modified file 'src/fns.c'
--- src/fns.c	2012-10-19 00:54:35 +0000
+++ src/fns.c	2012-11-06 17:04:48 +0000
@@ -2076,9 +2076,8 @@
 	   are sensible to compare, so eliminate the others now.  */
 	if (size & PSEUDOVECTOR_FLAG)
 	  {
-	    if (!(size & ((PVEC_COMPILED | PVEC_CHAR_TABLE
-			   | PVEC_SUB_CHAR_TABLE | PVEC_FONT)
-			  << PSEUDOVECTOR_SIZE_BITS)))
+	    if (((size & PVEC_TYPE_MASK) >> PSEUDOVECTOR_AREA_BITS)
+		< PVEC_COMPILED)
 	      return 0;
 	    size &= PSEUDOVECTOR_SIZE_MASK;
 	  }
@@ -3661,12 +3660,9 @@
 {
   Lisp_Object table;
   struct Lisp_Hash_Table *h2;
-  struct Lisp_Vector *next;
 
   h2 = allocate_hash_table ();
-  next = h2->header.next.vector;
   *h2 = *h1;
-  h2->header.next.vector = next;
   h2->key_and_value = Fcopy_sequence (h1->key_and_value);
   h2->hash = Fcopy_sequence (h1->hash);
   h2->next = Fcopy_sequence (h1->next);

=== modified file 'src/lisp.h'
--- src/lisp.h	2012-11-06 13:26:20 +0000
+++ src/lisp.h	2012-11-06 17:04:48 +0000
@@ -401,14 +401,11 @@
   PVEC_WINDOW_CONFIGURATION,
   PVEC_SUBR,
   PVEC_OTHER,
-  /* These last 4 are special because we OR them in fns.c:internal_equal,
-     so they have to use a disjoint bit pattern:
-     if (!(size & (PVEC_COMPILED | PVEC_CHAR_TABLE
-                   | PVEC_SUB_CHAR_TABLE | PVEC_FONT))) */
-  PVEC_COMPILED			= 0x10,
-  PVEC_CHAR_TABLE		= 0x20,
-  PVEC_SUB_CHAR_TABLE		= 0x30,
-  PVEC_FONT			= 0x40
+  /* These should be last, check internal_equal to see why.  */
+  PVEC_COMPILED,
+  PVEC_CHAR_TABLE,
+  PVEC_SUB_CHAR_TABLE,
+  PVEC_FONT /* Should be last because it's used for range checking.  */
 };
 
 /* DATA_SEG_BITS forces extra bits to be or'd in with any pointers
@@ -428,9 +425,18 @@
        only the number of Lisp_Object fields (that need to be traced by GC).
        The distinction is used, e.g., by Lisp_Process, which places extra
        non-Lisp_Object fields at the end of the structure.  */
-    PSEUDOVECTOR_SIZE_BITS = 16,
+    PSEUDOVECTOR_SIZE_BITS = 12,
     PSEUDOVECTOR_SIZE_MASK = (1 << PSEUDOVECTOR_SIZE_BITS) - 1,
-    PVEC_TYPE_MASK = 0x0fff << PSEUDOVECTOR_SIZE_BITS,
+
+    /* To calculate the memory footprint of the pseudovector, it's useful
+       to store the size of non-Lisp area in word_size units here.  */
+    PSEUDOVECTOR_REST_BITS = 12,
+    PSEUDOVECTOR_REST_MASK = (((1 << PSEUDOVECTOR_REST_BITS) - 1) 
+			      << PSEUDOVECTOR_SIZE_BITS),
+
+    /* Used to extract pseudovector subtype information.  */
+    PSEUDOVECTOR_AREA_BITS = PSEUDOVECTOR_SIZE_BITS + PSEUDOVECTOR_REST_BITS,
+    PVEC_TYPE_MASK = 0x3f << PSEUDOVECTOR_AREA_BITS,
 
     /* Number of bits to put in each character in the internal representation
        of bool vectors.  This should not vary across implementations.  */
@@ -599,13 +605,13 @@
 
 /* Pseudovector types.  */
 
-#define XSETPVECTYPE(v, code) XSETTYPED_PVECTYPE (v, header.size, code)
-#define XSETTYPED_PVECTYPE(v, size_member, code) \
-  ((v)->size_member |= PSEUDOVECTOR_FLAG | ((code) << PSEUDOVECTOR_SIZE_BITS))
-#define XSETPVECTYPESIZE(v, code, sizeval) \
+#define XSETPVECTYPE(v, code)						\
+  ((v)->header.size |= PSEUDOVECTOR_FLAG | ((code) << PSEUDOVECTOR_AREA_BITS))
+#define XSETPVECTYPESIZE(v, code, lispsize, restsize)		\
   ((v)->header.size = (PSEUDOVECTOR_FLAG			\
-		       | ((code) << PSEUDOVECTOR_SIZE_BITS)	\
-		       | (sizeval)))
+		       | ((code) << PSEUDOVECTOR_AREA_BITS)	\
+		       | ((restsize) << PSEUDOVECTOR_SIZE_BITS) \
+		       | (lispsize)))
 
 /* The cast to struct vectorlike_header * avoids aliasing issues.  */
 #define XSETPSEUDOVECTOR(a, b, code) \
@@ -617,16 +623,14 @@
 #define XSETTYPED_PSEUDOVECTOR(a, b, size, code)			\
   (XSETVECTOR (a, b),							\
    eassert ((size & (PSEUDOVECTOR_FLAG | PVEC_TYPE_MASK))		\
-	    == (PSEUDOVECTOR_FLAG | (code << PSEUDOVECTOR_SIZE_BITS))))
+	    == (PSEUDOVECTOR_FLAG | (code << PSEUDOVECTOR_AREA_BITS))))
 
 #define XSETWINDOW_CONFIGURATION(a, b) \
   (XSETPSEUDOVECTOR (a, b, PVEC_WINDOW_CONFIGURATION))
 #define XSETPROCESS(a, b) (XSETPSEUDOVECTOR (a, b, PVEC_PROCESS))
 #define XSETWINDOW(a, b) (XSETPSEUDOVECTOR (a, b, PVEC_WINDOW))
 #define XSETTERMINAL(a, b) (XSETPSEUDOVECTOR (a, b, PVEC_TERMINAL))
-/* XSETSUBR is special since Lisp_Subr lacks struct vectorlike_header.  */
-#define XSETSUBR(a, b) \
-  XSETTYPED_PSEUDOVECTOR (a, b, XSUBR (a)->size, PVEC_SUBR)
+#define XSETSUBR(a, b) (XSETPSEUDOVECTOR (a, b, PVEC_SUBR))
 #define XSETCOMPILED(a, b) (XSETPSEUDOVECTOR (a, b, PVEC_COMPILED))
 #define XSETBUFFER(a, b) (XSETPSEUDOVECTOR (a, b, PVEC_BUFFER))
 #define XSETCHAR_TABLE(a, b) (XSETPSEUDOVECTOR (a, b, PVEC_CHAR_TABLE))
@@ -793,7 +797,7 @@
   };
 
 /* Header of vector-like objects.  This documents the layout constraints on
-   vectors and pseudovectors other than struct Lisp_Subr.  It also prevents
+   vectors and pseudovectors (objects of PVEC_xxx subtype).  It also prevents
    compilers from being fooled by Emacs's type punning: the XSETPSEUDOVECTOR
    and PSEUDOVECTORP macros cast their pointers to struct vectorlike_header *,
    because when two such pointers potentially alias, a compiler won't
@@ -801,43 +805,26 @@
    <http://debbugs.gnu.org/cgi/bugreport.cgi?bug=8546>.  */
 struct vectorlike_header
   {
-    /* This field contains various pieces of information:
+    /* The only field contains various pieces of information:
        - The MSB (ARRAY_MARK_FLAG) holds the gcmarkbit.
        - The next bit (PSEUDOVECTOR_FLAG) indicates whether this is a plain
          vector (0) or a pseudovector (1).
        - If PSEUDOVECTOR_FLAG is 0, the rest holds the size (number
          of slots) of the vector.
-       - If PSEUDOVECTOR_FLAG is 1, the rest is subdivided into
-         a "pvec type" tag held in PVEC_TYPE_MASK and a size held in the lowest
-         PSEUDOVECTOR_SIZE_BITS.  That size normally indicates the number of
-         Lisp_Object slots at the beginning of the object that need to be
-         traced by the GC, tho some types use it slightly differently.
-       - E.g. if the pvec type is PVEC_FREE it means this is an unallocated
-         vector on a free-list and PSEUDOVECTOR_SIZE_BITS indicates its size
-         in bytes.  */
+       - If PSEUDOVECTOR_FLAG is 1, the rest is subdivided into three fields:
+	 - a) pseudovector subtype held in PVEC_TYPE_MASK field;
+	 - b) number of Lisp_Objects slots at the beginning of the object
+	   held in PSEUDOVECTOR_SIZE_MASK field.  These objects are always
+	   traced by the GC;
+	 - c) size of the rest fields held in PSEUDOVECTOR_REST_MASK and
+	   measured in word_size units.  Rest fields may also include
+	   Lisp_Objects, but these objects usually needs some special treatment
+	   during GC.
+	 There are some exceptions.  For PVEC_FREE, b) is always zero.  For
+	 PVEC_BOOL_VECTOR and PVEC_SUBR, both b) and c) are always zero.
+	 Current layout limits the pseudovectors to 63 PVEC_xxx subtypes,
+	 4095 Lisp_Objects in GC-ed area and 4095 word-sized other slots.  */
     ptrdiff_t size;
-
-    /* When the vector is allocated from a vector block, NBYTES is used
-       if the vector is not on a free list, and VECTOR is used otherwise.
-       For large vector-like objects, BUFFER or VECTOR is used as a pointer
-       to the next vector-like object.  It is generally a buffer or a
-       Lisp_Vector alias, so for convenience it is a union instead of a
-       pointer: this way, one can write P->next.vector instead of ((struct
-       Lisp_Vector *) P->next).  */
-    union {
-      /* This is only needed for small vectors that are not free because the
-	 `size' field only gives us the number of Lisp_Object slots, whereas we
-	 need to know the total size, including non-Lisp_Object data.
-	 FIXME: figure out a way to store this info elsewhere so we can
-	 finally get rid of this extra word of overhead.  */
-      ptrdiff_t nbytes;
-      struct buffer *buffer;
-      /* FIXME: This can be removed: For large vectors, this field could be
-	 placed *before* the vector itself.  And for small vectors on a free
-	 list, this field could be stored in the vector's bytes, since the
-	 empty vector is handled specially anyway.  */
-      struct Lisp_Vector *vector;
-    } next;
   };
 
 /* Regular vector is just a header plus array of Lisp_Objects.  */
@@ -1011,15 +998,11 @@
 
 /* This structure describes a built-in function.
    It is generated by the DEFUN macro only.
-   defsubr makes it into a Lisp object.
-
-   This type is treated in most respects as a pseudovector,
-   but since we never dynamically allocate or free them,
-   we don't need a struct vectorlike_header and its 'next' field.  */
+   defsubr makes it into a Lisp object.  */
 
 struct Lisp_Subr
   {
-    ptrdiff_t size;
+    struct vectorlike_header header;
     union {
       Lisp_Object (*a0) (void);
       Lisp_Object (*a1) (Lisp_Object);
@@ -1700,7 +1683,7 @@
 
 #define PSEUDOVECTOR_TYPEP(v, code)					\
   (((v)->size & (PSEUDOVECTOR_FLAG | PVEC_TYPE_MASK))			\
-   == (PSEUDOVECTOR_FLAG | ((code) << PSEUDOVECTOR_SIZE_BITS)))
+   == (PSEUDOVECTOR_FLAG | ((code) << PSEUDOVECTOR_AREA_BITS)))
 
 /* True if object X, with internal type struct T *, is a pseudovector whose
    code is CODE.  */
@@ -1713,8 +1696,7 @@
 #define PROCESSP(x) PSEUDOVECTORP (x, PVEC_PROCESS)
 #define WINDOWP(x) PSEUDOVECTORP (x, PVEC_WINDOW)
 #define TERMINALP(x) PSEUDOVECTORP (x, PVEC_TERMINAL)
-/* SUBRP is special since Lisp_Subr lacks struct vectorlike_header.  */
-#define SUBRP(x) TYPED_PSEUDOVECTORP (x, Lisp_Subr, PVEC_SUBR)
+#define SUBRP(x) PSEUDOVECTORP (x, PVEC_SUBR)
 #define COMPILEDP(x) PSEUDOVECTORP (x, PVEC_COMPILED)
 #define BUFFERP(x) PSEUDOVECTORP (x, PVEC_BUFFER)
 #define CHAR_TABLE_P(x) PSEUDOVECTORP (x, PVEC_CHAR_TABLE)
@@ -1889,8 +1871,8 @@
 #define DEFUN(lname, fnname, sname, minargs, maxargs, intspec, doc)	\
    Lisp_Object fnname DEFUN_ARGS_ ## maxargs ;				\
    static struct Lisp_Subr alignas (GCALIGNMENT) sname =		\
-   { (PVEC_SUBR << PSEUDOVECTOR_SIZE_BITS)				\
-     | (sizeof (struct Lisp_Subr) / sizeof (EMACS_INT)),		\
+   { { (PVEC_SUBR << PSEUDOVECTOR_AREA_BITS)				\
+       | (sizeof (struct Lisp_Subr) / sizeof (EMACS_INT)) },		\
       { (Lisp_Object (__cdecl *)(void))fnname },                        \
        minargs, maxargs, lname, intspec, 0};				\
    Lisp_Object fnname
@@ -1898,8 +1880,8 @@
 #define DEFUN(lname, fnname, sname, minargs, maxargs, intspec, doc)	\
    Lisp_Object fnname DEFUN_ARGS_ ## maxargs ;				\
    static struct Lisp_Subr alignas (GCALIGNMENT) sname =		\
-     { PVEC_SUBR << PSEUDOVECTOR_SIZE_BITS,				\
-      { .a ## maxargs = fnname },					\
+     { { PVEC_SUBR << PSEUDOVECTOR_AREA_BITS },				\
+       { .a ## maxargs = fnname },					\
        minargs, maxargs, lname, intspec, 0};				\
    Lisp_Object fnname
 #endif
@@ -2946,7 +2928,7 @@
 extern Lisp_Object Qautomatic_gc;
 extern Lisp_Object Qchar_table_extra_slots;
 extern struct Lisp_Vector *allocate_vector (EMACS_INT);
-extern struct Lisp_Vector *allocate_pseudovector (int memlen, int lisplen, int tag);
+extern struct Lisp_Vector *allocate_pseudovector (int, int, enum pvec_type);
 #define ALLOCATE_PSEUDOVECTOR(typ,field,tag)				\
   ((typ*)								\
    allocate_pseudovector						\

=== modified file 'src/lread.c'
--- src/lread.c	2012-10-20 12:50:49 +0000
+++ src/lread.c	2012-11-06 17:04:48 +0000
@@ -3981,7 +3981,7 @@
 {
   Lisp_Object sym, tem;
   sym = intern_c_string (sname->symbol_name);
-  XSETTYPED_PVECTYPE (sname, size, PVEC_SUBR);
+  XSETPVECTYPE (sname, PVEC_SUBR);
   XSETSUBR (tem, sname);
   set_symbol_function (sym, tem);
 }


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [RFC, PATCH] shrink struct vectorlike_header #2
  2012-11-06 17:09   ` [RFC, PATCH] shrink struct vectorlike_header #2 Dmitry Antipov
@ 2012-11-06 18:17     ` Stefan Monnier
  2012-11-07 14:57       ` Dmitry Antipov
  2012-11-06 20:53     ` Paul Eggert
  1 sibling, 1 reply; 19+ messages in thread
From: Stefan Monnier @ 2012-11-06 18:17 UTC (permalink / raw)
  To: Dmitry Antipov; +Cc: Emacs development discussions

> +/* When V is on the free list, first word after header is
> +   used as a pointer to next vector on the free list.  */
> +
> +#define NEXT_IN_FREE_LIST(v)				\
> +  (*(struct Lisp_Vector **)((char *) v + header_size))
> +
>  /* Common shortcut to setup vector on a free list.  */

Why change the comment rather than the code.  IOW what don't you like in:

   Please make the code match the comment, e.g:
     (*(struct Lisp_Vector **)&(v->contents[0]))

I personally find it much more elegant and robust than doing pointer
arithmetic via conversion to char*.

The rest looks fine, tho it still probably lacks the corresponding
changes in .gdbinit, right?


        Stefan



^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [RFC, PATCH] shrink struct vectorlike_header #2
  2012-11-06 17:09   ` [RFC, PATCH] shrink struct vectorlike_header #2 Dmitry Antipov
  2012-11-06 18:17     ` Stefan Monnier
@ 2012-11-06 20:53     ` Paul Eggert
  2012-11-06 21:28       ` Eli Zaretskii
  1 sibling, 1 reply; 19+ messages in thread
From: Paul Eggert @ 2012-11-06 20:53 UTC (permalink / raw)
  To: Dmitry Antipov; +Cc: Stefan Monnier, Emacs development discussions

> +struct large_vector
> +{
> +  union {
> +    struct large_vector *vector;
> +#if USE_LSB_TAG
> +    /* We need to maintain ROUNDUP_SIZE alignment for the vector member.  */
> +    unsigned char c[vroundup (sizeof (void *))];
> +#endif

That 'void *' should be 'struct large_vector *', mostly for readability
(but also for weird architectures where different pointer types have
different sizes).

> +/* Size of the struct buffer part beyond leading
> +   Lisp_Objects, in word_size units.  */
> +
> +#define BUFFER_REST_SIZE                                               \
> +  ((sizeof (struct buffer) - header_size) / word_size - BUFFER_LISP_SIZE)

This assumes (sizeof (struct buffer) - header_size) is a
multiple of word_size, but that's not necessarily the case.  Does it
matter that BUFFER_REST_SIZE might be incorrect?  If so, it needs to
be fixed; if not, there should be a comment explaining why it doesn't
matter whether the value is correct.

Otherwise, looks good; thanks.




^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [RFC, PATCH] shrink struct vectorlike_header #2
  2012-11-06 20:53     ` Paul Eggert
@ 2012-11-06 21:28       ` Eli Zaretskii
  0 siblings, 0 replies; 19+ messages in thread
From: Eli Zaretskii @ 2012-11-06 21:28 UTC (permalink / raw)
  To: Paul Eggert; +Cc: dmantipov, monnier, emacs-devel

> Date: Tue, 06 Nov 2012 12:53:19 -0800
> From: Paul Eggert <eggert@cs.ucla.edu>
> Cc: Stefan Monnier <monnier@iro.umontreal.ca>,
> 	Emacs development discussions <emacs-devel@gnu.org>
> 
> > +/* Size of the struct buffer part beyond leading
> > +   Lisp_Objects, in word_size units.  */
> > +
> > +#define BUFFER_REST_SIZE                                               \
> > +  ((sizeof (struct buffer) - header_size) / word_size - BUFFER_LISP_SIZE)
> 
> This assumes (sizeof (struct buffer) - header_size) is a
> multiple of word_size, but that's not necessarily the case.

Shouldn't this use offsetof for safer results?



^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [RFC, PATCH] shrink struct vectorlike_header #2
  2012-11-06 18:17     ` Stefan Monnier
@ 2012-11-07 14:57       ` Dmitry Antipov
  2012-11-08  3:08         ` Stefan Monnier
  0 siblings, 1 reply; 19+ messages in thread
From: Dmitry Antipov @ 2012-11-07 14:57 UTC (permalink / raw)
  To: Stefan Monnier; +Cc: Emacs development discussions

On 11/06/2012 10:17 PM, Stefan Monnier wrote:

>> +/* When V is on the free list, first word after header is
>> +   used as a pointer to next vector on the free list.  */
>> +
>> +#define NEXT_IN_FREE_LIST(v)				\
>> +  (*(struct Lisp_Vector **)((char *) v + header_size))
>> +
>>   /* Common shortcut to setup vector on a free list.  */
>
> Why change the comment rather than the code.  IOW what don't you like in:
>
>     Please make the code match the comment, e.g:
>       (*(struct Lisp_Vector **)&(v->contents[0]))
>
> I personally find it much more elegant and robust than doing pointer
> arithmetic via conversion to char*.

I agree that your code looks better, but it causes 'dereferencing
type-punned pointer will break strict-aliasing rules' warning
(and so error if --enable-gcc-warnings, gcc 4.7.2).

> The rest looks fine, tho it still probably lacks the corresponding
> changes in .gdbinit, right?

Yes, it's still on the way...

Dmitry





^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [RFC, PATCH] shrink struct vectorlike_header #2
  2012-11-07 14:57       ` Dmitry Antipov
@ 2012-11-08  3:08         ` Stefan Monnier
  2012-11-08  5:25           ` Paul Eggert
  2012-11-08  6:56           ` Stephen J. Turnbull
  0 siblings, 2 replies; 19+ messages in thread
From: Stefan Monnier @ 2012-11-08  3:08 UTC (permalink / raw)
  To: Dmitry Antipov; +Cc: Emacs development discussions

> I agree that your code looks better, but it causes 'dereferencing
> type-punned pointer will break strict-aliasing rules' warning
> (and so error if --enable-gcc-warnings, gcc 4.7.2).

I doubt (*(struct Lisp_Vector **)((char *) v + header_size))
is any better.  I guess it just defeats gcc's detection of the problem.
Maybe someone like Paul might have an idea how to solve this "cleanly"?
In the mean time, feel free to use your code, but please add my code in
comment with a note about why we don't use it.


        Stefan



^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [RFC, PATCH] shrink struct vectorlike_header #2
  2012-11-08  3:08         ` Stefan Monnier
@ 2012-11-08  5:25           ` Paul Eggert
  2012-11-08 13:31             ` Dmitry Antipov
                               ` (2 more replies)
  2012-11-08  6:56           ` Stephen J. Turnbull
  1 sibling, 3 replies; 19+ messages in thread
From: Paul Eggert @ 2012-11-08  5:25 UTC (permalink / raw)
  To: Stefan Monnier; +Cc: Dmitry Antipov, Emacs development discussions

On 11/07/2012 07:08 PM, Stefan Monnier wrote:
> I doubt (*(struct Lisp_Vector **)((char *) v + header_size))
> is any better.  I guess it just defeats gcc's detection of the problem.

Casting through char * is better, because the C standard
says that a compiler cannot do type-based alias inferencing
in the presence of char * pointers.  I presume this is why GCC
generates all those warnings when we don't use char * -- GCC is
warning us that it may be doing optimizations that will crash
our code.

But better yet would be to omit the casts entirely.
This might let GCC do more optimizations safely.  That is,
change struct Lisp_Vector to be something like this:

struct Lisp_Vector
  {
     struct vectorlike_header header;
     union
       {
          Lisp_Object contents[1];
          struct Lisp_Vector *next;
       } u;
  };

Replace all current uses of 'contents'
with 'u.contents', and then use u.next
when you want to use the memory as a next field.
No pointer-casts necessary, and the union is more likely to
work correctly and efficiency than either of the techniques
that involve casting pointers.



^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [RFC, PATCH] shrink struct vectorlike_header #2
  2012-11-08  3:08         ` Stefan Monnier
  2012-11-08  5:25           ` Paul Eggert
@ 2012-11-08  6:56           ` Stephen J. Turnbull
  1 sibling, 0 replies; 19+ messages in thread
From: Stephen J. Turnbull @ 2012-11-08  6:56 UTC (permalink / raw)
  To: Stefan Monnier; +Cc: Dmitry Antipov, Emacs development discussions

Stefan Monnier writes:

 > I doubt (*(struct Lisp_Vector **)((char *) v + header_size))
 > is any better.  I guess it just defeats gcc's detection of the
 > problem.

Not true, in general.  Martin Buchholz (IIRC) reported that (at
least for some constructs, for some versions of GCC, and specific
optimization settings -- this was a few years ago) such gymnastics
change the code generated (presumably from something incorrect to
something correct).




^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [RFC, PATCH] shrink struct vectorlike_header #2
  2012-11-08  5:25           ` Paul Eggert
@ 2012-11-08 13:31             ` Dmitry Antipov
  2012-11-08 14:03             ` Stefan Monnier
  2012-11-08 17:42             ` Nix
  2 siblings, 0 replies; 19+ messages in thread
From: Dmitry Antipov @ 2012-11-08 13:31 UTC (permalink / raw)
  To: Paul Eggert; +Cc: Stefan Monnier, Emacs development discussions

On 11/08/2012 09:25 AM, Paul Eggert wrote:

> But better yet would be to omit the casts entirely.
> This might let GCC do more optimizations safely.  That is,
> change struct Lisp_Vector to be something like this:
>
> struct Lisp_Vector
>    {
>       struct vectorlike_header header;
>       union
>         {
>            Lisp_Object contents[1];
>            struct Lisp_Vector *next;
>         } u;
>    };

This is the most clean way, but

> Replace all current uses of 'contents'
> with 'u.contents',

is it worth doing such a massive change just to avoid one little glitch?

Dmitry




^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [RFC, PATCH] shrink struct vectorlike_header #2
  2012-11-08  5:25           ` Paul Eggert
  2012-11-08 13:31             ` Dmitry Antipov
@ 2012-11-08 14:03             ` Stefan Monnier
  2012-11-08 14:45               ` Dmitry Antipov
  2012-11-08 17:12               ` Andreas Schwab
  2012-11-08 17:42             ` Nix
  2 siblings, 2 replies; 19+ messages in thread
From: Stefan Monnier @ 2012-11-08 14:03 UTC (permalink / raw)
  To: Paul Eggert; +Cc: Dmitry Antipov, Emacs development discussions

>> I doubt (*(struct Lisp_Vector **)((char *) v + header_size))
>> is any better.  I guess it just defeats gcc's detection of the problem.
> Casting through char * is better, because the C standard
> says that a compiler cannot do type-based alias inferencing
> in the presence of char * pointers.

That's so backwards: rather than force people to use low-level fiddly
code, why can't they say "oh wait, you're doing some funny cast, let's
be more conservative with type-based aliasing, as if the code used
char*".

> I presume this is why GCC generates all those warnings when we don't
> use char * -- GCC is warning us that it may be doing optimizations
> that will crash our code.

IOW it's telling us "beware that even though I have here evidence that
type-based alias analysis is unsafe for your code, I'll go ahead and use
it anyway even though I know how not to use it since I have to do that
when you use char* casts!".  Nice.

> struct Lisp_Vector
>   {
>      struct vectorlike_header header;
>      union
>        {
>           Lisp_Object contents[1];
>           struct Lisp_Vector *next;
>        } u;
>   };

Yes, that'd be better.


        Stefan



^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [RFC, PATCH] shrink struct vectorlike_header #2
  2012-11-08 14:03             ` Stefan Monnier
@ 2012-11-08 14:45               ` Dmitry Antipov
  2012-11-08 15:08                 ` Dmitry Antipov
  2012-11-08 17:12               ` Andreas Schwab
  1 sibling, 1 reply; 19+ messages in thread
From: Dmitry Antipov @ 2012-11-08 14:45 UTC (permalink / raw)
  To: Stefan Monnier; +Cc: Paul Eggert, Emacs development discussions

On 11/08/2012 06:03 PM, Stefan Monnier wrote:

>> struct Lisp_Vector
>>    {
>>       struct vectorlike_header header;
>>       union
>>         {
>>            Lisp_Object contents[1];
>>            struct Lisp_Vector *next;
>>         } u;
>>    };
>
> Yes, that'd be better.

This will be a quite large patch, with a lot of small changes,
and I don't want to mix them with the core logical changes.
So if others votes for such a change, I would rather like to
do it separately.

Dmitry




^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [RFC, PATCH] shrink struct vectorlike_header #2
  2012-11-08 14:45               ` Dmitry Antipov
@ 2012-11-08 15:08                 ` Dmitry Antipov
  2012-11-08 16:30                   ` Paul Eggert
  0 siblings, 1 reply; 19+ messages in thread
From: Dmitry Antipov @ 2012-11-08 15:08 UTC (permalink / raw)
  To: Emacs development discussions; +Cc: Paul Eggert, Stefan Monnier

[-- Attachment #1: Type: text/plain, Size: 652 bytes --]

On 11/08/2012 06:45 PM, Dmitry Antipov wrote:

> On 11/08/2012 06:03 PM, Stefan Monnier wrote:
>
>>> struct Lisp_Vector
>>>    {
>>>       struct vectorlike_header header;
>>>       union
>>>         {
>>>            Lisp_Object contents[1];
>>>            struct Lisp_Vector *next;
>>>         } u;
>>>    };
>>
>> Yes, that'd be better.
>
> This will be a quite large patch, with a lot of small changes,
> and I don't want to mix them with the core logical changes.
> So if others votes for such a change, I would rather like to
> do it separately.

Argh, there is another solution (comes into my head immediately
after commit, of course).

Dmitry



[-- Attachment #2: lisp_vectorlike_free.patch --]
[-- Type: text/plain, Size: 1067 bytes --]

=== modified file 'src/alloc.c'
--- src/alloc.c	2012-11-08 14:10:28 +0000
+++ src/alloc.c	2012-11-08 15:05:02 +0000
@@ -2611,16 +2611,18 @@
 
 #define VINDEX(nbytes) (((nbytes) - VBLOCK_BYTES_MIN) / roundup_size)
 
-/* When V is on the free list, first word after header is used as a pointer
-   to next vector on the free list.  It might be done in a better way with:
-
-   (*(struct Lisp_Vector **)&(v->contents[0]))
-
-   but this breaks GCC's strict-aliasing rules (which looks more relaxed
-   for char and void pointers).  */
-
-#define NEXT_IN_FREE_LIST(v)				\
-  (*(struct Lisp_Vector **)((char *) v + header_size))
+/* This is a pseudo vectorlike object used to represent any
+   block-allocated vectorlike object on the free list.  */
+
+struct Lisp_Vectorlike_Free
+{
+  struct vectorlike_header header;
+  struct Lisp_Vector *next;
+};
+
+/* When V is on the free list, it's always treated as Lisp_Vectorlike_Free.  */
+
+#define NEXT_IN_FREE_LIST(v) ((struct Lisp_Vectorlike_Free *) v)->next
 
 /* Common shortcut to setup vector on a free list.  */
 


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [RFC, PATCH] shrink struct vectorlike_header #2
  2012-11-08 15:08                 ` Dmitry Antipov
@ 2012-11-08 16:30                   ` Paul Eggert
  0 siblings, 0 replies; 19+ messages in thread
From: Paul Eggert @ 2012-11-08 16:30 UTC (permalink / raw)
  To: Dmitry Antipov; +Cc: Stefan Monnier, Emacs development discussions

On 11/08/2012 07:08 AM, Dmitry Antipov wrote:
> +#define NEXT_IN_FREE_LIST(v) ((struct Lisp_Vectorlike_Free *) v)->next

This solution is nicer than what we have, but it still
has the problem of casting pointers, which would let
the C compiler do optimizations that we don't want to
allow.

Another possibility would be something like this:

  static struct Lisp_Vector *
  next_in_free_list (struct Lisp_Vector *v)
  {
    intptr_t i = XIL (v->contents[0]);
    return (struct Lisp_Vector *) i;
  }
  static void
  set_next_in_free_list (struct Lisp_Vector *v, struct Lisp_Vector *next)
  {
    v->contents[0] = XIL ((intptr_t) (next));
  }

That is, define a setter as well as a getter, and use the setter
when modifying the next field.  This would be simpler and
arguably cleaner than the union and would be just as safe, since we
know that Lisp_Object is at least as wide as intptr_t.



^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [RFC, PATCH] shrink struct vectorlike_header #2
  2012-11-08 14:03             ` Stefan Monnier
  2012-11-08 14:45               ` Dmitry Antipov
@ 2012-11-08 17:12               ` Andreas Schwab
  1 sibling, 0 replies; 19+ messages in thread
From: Andreas Schwab @ 2012-11-08 17:12 UTC (permalink / raw)
  To: Stefan Monnier; +Cc: Paul Eggert, Dmitry Antipov, Emacs development discussions

Stefan Monnier <monnier@iro.umontreal.ca> writes:

> That's so backwards: rather than force people to use low-level fiddly
> code, why can't they say "oh wait, you're doing some funny cast, let's
> be more conservative with type-based aliasing, as if the code used
> char*".

You can do that with the may_alias attribute.

Andreas.

-- 
Andreas Schwab, schwab@linux-m68k.org
GPG Key fingerprint = 58CA 54C7 6D53 942B 1756  01D3 44D5 214B 8276 4ED5
"And now for something completely different."



^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [RFC, PATCH] shrink struct vectorlike_header #2
  2012-11-08  5:25           ` Paul Eggert
  2012-11-08 13:31             ` Dmitry Antipov
  2012-11-08 14:03             ` Stefan Monnier
@ 2012-11-08 17:42             ` Nix
  2012-11-09 18:04               ` Andreas Rottmann
  2 siblings, 1 reply; 19+ messages in thread
From: Nix @ 2012-11-08 17:42 UTC (permalink / raw)
  To: Paul Eggert; +Cc: Dmitry Antipov, Stefan Monnier, Emacs development discussions

On 8 Nov 2012, Paul Eggert spake thusly:
> But better yet would be to omit the casts entirely.
> This might let GCC do more optimizations safely.  That is,
> change struct Lisp_Vector to be something like this:
>
> struct Lisp_Vector
>   {
>      struct vectorlike_header header;
>      union
>        {
>           Lisp_Object contents[1];
>           struct Lisp_Vector *next;
>        } u;
>   };
>
> Replace all current uses of 'contents'
> with 'u.contents', and then use u.next
> when you want to use the memory as a next field.

It's a shame we can't use an unnamed union here:

struct Lisp_Vector
  {
     struct vectorlike_header header;
     union
       {
          Lisp_Object contents[1];
          struct Lisp_Vector *next;
       };
  };

No changes to source necessary.

Downside: GCC extension :( which probably rules it out unless wrapped in
compiler-specific macro trickery, and if you're doing that you might as
well just unconditionally use a named union.

Sigh.

-- 
NULL && (void)



^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [RFC, PATCH] shrink struct vectorlike_header #2
  2012-11-08 17:42             ` Nix
@ 2012-11-09 18:04               ` Andreas Rottmann
  0 siblings, 0 replies; 19+ messages in thread
From: Andreas Rottmann @ 2012-11-09 18:04 UTC (permalink / raw)
  To: Nix
  Cc: Paul Eggert, Dmitry Antipov, Stefan Monnier,
	Emacs development discussions

Nix <nix@esperi.org.uk> writes:

> On 8 Nov 2012, Paul Eggert spake thusly:
>> But better yet would be to omit the casts entirely.
>> This might let GCC do more optimizations safely.  That is,
>> change struct Lisp_Vector to be something like this:
>>
>> struct Lisp_Vector
>>   {
>>      struct vectorlike_header header;
>>      union
>>        {
>>           Lisp_Object contents[1];
>>           struct Lisp_Vector *next;
>>        } u;
>>   };
>>
>> Replace all current uses of 'contents'
>> with 'u.contents', and then use u.next
>> when you want to use the memory as a next field.
>
> It's a shame we can't use an unnamed union here:
>
> struct Lisp_Vector
>   {
>      struct vectorlike_header header;
>      union
>        {
>           Lisp_Object contents[1];
>           struct Lisp_Vector *next;
>        };
>   };
>
> No changes to source necessary.
>
> Downside: GCC extension :( which probably rules it out unless wrapped in
> compiler-specific macro trickery, and if you're doing that you might as
> well just unconditionally use a named union.
>
At least according to Wikipedia[0], anonymous unions and structs are
part of C11, so they're not compiler-specific any longer.  However,
given the newness of C11, similiar considerations regarding portability
to non-GCC compilers may still apply.

[0] http://en.wikipedia.org/wiki/C11_(C_standard_revision)

Regards, Rotty
-- 
Andreas Rottmann -- <http://rotty.xx.vu/>



^ permalink raw reply	[flat|nested] 19+ messages in thread

end of thread, other threads:[~2012-11-09 18:04 UTC | newest]

Thread overview: 19+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-10-11  6:41 [PATCH] shrink struct vectorlike_header Dmitry Antipov
2012-10-11 12:57 ` Stefan Monnier
2012-11-06 17:09   ` [RFC, PATCH] shrink struct vectorlike_header #2 Dmitry Antipov
2012-11-06 18:17     ` Stefan Monnier
2012-11-07 14:57       ` Dmitry Antipov
2012-11-08  3:08         ` Stefan Monnier
2012-11-08  5:25           ` Paul Eggert
2012-11-08 13:31             ` Dmitry Antipov
2012-11-08 14:03             ` Stefan Monnier
2012-11-08 14:45               ` Dmitry Antipov
2012-11-08 15:08                 ` Dmitry Antipov
2012-11-08 16:30                   ` Paul Eggert
2012-11-08 17:12               ` Andreas Schwab
2012-11-08 17:42             ` Nix
2012-11-09 18:04               ` Andreas Rottmann
2012-11-08  6:56           ` Stephen J. Turnbull
2012-11-06 20:53     ` Paul Eggert
2012-11-06 21:28       ` Eli Zaretskii
2012-10-11 16:42 ` [PATCH] shrink struct vectorlike_header Eli Zaretskii

Code repositories for project(s) associated with this public inbox

	https://git.savannah.gnu.org/cgit/emacs.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).