all messages for Emacs-related lists mirrored at yhetil.org
 help / color / mirror / code / Atom feed
* [patch] mem_node shrink #2
@ 2007-11-27 16:34 Dmitry Antipov
  2007-11-29  1:04 ` Richard Stallman
  0 siblings, 1 reply; 7+ messages in thread
From: Dmitry Antipov @ 2007-11-27 16:34 UTC (permalink / raw)
  To: emacs-devel

[-- Attachment #1: Type: text/plain, Size: 588 bytes --]

This is a second attempt with struct mem_node shrink stuff. It also touches xrealloc due to the reasons described at http://lists.gnu.org/archive/html/emacs-devel/2007-11/msg01583.html.



On 64-bit system with glibc malloc, the benefits are even more - for 48-bytes mem_node, 56-bytes chunk is allocated in the best (normal) case, and 72-bytes chunk is allocated rarely. This patch reduces mem_node to 40 bytes, which is an exact allocation size in the most of the cases (rare worst cases are 56-bytes chunks). That's for the case if GC_MALLOC_CHECK isn't defined, of course.



Dmitry


[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: mem_node_shrink_2.patch --]
[-- Type: text/x-patch; name="mem_node_shrink_2.patch", Size: 6013 bytes --]

Index: alloc.c
===================================================================
RCS file: /sources/emacs/emacs/src/alloc.c,v
retrieving revision 1.432
diff -u -r1.432 alloc.c
--- alloc.c	16 Nov 2007 21:24:59 -0000	1.432
+++ alloc.c	27 Nov 2007 16:03:47 -0000
@@ -430,6 +430,19 @@
    description of red-black trees.  Any book worth its salt should
    describe them.  */
 
+#ifdef GC_MALLOC_CHECK
+/* If this is defined, xmalloc and xrealloc calls mem_insert.  On 64-bit
+   system, if neither USE_MMAP_FOR_BUFFERS nor REL_ALLOC are used, xmalloc
+   or xrealloc may allocate buffer text which size fits into integer, but
+   not into BITS_PER_INT - 4 bits wide bit field.  */
+#define MEM_NODE_SIZE_WIDTH (BITS_PER_EMACS_INT - 4)
+#else
+/* Otherwise, every region which is passed to mem_insert is never more than
+   256M.  */
+#define MEM_NODE_SIZE_WIDTH (BITS_PER_INT - 4)
+#endif
+#define MEM_NODE_SIZE_MAX ((1 << MEM_NODE_SIZE_WIDTH) - 1)
+
 struct mem_node
 {
   /* Children of this node.  These pointers are never NULL.  When there
@@ -439,14 +452,18 @@
   /* The parent of this node.  In the root node, this is NULL.  */
   struct mem_node *parent;
 
-  /* Start and end of allocated region.  */
-  void *start, *end;
+  /* Start of allocated region.  */
+  void *start;
+
+  /* Size of allocated region.  */
+  unsigned size : MEM_NODE_SIZE_WIDTH;
 
   /* Node color.  */
-  enum {MEM_BLACK, MEM_RED} color;
+  enum {MEM_BLACK, MEM_RED} color : 1;
 
-  /* Memory type.  */
-  enum mem_type type;
+  /* Memory type.  The width of this bitfield should be
+     enough to hold any possible mem_type value.  */
+  enum mem_type type : 3;
 };
 
 /* Base address of stack.  Set in main.  */
@@ -480,7 +497,7 @@
 static void mark_maybe_object P_ ((Lisp_Object));
 static void mark_memory P_ ((void *, void *, int));
 static void mem_init P_ ((void));
-static struct mem_node *mem_insert P_ ((void *, void *, enum mem_type));
+static struct mem_node *mem_insert P_ ((void *, size_t, enum mem_type));
 static void mem_insert_fixup P_ ((struct mem_node *));
 static void mem_rotate_left P_ ((struct mem_node *));
 static void mem_rotate_right P_ ((struct mem_node *));
@@ -781,9 +798,16 @@
   register POINTER_TYPE *val;
 
   MALLOC_BLOCK_INPUT;
+  /* Some mallocs returns valid pointer even if SIZE is 0.
+     We're just doing free (PTR) here.  */
+  if (size == 0)
+    {
+      free (block);
+      val = NULL;
+    }
   /* We must call malloc explicitly when BLOCK is 0, since some
      reallocs don't do this.  */
-  if (! block)
+  else if (! block)
     val = (POINTER_TYPE *) malloc (size);
   else
     val = (POINTER_TYPE *) realloc (block, size);
@@ -880,7 +904,7 @@
 
 #if GC_MARK_STACK && !defined GC_MALLOC_CHECK
   if (val && type != MEM_TYPE_NON_LISP)
-    mem_insert (val, (char *) val + nbytes, type);
+    mem_insert (val, nbytes, type);
 #endif
 
   MALLOC_UNBLOCK_INPUT;
@@ -1090,7 +1114,7 @@
 
 #if GC_MARK_STACK && !defined GC_MALLOC_CHECK
   if (val && type != MEM_TYPE_NON_LISP)
-    mem_insert (val, (char *) val + nbytes, type);
+    mem_insert (val, nbytes, type);
 #endif
 
   MALLOC_UNBLOCK_INPUT;
@@ -1263,14 +1287,13 @@
 	fprintf (stderr, "Malloc returned %p which is already in use\n",
 		 value);
 	fprintf (stderr, "Region in use is %p...%p, %u bytes, type %d\n",
-		 m->start, m->end, (char *) m->end - (char *) m->start,
-		 m->type);
+		 m->start, m->start + m->size, m->size, m->type);
 	abort ();
       }
 
     if (!dont_register_blocks)
       {
-	mem_insert (value, (char *) value + max (1, size), allocated_mem_type);
+	mem_insert (value, size, allocated_mem_type);
 	allocated_mem_type = MEM_TYPE_NON_LISP;
       }
   }
@@ -1330,9 +1353,7 @@
 	fprintf (stderr, "Realloc returns memory that is already in use\n");
 	abort ();
       }
-
-    /* Can't handle zero size regions in the red-black tree.  */
-    mem_insert (value, (char *) value + max (size, 1), MEM_TYPE_NON_LISP);
+    mem_insert (value, size, MEM_TYPE_NON_LISP);
   }
 
   /* fprintf (stderr, "%p <- realloc\n", value); */
@@ -3543,7 +3564,8 @@
   mem_z.left = mem_z.right = MEM_NIL;
   mem_z.parent = NULL;
   mem_z.color = MEM_BLACK;
-  mem_z.start = mem_z.end = NULL;
+  mem_z.start = NULL;
+  mem_z.size = 0;
   mem_root = MEM_NIL;
 }
 
@@ -3562,10 +3584,10 @@
 
   /* Make the search always successful to speed up the loop below.  */
   mem_z.start = start;
-  mem_z.end = (char *) start + 1;
+  mem_z.size = 1;
 
   p = mem_root;
-  while (start < p->start || start >= p->end)
+  while (start < p->start || start >= p->start + p->size)
     p = start < p->start ? p->left : p->right;
   return p;
 }
@@ -3576,16 +3598,20 @@
    pointer to the node that was inserted.  */
 
 static struct mem_node *
-mem_insert (start, end, type)
-     void *start, *end;
+mem_insert (start, size, type)
+     void *start;
+     size_t size;
      enum mem_type type;
 {
   struct mem_node *c, *parent, *x;
 
+  if (size == 0 || size > MEM_NODE_SIZE_MAX)
+    abort ();
+
   if (min_heap_address == NULL || start < min_heap_address)
     min_heap_address = start;
-  if (max_heap_address == NULL || end > max_heap_address)
-    max_heap_address = end;
+  if (max_heap_address == NULL || start + size > max_heap_address)
+    max_heap_address = start + size;
 
   /* See where in the tree a node for START belongs.  In this
      particular application, it shouldn't happen that a node is already
@@ -3597,7 +3623,7 @@
 
   while (c != MEM_NIL)
     {
-      if (start >= c->start && start < c->end)
+      if (start >= c->start && start < c->start + c->size)
 	abort ();
       parent = c;
       c = start < c->start ? c->left : c->right;
@@ -3622,7 +3648,7 @@
   x = (struct mem_node *) xmalloc (sizeof *x);
 #endif
   x->start = start;
-  x->end = end;
+  x->size = size;
   x->type = type;
   x->parent = parent;
   x->left = x->right = MEM_NIL;
@@ -3835,7 +3861,7 @@
   if (y != z)
     {
       z->start = y->start;
-      z->end = y->end;
+      z->size = y->size;
       z->type = y->type;
     }
 

[-- Attachment #3: Type: text/plain, Size: 142 bytes --]

_______________________________________________
Emacs-devel mailing list
Emacs-devel@gnu.org
http://lists.gnu.org/mailman/listinfo/emacs-devel

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [patch] mem_node shrink #2
  2007-11-27 16:34 [patch] mem_node shrink #2 Dmitry Antipov
@ 2007-11-29  1:04 ` Richard Stallman
  2007-11-29 15:43   ` Dmitry Antipov
  0 siblings, 1 reply; 7+ messages in thread
From: Richard Stallman @ 2007-11-29  1:04 UTC (permalink / raw)
  To: Dmitry Antipov; +Cc: emacs-devel

It looks good to me, except that here

    +  /* Size of allocated region.  */
    +  unsigned size : MEM_NODE_SIZE_WIDTH;

The comment should explain WHY it is defined that way.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [patch] mem_node shrink #2
  2007-11-29  1:04 ` Richard Stallman
@ 2007-11-29 15:43   ` Dmitry Antipov
  2007-11-29 18:48     ` Stefan Monnier
  2007-11-29 18:57     ` Jan D.
  0 siblings, 2 replies; 7+ messages in thread
From: Dmitry Antipov @ 2007-11-29 15:43 UTC (permalink / raw)
  To: rms; +Cc: emacs-devel

[-- Attachment #1: Type: text/plain, Size: 295 bytes --]

Richard Stallman wrote:
> It looks good to me, except that here
> 
>     +  /* Size of allocated region.  */
>     +  unsigned size : MEM_NODE_SIZE_WIDTH;
> 
> The comment should explain WHY it is defined that way.

OK. BTW, is there a (supported) system where gmalloc.c is still used ?

Dmitry

[-- Attachment #2: mem_node_shrink_3.patch --]
[-- Type: text/x-patch, Size: 6243 bytes --]

Index: alloc.c
===================================================================
RCS file: /sources/emacs/emacs/src/alloc.c,v
retrieving revision 1.432
diff -u -r1.432 alloc.c
--- alloc.c	16 Nov 2007 21:24:59 -0000	1.432
+++ alloc.c	29 Nov 2007 15:39:56 -0000
@@ -430,6 +430,19 @@
    description of red-black trees.  Any book worth its salt should
    describe them.  */
 
+#ifdef GC_MALLOC_CHECK
+/* If this is defined, xmalloc and xrealloc calls mem_insert.  On 64-bit
+   system, if neither USE_MMAP_FOR_BUFFERS nor REL_ALLOC are used, xmalloc
+   or xrealloc may allocate buffer text which size fits into integer, but
+   not into BITS_PER_INT - 4 bits wide bit field.  */
+#define MEM_NODE_SIZE_WIDTH (BITS_PER_EMACS_INT - 4)
+#else
+/* Otherwise, every region which is passed to mem_insert is never larger
+   than 256M, so it's size fits into BITS_PER_INT - 4 bits wide bit field.  */
+#define MEM_NODE_SIZE_WIDTH (BITS_PER_INT - 4)
+#endif
+#define MEM_NODE_SIZE_MAX ((1 << MEM_NODE_SIZE_WIDTH) - 1)
+
 struct mem_node
 {
   /* Children of this node.  These pointers are never NULL.  When there
@@ -439,14 +452,20 @@
   /* The parent of this node.  In the root node, this is NULL.  */
   struct mem_node *parent;
 
-  /* Start and end of allocated region.  */
-  void *start, *end;
+  /* Start of allocated region.  */
+  void *start;
+
+  /* Size of allocated region.  It's defined in such a way to pack the
+     rest of mem_node data within single integer, but still provide
+     the space enough to hold any acceptable size value.  */
+  unsigned size : MEM_NODE_SIZE_WIDTH;
 
   /* Node color.  */
-  enum {MEM_BLACK, MEM_RED} color;
+  enum {MEM_BLACK, MEM_RED} color : 1;
 
-  /* Memory type.  */
-  enum mem_type type;
+  /* Memory type.  The width of this bitfield should be
+     enough to hold any possible mem_type value.  */
+  enum mem_type type : 3;
 };
 
 /* Base address of stack.  Set in main.  */
@@ -480,7 +499,7 @@
 static void mark_maybe_object P_ ((Lisp_Object));
 static void mark_memory P_ ((void *, void *, int));
 static void mem_init P_ ((void));
-static struct mem_node *mem_insert P_ ((void *, void *, enum mem_type));
+static struct mem_node *mem_insert P_ ((void *, size_t, enum mem_type));
 static void mem_insert_fixup P_ ((struct mem_node *));
 static void mem_rotate_left P_ ((struct mem_node *));
 static void mem_rotate_right P_ ((struct mem_node *));
@@ -781,9 +800,16 @@
   register POINTER_TYPE *val;
 
   MALLOC_BLOCK_INPUT;
+  /* Some mallocs returns valid pointer even if SIZE is 0.
+     We're just doing free (PTR) here.  */
+  if (size == 0)
+    {
+      free (block);
+      val = NULL;
+    }
   /* We must call malloc explicitly when BLOCK is 0, since some
      reallocs don't do this.  */
-  if (! block)
+  else if (! block)
     val = (POINTER_TYPE *) malloc (size);
   else
     val = (POINTER_TYPE *) realloc (block, size);
@@ -880,7 +906,7 @@
 
 #if GC_MARK_STACK && !defined GC_MALLOC_CHECK
   if (val && type != MEM_TYPE_NON_LISP)
-    mem_insert (val, (char *) val + nbytes, type);
+    mem_insert (val, nbytes, type);
 #endif
 
   MALLOC_UNBLOCK_INPUT;
@@ -1090,7 +1116,7 @@
 
 #if GC_MARK_STACK && !defined GC_MALLOC_CHECK
   if (val && type != MEM_TYPE_NON_LISP)
-    mem_insert (val, (char *) val + nbytes, type);
+    mem_insert (val, nbytes, type);
 #endif
 
   MALLOC_UNBLOCK_INPUT;
@@ -1263,14 +1289,13 @@
 	fprintf (stderr, "Malloc returned %p which is already in use\n",
 		 value);
 	fprintf (stderr, "Region in use is %p...%p, %u bytes, type %d\n",
-		 m->start, m->end, (char *) m->end - (char *) m->start,
-		 m->type);
+		 m->start, m->start + m->size, m->size, m->type);
 	abort ();
       }
 
     if (!dont_register_blocks)
       {
-	mem_insert (value, (char *) value + max (1, size), allocated_mem_type);
+	mem_insert (value, size, allocated_mem_type);
 	allocated_mem_type = MEM_TYPE_NON_LISP;
       }
   }
@@ -1330,9 +1355,7 @@
 	fprintf (stderr, "Realloc returns memory that is already in use\n");
 	abort ();
       }
-
-    /* Can't handle zero size regions in the red-black tree.  */
-    mem_insert (value, (char *) value + max (size, 1), MEM_TYPE_NON_LISP);
+    mem_insert (value, size, MEM_TYPE_NON_LISP);
   }
 
   /* fprintf (stderr, "%p <- realloc\n", value); */
@@ -3543,7 +3566,8 @@
   mem_z.left = mem_z.right = MEM_NIL;
   mem_z.parent = NULL;
   mem_z.color = MEM_BLACK;
-  mem_z.start = mem_z.end = NULL;
+  mem_z.start = NULL;
+  mem_z.size = 0;
   mem_root = MEM_NIL;
 }
 
@@ -3562,10 +3586,10 @@
 
   /* Make the search always successful to speed up the loop below.  */
   mem_z.start = start;
-  mem_z.end = (char *) start + 1;
+  mem_z.size = 1;
 
   p = mem_root;
-  while (start < p->start || start >= p->end)
+  while (start < p->start || start >= p->start + p->size)
     p = start < p->start ? p->left : p->right;
   return p;
 }
@@ -3576,16 +3600,20 @@
    pointer to the node that was inserted.  */
 
 static struct mem_node *
-mem_insert (start, end, type)
-     void *start, *end;
+mem_insert (start, size, type)
+     void *start;
+     size_t size;
      enum mem_type type;
 {
   struct mem_node *c, *parent, *x;
 
+  if (size == 0 || size > MEM_NODE_SIZE_MAX)
+    abort ();
+
   if (min_heap_address == NULL || start < min_heap_address)
     min_heap_address = start;
-  if (max_heap_address == NULL || end > max_heap_address)
-    max_heap_address = end;
+  if (max_heap_address == NULL || start + size > max_heap_address)
+    max_heap_address = start + size;
 
   /* See where in the tree a node for START belongs.  In this
      particular application, it shouldn't happen that a node is already
@@ -3597,7 +3625,7 @@
 
   while (c != MEM_NIL)
     {
-      if (start >= c->start && start < c->end)
+      if (start >= c->start && start < c->start + c->size)
 	abort ();
       parent = c;
       c = start < c->start ? c->left : c->right;
@@ -3622,7 +3650,7 @@
   x = (struct mem_node *) xmalloc (sizeof *x);
 #endif
   x->start = start;
-  x->end = end;
+  x->size = size;
   x->type = type;
   x->parent = parent;
   x->left = x->right = MEM_NIL;
@@ -3835,7 +3863,7 @@
   if (y != z)
     {
       z->start = y->start;
-      z->end = y->end;
+      z->size = y->size;
       z->type = y->type;
     }
 

[-- Attachment #3: Type: text/plain, Size: 142 bytes --]

_______________________________________________
Emacs-devel mailing list
Emacs-devel@gnu.org
http://lists.gnu.org/mailman/listinfo/emacs-devel

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [patch] mem_node shrink #2
  2007-11-29 15:43   ` Dmitry Antipov
@ 2007-11-29 18:48     ` Stefan Monnier
  2007-11-30 14:09       ` Dmitry Antipov
  2007-11-29 18:57     ` Jan D.
  1 sibling, 1 reply; 7+ messages in thread
From: Stefan Monnier @ 2007-11-29 18:48 UTC (permalink / raw)
  To: Dmitry Antipov; +Cc: rms, emacs-devel

> +/* Otherwise, every region which is passed to mem_insert is never larger
> +   than 256M, so it's size fits into BITS_PER_INT - 4 bits wide bit field.  */

Is that true?  Why is it the case?
On 64bit systems we can have buffers larger than 256MB.


        Stefan

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [patch] mem_node shrink #2
  2007-11-29 15:43   ` Dmitry Antipov
  2007-11-29 18:48     ` Stefan Monnier
@ 2007-11-29 18:57     ` Jan D.
  1 sibling, 0 replies; 7+ messages in thread
From: Jan D. @ 2007-11-29 18:57 UTC (permalink / raw)
  To: Dmitry Antipov; +Cc: rms, emacs-devel

Dmitry Antipov wrote:
> Richard Stallman wrote:
>> It looks good to me, except that here
>>
>>     +  /* Size of allocated region.  */
>>     +  unsigned size : MEM_NODE_SIZE_WIDTH;
>>
>> The comment should explain WHY it is defined that way.
> 
> OK. BTW, is there a (supported) system where gmalloc.c is still used ?
> 


All BSD:s I think.

	Jan D.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [patch] mem_node shrink #2
  2007-11-29 18:48     ` Stefan Monnier
@ 2007-11-30 14:09       ` Dmitry Antipov
  2007-11-30 19:45         ` Stefan Monnier
  0 siblings, 1 reply; 7+ messages in thread
From: Dmitry Antipov @ 2007-11-30 14:09 UTC (permalink / raw)
  To: Stefan Monnier; +Cc: rms, emacs-devel

Stefan Monnier wrote:

>> +/* Otherwise, every region which is passed to mem_insert is never larger
>> +   than 256M, so it's size fits into BITS_PER_INT - 4 bits wide bit field.  */
> 
> Is that true?  Why is it the case?
> On 64bit systems we can have buffers larger than 256MB.

If buffer text is xmalloc'ed, (re)allocated region is not mem_insert'ed unless
GC_MALLOC_CHECK is defined.

If you run (buffer-substring (point-min) (point-max)) when current buffer's size
is 300M (on 64-bit system), the storage for resulting string is allocated with
MEM_TYPE_NON_LISP and isn't mem_insert'ed too.

But yes, it still has problems - on 64-bit system, (make-vector 50000000 nil)
wants ~380M of MEM_TYPE_VECTOR, which isn't fit. So my stuff introduces vector
size limits - (32M - 3) items on 64-bit and (64M - 3) on 32-bit. It looks
acceptable for me, but I'm not sure about others.

Is there any other code which may wants to allocate >256M ?

Dmitry

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [patch] mem_node shrink #2
  2007-11-30 14:09       ` Dmitry Antipov
@ 2007-11-30 19:45         ` Stefan Monnier
  0 siblings, 0 replies; 7+ messages in thread
From: Stefan Monnier @ 2007-11-30 19:45 UTC (permalink / raw)
  To: Dmitry Antipov; +Cc: rms, emacs-devel

>> Is that true?  Why is it the case?
>> On 64bit systems we can have buffers larger than 256MB.

> If buffer text is xmalloc'ed, (re)allocated region is not mem_insert'ed unless
> GC_MALLOC_CHECK is defined.

> If you run (buffer-substring (point-min) (point-max)) when current buffer's size
> is 300M (on 64-bit system), the storage for resulting string is allocated with
> MEM_TYPE_NON_LISP and isn't mem_insert'ed too.

> But yes, it still has problems - on 64-bit system, (make-vector 50000000 nil)
> wants ~380M of MEM_TYPE_VECTOR, which isn't fit. So my stuff introduces vector
> size limits - (32M - 3) items on 64-bit and (64M - 3) on 32-bit. It looks
> acceptable for me, but I'm not sure about others.

> Is there any other code which may wants to allocate >256M ?

I don't think that saving 4bytes on 64bit systems is worth the trouble.
Also to lift the limit on 32bit systems, we just need to store size/8
instead of size.

So basically we want `size' to have (BITS_PER_EMACS_INT - 4) bits and to
hold (size >> GCTYPEBITS).  If USE_LSB_TAG is not set, then we shouldn't
shift (so the top bits with be truncated instead).

The justification for (BITS_PER_EMACS_INT - 4) is "so that combined with
the 1 and the 3 from the other two bitfields we get a whole number of
words".  And the justification for why this is correct will have to
explain that this assumes GCTYPEBITS==3 so that pointers and integers
only have BITS_PER_EMACS_INT - 3 significant values and stripping an
extra bit should still be OK because the size of the largest object will
be similarly halved by the use of signed integers (both for the size of
arrays and for the size of buffer text).

The only "problem" left is when USE_LSB_TAG is not set: this will limit
the max size of an array to (BITS_PER_EMACS_INT - 4) whereas in such
a case the byte-size of an array is limited by (BITS_PER_EMACS_INT - 1)
(but there can only be 1 such array because the start address needs to
fit within (BITS_PER_EMACS_INT - 3) :-)).


        Stefan

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2007-11-30 19:45 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2007-11-27 16:34 [patch] mem_node shrink #2 Dmitry Antipov
2007-11-29  1:04 ` Richard Stallman
2007-11-29 15:43   ` Dmitry Antipov
2007-11-29 18:48     ` Stefan Monnier
2007-11-30 14:09       ` Dmitry Antipov
2007-11-30 19:45         ` Stefan Monnier
2007-11-29 18:57     ` Jan D.

Code repositories for project(s) associated with this external index

	https://git.savannah.gnu.org/cgit/emacs.git
	https://git.savannah.gnu.org/cgit/emacs/org-mode.git

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.