unofficial mirror of emacs-devel@gnu.org 
 help / color / mirror / code / Atom feed
* [patch] mem_node shrink
@ 2007-11-19 10:43 Dmitry Antipov
  2007-11-22  2:26 ` Richard Stallman
  2007-11-22  3:42 ` Stefan Monnier
  0 siblings, 2 replies; 6+ messages in thread
From: Dmitry Antipov @ 2007-11-19 10:43 UTC (permalink / raw)
  To: emacs-devel

[-- Attachment #1: Type: text/plain, Size: 1074 bytes --]

This patch reduces the size of struct mem_node, assuming that Emacs never
allocates an area which size can't be represented as Lisp integer. On i386,
mem_node can be reduced from 28 to 20 bytes, giving a substantial size benefit
which may be precisely evaluated by using malloc_usable_size function of
glibc malloc.

I've never seen more than 50K lived mem_nodes, but 20-30K is typical. For an
average run ends with 30K lived mem_nodes, currently ~7% requests to allocate
28-bytes mem_node are satisfied by allocating 36-bytes chunk, and the rest of
requests causes exact 28-bytes allocations. So, 30K mem_nodes occupies ~848K,
and ~840K of them are really used.

With this patch, ~3.5% requests to allocate 20-bytes mem_node are satisfied
by allocating 28-bytes chunk, and the rest of request causes exact 20-bytes
allocations. So, 30K mem_nodes occupies ~613K, and ~600K of them are really
used.

In this example, we're shaving 235K. Note very long runs may cause heap
fragmentation, and this advantage may degrade. As usual for benchmarks,
your mileage may vary.

Dmitry

[-- Attachment #2: mem_node_shrink.patch --]
[-- Type: text/x-patch, Size: 5511 bytes --]

Index: alloc.c
===================================================================
RCS file: /sources/emacs/emacs/src/alloc.c,v
retrieving revision 1.432
diff -u -r1.432 alloc.c
--- alloc.c	16 Nov 2007 21:24:59 -0000	1.432
+++ alloc.c	19 Nov 2007 10:17:21 -0000
@@ -365,7 +365,8 @@
 
 /* When scanning the C stack for live Lisp objects, Emacs keeps track
    of what memory allocated via lisp_malloc is intended for what
-   purpose.  This enumeration specifies the type of memory.  */
+   purpose.  This enumeration specifies the type of memory.  Values
+   should begins from 0 and fits into type field of struct mem_node.  */
 
 enum mem_type
 {
@@ -430,6 +431,10 @@
    description of red-black trees.  Any book worth its salt should
    describe them.  */
 
+/* Colors of the node below.  */
+#define MEM_BLACK 0
+#define MEM_RED   1
+
 struct mem_node
 {
   /* Children of this node.  These pointers are never NULL.  When there
@@ -439,14 +444,18 @@
   /* The parent of this node.  In the root node, this is NULL.  */
   struct mem_node *parent;
 
-  /* Start and end of allocated region.  */
-  void *start, *end;
+  /* Start of allocated region.  */
+  void *start;
+
+  /* Size of allocated region.  */
+  unsigned size : BITS_PER_EMACS_INT - 4;
 
   /* Node color.  */
-  enum {MEM_BLACK, MEM_RED} color;
+  unsigned color : 1;
 
-  /* Memory type.  */
-  enum mem_type type;
+  /* Memory type. The width of this bitfield should be
+     enough to hold all values of enum mem_type.  */
+  unsigned type : 3;
 };
 
 /* Base address of stack.  Set in main.  */
@@ -480,7 +489,7 @@
 static void mark_maybe_object P_ ((Lisp_Object));
 static void mark_memory P_ ((void *, void *, int));
 static void mem_init P_ ((void));
-static struct mem_node *mem_insert P_ ((void *, void *, enum mem_type));
+static struct mem_node *mem_insert P_ ((void *, size_t, enum mem_type));
 static void mem_insert_fixup P_ ((struct mem_node *));
 static void mem_rotate_left P_ ((struct mem_node *));
 static void mem_rotate_right P_ ((struct mem_node *));
@@ -880,7 +889,7 @@
 
 #if GC_MARK_STACK && !defined GC_MALLOC_CHECK
   if (val && type != MEM_TYPE_NON_LISP)
-    mem_insert (val, (char *) val + nbytes, type);
+    mem_insert (val, nbytes, type);
 #endif
 
   MALLOC_UNBLOCK_INPUT;
@@ -1090,7 +1099,7 @@
 
 #if GC_MARK_STACK && !defined GC_MALLOC_CHECK
   if (val && type != MEM_TYPE_NON_LISP)
-    mem_insert (val, (char *) val + nbytes, type);
+    mem_insert (val, nbytes, type);
 #endif
 
   MALLOC_UNBLOCK_INPUT;
@@ -1263,14 +1272,13 @@
 	fprintf (stderr, "Malloc returned %p which is already in use\n",
 		 value);
 	fprintf (stderr, "Region in use is %p...%p, %u bytes, type %d\n",
-		 m->start, m->end, (char *) m->end - (char *) m->start,
-		 m->type);
+		 m->start, m->start + m->size, m->size, m->type);
 	abort ();
       }
 
     if (!dont_register_blocks)
       {
-	mem_insert (value, (char *) value + max (1, size), allocated_mem_type);
+	mem_insert (value, max (1, size), allocated_mem_type);
 	allocated_mem_type = MEM_TYPE_NON_LISP;
       }
   }
@@ -1332,7 +1340,7 @@
       }
 
     /* Can't handle zero size regions in the red-black tree.  */
-    mem_insert (value, (char *) value + max (size, 1), MEM_TYPE_NON_LISP);
+    mem_insert (value, max (size, 1), MEM_TYPE_NON_LISP);
   }
 
   /* fprintf (stderr, "%p <- realloc\n", value); */
@@ -3543,7 +3551,8 @@
   mem_z.left = mem_z.right = MEM_NIL;
   mem_z.parent = NULL;
   mem_z.color = MEM_BLACK;
-  mem_z.start = mem_z.end = NULL;
+  mem_z.start = NULL;
+  mem_z.size = 0;
   mem_root = MEM_NIL;
 }
 
@@ -3562,10 +3571,10 @@
 
   /* Make the search always successful to speed up the loop below.  */
   mem_z.start = start;
-  mem_z.end = (char *) start + 1;
+  mem_z.size = 1;
 
   p = mem_root;
-  while (start < p->start || start >= p->end)
+  while (start < p->start || start >= p->start + p->size)
     p = start < p->start ? p->left : p->right;
   return p;
 }
@@ -3576,16 +3585,24 @@
    pointer to the node that was inserted.  */
 
 static struct mem_node *
-mem_insert (start, end, type)
-     void *start, *end;
+mem_insert (start, size, type)
+     void *start;
+     size_t size;
      enum mem_type type;
 {
   struct mem_node *c, *parent, *x;
 
+#ifdef ALLOC_DEBUG
+  /* Check if we can store requested size in bitfield. It's expected that Emacs
+     never allocates the area which size can't be represented as Lisp integer.  */
+  if (size > MOST_POSITIVE_FIXNUM)
+    abort ();
+#endif
+
   if (min_heap_address == NULL || start < min_heap_address)
     min_heap_address = start;
-  if (max_heap_address == NULL || end > max_heap_address)
-    max_heap_address = end;
+  if (max_heap_address == NULL || start + size > max_heap_address)
+    max_heap_address = start + size;
 
   /* See where in the tree a node for START belongs.  In this
      particular application, it shouldn't happen that a node is already
@@ -3597,7 +3614,7 @@
 
   while (c != MEM_NIL)
     {
-      if (start >= c->start && start < c->end)
+      if (start >= c->start && start < c->start + c->size)
 	abort ();
       parent = c;
       c = start < c->start ? c->left : c->right;
@@ -3622,7 +3639,7 @@
   x = (struct mem_node *) xmalloc (sizeof *x);
 #endif
   x->start = start;
-  x->end = end;
+  x->size = size;
   x->type = type;
   x->parent = parent;
   x->left = x->right = MEM_NIL;
@@ -3835,7 +3852,7 @@
   if (y != z)
     {
       z->start = y->start;
-      z->end = y->end;
+      z->size = y->size;
       z->type = y->type;
     }
 

[-- Attachment #3: Type: text/plain, Size: 142 bytes --]

_______________________________________________
Emacs-devel mailing list
Emacs-devel@gnu.org
http://lists.gnu.org/mailman/listinfo/emacs-devel

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [patch] mem_node shrink
  2007-11-19 10:43 [patch] mem_node shrink Dmitry Antipov
@ 2007-11-22  2:26 ` Richard Stallman
  2007-11-22 13:57   ` Dmitry Antipov
  2007-11-22  3:42 ` Stefan Monnier
  1 sibling, 1 reply; 6+ messages in thread
From: Richard Stallman @ 2007-11-22  2:26 UTC (permalink / raw)
  To: Dmitry Antipov; +Cc: emacs-devel

The idea looks like an improvement.  I think there needs to be a comment
explaining that the widths of these fields are supposed to add up to
the same as an EMACS_INT.

And is EMACS_INT the right thing?  EMACS_INT is `long' in some cases.
Should it be plain `int' instead?  Should it be a type that's as wide
as a pointer or as size_t?

Should the size value be measured in units of Lisp_Object instead
of bytes?

    +  if (size > MOST_POSITIVE_FIXNUM)
    +    abort ();
    +#endif

That's not reliably the correct test.  It happens to be right, at
present, because the width of the field is BITS_PER_EMACS_INT - 4, and
it is unsigned, so it has the same number of bits as a positive Lisp
integer.  But that is just coincidence.

So I think MOST_POSITIVE_FIXNUM should be replaced with something
guaranteed to be right.  Define a constant to serve as the width of
that field, and use the same constant here in something like -((-1) <<
MEM_NODE_SIZE_WIDTH).

	 /* Can't handle zero size regions in the red-black tree.  */
    -    mem_insert (value, (char *) value + max (size, 1), MEM_TYPE_NON_LISP);
    +    mem_insert (value, max (size, 1), MEM_TYPE_NON_LISP);

Wouldn't it be cleaner for mem_insert to do  max (..., 1) ?

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [patch] mem_node shrink
  2007-11-19 10:43 [patch] mem_node shrink Dmitry Antipov
  2007-11-22  2:26 ` Richard Stallman
@ 2007-11-22  3:42 ` Stefan Monnier
  1 sibling, 0 replies; 6+ messages in thread
From: Stefan Monnier @ 2007-11-22  3:42 UTC (permalink / raw)
  To: Dmitry Antipov; +Cc: emacs-devel

> -  enum {MEM_BLACK, MEM_RED} color;
> +  unsigned color : 1;
 
> -  /* Memory type.  */
> -  enum mem_type type;
> +  /* Memory type. The width of this bitfield should be
> +     enough to hold all values of enum mem_type.  */
> +  unsigned type : 3;
>  };

Any reason you don't use
 
    enum {MEM_BLACK, MEM_RED} color : 1;
    enum mem_type type : 3;


-- Stefan

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [patch] mem_node shrink
  2007-11-22  2:26 ` Richard Stallman
@ 2007-11-22 13:57   ` Dmitry Antipov
  2007-11-23  4:35     ` Richard Stallman
  0 siblings, 1 reply; 6+ messages in thread
From: Dmitry Antipov @ 2007-11-22 13:57 UTC (permalink / raw)
  To: rms; +Cc: emacs-devel

Richard Stallman wrote:

> The idea looks like an improvement.  I think there needs to be a comment
> explaining that the widths of these fields are supposed to add up to
> the same as an EMACS_INT.
> 
> And is EMACS_INT the right thing?  EMACS_INT is `long' in some cases.
> Should it be plain `int' instead?  Should it be a type that's as wide
> as a pointer or as size_t?
> 
> Should the size value be measured in units of Lisp_Object instead
> of bytes?
> 
>     +  if (size > MOST_POSITIVE_FIXNUM)
>     +    abort ();
>     +#endif
> 
> That's not reliably the correct test.  It happens to be right, at
> present, because the width of the field is BITS_PER_EMACS_INT - 4, and
> it is unsigned, so it has the same number of bits as a positive Lisp
> integer.  But that is just coincidence.
> 
> So I think MOST_POSITIVE_FIXNUM should be replaced with something
> guaranteed to be right.  Define a constant to serve as the width of
> that field, and use the same constant here in something like -((-1) <<
> MEM_NODE_SIZE_WIDTH).

If GC_MALLOC_CHECK is defined, xmalloc and xrealloc calls mem_insert.  On 64-bit system,
if neither USE_MMAP_FOR_BUFFERS nor REL_ALLOC are used, xmalloc and xrealloc may grow
buffer text up to the size which fits into integer, but not into 28-bit bitfield.  So,
the bitfield should be wide enough, i.e. have width BITS_PER_EMACS_INT - 4.

If GC_MALLOC_CHECK isn't defined, mem_insert is probably never called for an area
which is >= 256M even on a 64-bit machine, so it should be safe to use BITS_PER_INT - 4.

This stuff should be checked carefully - I don't have an access to any 64-bit system for
now, but hopefully will have it in a few days...

> Wouldn't it be cleaner for mem_insert to do  max (..., 1) ?

IMHO, inserting a 'region' of zero size is a nonsense because:

  a) In general, malloc (0) is most probably a bug (although some implementations
     returns a non-NULL pointer to the space which can be accessed), it should be
     aborted, and other code shouldn't assume that returned pointer is valid;

  b) In the case of Emacs, xrealloc (ptr, 0) should be equal to free (ptr) regardless
     of the ptr value (and return NULL). Rationale may be found, for example, within
     shrink_regexp_cache: it may call xrealloc (ptr, 0) and, if cp->buf.buffer is NULL
     (which happens during bootstrap), this xrealloc yields to malloc (0). If malloc
     allocates something and returns non-NULL in this case, we may have an allocated
     cp->buf.buffer even if cp->buf.allocated == cp->buf.used == 0, which is definitely
     stupid (but not a fatal bug).

Dmitry

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [patch] mem_node shrink
  2007-11-22 13:57   ` Dmitry Antipov
@ 2007-11-23  4:35     ` Richard Stallman
  2007-11-23 14:55       ` Stefan Monnier
  0 siblings, 1 reply; 6+ messages in thread
From: Richard Stallman @ 2007-11-23  4:35 UTC (permalink / raw)
  To: Dmitry Antipov; +Cc: emacs-devel

    > So I think MOST_POSITIVE_FIXNUM should be replaced with something
    > guaranteed to be right.  Define a constant to serve as the width of
    > that field, and use the same constant here in something like -((-1) <<
    > MEM_NODE_SIZE_WIDTH).

    If GC_MALLOC_CHECK is defined, xmalloc and xrealloc calls
    mem_insert.  On 64-bit system, if neither USE_MMAP_FOR_BUFFERS nor
    REL_ALLOC are used, xmalloc and xrealloc may grow buffer text up
    to the size which fits into integer, but not into 28-bit bitfield.
    So, the bitfield should be wide enough, i.e. have width
    BITS_PER_EMACS_INT - 4.

There are two questions here.  You're talking about whether the field
is wide enough.  The other is how to write this error check:

>     +  if (size > MOST_POSITIVE_FIXNUM)
>     +    abort ();

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [patch] mem_node shrink
  2007-11-23  4:35     ` Richard Stallman
@ 2007-11-23 14:55       ` Stefan Monnier
  0 siblings, 0 replies; 6+ messages in thread
From: Stefan Monnier @ 2007-11-23 14:55 UTC (permalink / raw)
  To: rms; +Cc: Dmitry Antipov, emacs-devel

>> So I think MOST_POSITIVE_FIXNUM should be replaced with something
>> guaranteed to be right.  Define a constant to serve as the width of
>> that field, and use the same constant here in something like -((-1) <<
>> MEM_NODE_SIZE_WIDTH).

>     If GC_MALLOC_CHECK is defined, xmalloc and xrealloc calls
>     mem_insert.  On 64-bit system, if neither USE_MMAP_FOR_BUFFERS nor
>     REL_ALLOC are used, xmalloc and xrealloc may grow buffer text up
>     to the size which fits into integer, but not into 28-bit bitfield.
>     So, the bitfield should be wide enough, i.e. have width
>     BITS_PER_EMACS_INT - 4.

> There are two questions here.  You're talking about whether the field
> is wide enough.  The other is how to write this error check:

>> +  if (size > MOST_POSITIVE_FIXNUM)
>> +    abort ();

Yes.  A simpler test is to read the size we just wrote in the object's
field and check it reads out as the same value.


        Stefan

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2007-11-23 14:55 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2007-11-19 10:43 [patch] mem_node shrink Dmitry Antipov
2007-11-22  2:26 ` Richard Stallman
2007-11-22 13:57   ` Dmitry Antipov
2007-11-23  4:35     ` Richard Stallman
2007-11-23 14:55       ` Stefan Monnier
2007-11-22  3:42 ` Stefan Monnier

Code repositories for project(s) associated with this public inbox

	https://git.savannah.gnu.org/cgit/emacs.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).