unofficial mirror of emacs-devel@gnu.org 
 help / color / mirror / code / Atom feed
From: Dmitry Antipov <dmantipov@yandex.ru>
To: Stefan Monnier <monnier@iro.umontreal.ca>
Cc: emacs-devel@gnu.org
Subject: Re: Proposal: block-based vector allocator
Date: Tue, 22 May 2012 12:24:37 +0400	[thread overview]
Message-ID: <4FBB4D45.4070009@yandex.ru> (raw)
In-Reply-To: <jwvk4054jab.fsf-monnier+emacs@gnu.org>

[-- Attachment #1: Type: text/plain, Size: 870 bytes --]

On 05/22/2012 12:12 AM, Stefan Monnier wrote:

>> IIUC, if/when we will have mmap()ed Lisp_Objects someday, this should help
>> malloc() to do direct mmap()/munmap() of page-size vector blocks.
>
> There are several conditions here, some of them might never
> actually happen.  So I'd rather not worry about it for now.

OK

>> As for the roundup() check, it looks like GNU/Linux and *BSD systems
>> both has it in sys/param.h, but (Open)Solaris uses sys/sysmacros.h.
>> So I'm trying to check for the most common case and then fall back
>> to simple default.
>
> Since it's a constant we can choose, we can choose that constant to be
> one that makes for efficient code.  So I think we don't need to use some
> predefined macro but can code up our own, custom-designed for our
> specific needs, without worry about where various systems define
> `roundup'.

OK

Dmitry

[-- Attachment #2: vector_alloc.patch --]
[-- Type: text/plain, Size: 13282 bytes --]

=== modified file 'src/alloc.c'
--- src/alloc.c	2012-05-21 15:36:54 +0000
+++ src/alloc.c	2012-05-22 07:34:36 +0000
@@ -2924,17 +2924,305 @@
 			   Vector Allocation
  ***********************************************************************/
 
-/* Singly-linked list of all vectors.  */
-
-static struct Lisp_Vector *all_vectors;
+#define VECTOR_BLOCK_SIZE 4096
+
+/* Round up X to nearest mult-of-Y, assuming Y is a power of 2.  */
+
+#define roundup_powof2(x,y) (((x) + (y) - 1) & ~((y) - 1))
 
 /* Handy constants for vectorlike objects.  */
 enum
   {
     header_size = offsetof (struct Lisp_Vector, contents),
-    word_size = sizeof (Lisp_Object)
+    word_size = sizeof (Lisp_Object),
+    /* On a 32-bit system, rounding up vector size (in bytes) up
+       to mult-of-8 helps to maintain mult-of-8 alignment.  */
+    roundup_size = 8
   };
 
+/* Rounding helps to maintain alignment constraints.  */
+
+#define VECTOR_BLOCK_BYTES \
+  (VECTOR_BLOCK_SIZE - roundup_powof2 (sizeof (void *), roundup_size))
+
+/* Maximum amount of vectors allocated from the vector block.  */
+
+#define VECTORS_PER_BLOCK_MAX \
+  (VECTOR_BLOCK_BYTES / sizeof (struct vectorlike_header))
+
+/* We maintain one free list for each possible block-allocated
+   vector size, and this is now much of the free lists we have.  */
+
+#define VECTOR_MAX_FREE_LIST_INDEX ((VECTOR_BLOCK_BYTES / roundup_size) + 1)
+
+/* When the vector is on a free list, vectorlike_header.SIZE is set to
+   this special value ORed with vector's memory footprint size.  */
+
+#define VECTOR_FREE_LIST_FLAG \
+  (((EMACS_INT) ~0) & ~(ARRAY_MARK_FLAG | PSEUDOVECTOR_FLAG | \
+			(VECTOR_BLOCK_SIZE - 1)))
+
+/* Common shortcut to advance vector pointer over a block data.  */
+
+#define ADVANCE(v,nbytes) \
+  (struct Lisp_Vector *) ((unsigned char *) (v) + (nbytes))
+
+/* Common shortcut to setup vector on a free list.  */
+
+#define SETUP_ON_FREE_LIST(v,nbytes,index) do { \
+  (v)->header.size = VECTOR_FREE_LIST_FLAG | (nbytes); \
+  eassert ((nbytes) % roundup_size == 0); \
+  (index) = (nbytes) / roundup_size; \
+  eassert ((index) < VECTOR_MAX_FREE_LIST_INDEX); \
+  (v)->header.next.vector = vector_free_lists[(index)]; \
+  vector_free_lists[(index)] = (v); } while (0)
+
+struct vector_block
+{
+  unsigned char data[VECTOR_BLOCK_BYTES];
+  struct vector_block *next;
+};
+
+/* Chain of vector blocks.  */
+
+static struct vector_block *vector_blocks;
+
+/* Vector free lists, where NTH item points to a chain
+   of free vectors of NTH * ROUNDUP_SIZE bytes.  */
+
+static struct Lisp_Vector *vector_free_lists[VECTOR_MAX_FREE_LIST_INDEX];
+
+/* Singly-linked list of large vectors.  */
+
+static struct Lisp_Vector *large_vectors;
+
+/* The only vector with 0 slots, allocated from pure space.  */
+
+static struct Lisp_Vector *zero_vector;
+
+/* Get a new vector block.  */
+
+static struct vector_block *
+allocate_vector_block (void)
+{
+  struct vector_block *block;
+
+#ifdef DOUG_LEA_MALLOC
+  mallopt (M_MMAP_MAX, 0);
+#endif
+
+  block = xmalloc (sizeof (struct vector_block));
+  if (!block)
+    memory_full (VECTOR_BLOCK_SIZE);
+
+#ifdef DOUG_LEA_MALLOC
+  mallopt (M_MMAP_MAX, MMAP_MAX_AREAS);
+#endif
+
+#if GC_MARK_STACK && !defined GC_MALLOC_CHECK
+  mem_insert (block->data, block->data + VECTOR_BLOCK_BYTES,
+	      MEM_TYPE_VECTORLIKE);
+#endif
+
+  block->next = vector_blocks;
+  vector_blocks = block;
+  return block;
+}
+
+/* Called once to initialize vector allocation.  */
+
+static void
+init_vectors (void)
+{
+  int i;
+
+  large_vectors = NULL;
+
+  zero_vector = (struct Lisp_Vector *)
+    pure_alloc (header_size, Lisp_Vectorlike);
+  zero_vector->header.size = 0;
+
+  for (i = 0; i < VECTOR_MAX_FREE_LIST_INDEX; i++)
+    vector_free_lists[i] = NULL;
+}
+
+/* Allocate vector from a vector block.  */
+
+static struct Lisp_Vector *
+allocate_vector_from_block (size_t nbytes)
+{
+  struct Lisp_Vector *vector, *rest;
+  struct vector_block *block;
+  size_t index, restbytes;
+
+  /* No vectors with 0 slots for Lisp_Objects here.  */
+  eassert (nbytes > sizeof (struct vectorlike_header) &&
+	   nbytes <= VECTOR_BLOCK_BYTES);
+  eassert (nbytes % roundup_size == 0);
+
+  /* First, try to allocate from a free list
+     contains vectors of the requested size.  */
+  index = nbytes / roundup_size;
+  if (vector_free_lists[index])
+    {
+      vector = vector_free_lists[index];
+      vector_free_lists[index] = vector->header.next.vector;
+      vector->header.next.nbytes = nbytes;
+      return vector;
+    }
+
+  /* Next, check free lists contains larger vectors.  Since we will
+     split the result, we should have remaining space large enough
+     to use for one-slot vector at least.  */
+  for (index = (nbytes + sizeof (struct Lisp_Vector)) / roundup_size;
+       index < VECTOR_MAX_FREE_LIST_INDEX; index++)
+    if (vector_free_lists[index])
+      {
+	/* This vector is larger than it was requested.  */
+	vector = vector_free_lists[index];
+	vector_free_lists[index] = vector->header.next.vector;
+	vector->header.next.nbytes = nbytes;
+
+	/* Excessive bytes are used for the smaller vector,
+	   which should be set on an appropriate free list.  */
+	restbytes = index * roundup_size - nbytes;
+	eassert (restbytes % roundup_size == 0);
+	rest = ADVANCE (vector, nbytes);
+	SETUP_ON_FREE_LIST (rest, restbytes, index);
+	return vector;
+      }
+
+  /* Finally, need a new vector block.  */
+  block = allocate_vector_block ();
+
+  /* New vector will be at the beginning of this block.  */
+  vector = (struct Lisp_Vector *) block->data;
+  vector->header.next.nbytes = nbytes;
+
+  /* The rest of this block is set up on a free list.  */
+  rest = ADVANCE (vector, nbytes);
+  restbytes = VECTOR_BLOCK_BYTES - nbytes;
+  index = restbytes / roundup_size;
+  SETUP_ON_FREE_LIST (rest, restbytes, index);
+
+  return vector;
+ }
+
+/* Return amount of Lisp_Objects which can be stored in that vector.  */
+
+#define VECTOR_SIZE(v) ((v)->header.size & PSEUDOVECTOR_FLAG ? \
+  (PSEUDOVECTOR_SIZE_MASK & (v)->header.size) : (v)->header.size)
+
+/* Reclaim space used by unmarked vectors.  */
+
+static void
+sweep_vectors (void)
+{
+  struct vector_block *block = vector_blocks, *bprev = NULL, *bnext;
+  struct Lisp_Vector *vector, *prev, *next;
+  int i;
+
+  total_vector_size = 0;
+  for (i = 0; i < VECTOR_MAX_FREE_LIST_INDEX; i++)
+    vector_free_lists[i] = NULL;
+
+  /* Looking through vector blocks.  */
+
+  while (block)
+    {
+      int free_this_block;
+
+      for (vector = (struct Lisp_Vector *) block->data;
+	   (unsigned char *) vector < block->data + VECTOR_BLOCK_BYTES;
+	   vector = next)
+	{
+	  free_this_block = 0;
+
+	  if (VECTOR_MARKED_P (vector))
+	    {
+	      VECTOR_UNMARK (vector);
+	      total_vector_size += VECTOR_SIZE (vector);
+	      next = ADVANCE (vector, vector->header.next.nbytes);
+	    }
+	  else
+	    {
+	      EMACS_INT nbytes;
+
+	      if ((vector->header.size & VECTOR_FREE_LIST_FLAG) == 
+		  VECTOR_FREE_LIST_FLAG)
+		vector->header.next.nbytes =
+		  vector->header.size & (VECTOR_BLOCK_SIZE - 1);
+	      
+	      next = ADVANCE (vector, vector->header.next.nbytes);
+
+	      /* While NEXT is not marked, try to coalesce with VECTOR,
+		 thus making VECTOR of the largest possible size.  */
+
+	      while ((unsigned char *) next < block->data + VECTOR_BLOCK_BYTES)
+		{
+		  if (VECTOR_MARKED_P (next))
+		    break;
+		  if ((next->header.size & VECTOR_FREE_LIST_FLAG) == 
+		      VECTOR_FREE_LIST_FLAG)
+		    nbytes = next->header.size & (VECTOR_BLOCK_SIZE - 1);
+		  else
+		    nbytes = next->header.next.nbytes;
+		  vector->header.next.nbytes += nbytes;
+		  next = ADVANCE (next, nbytes);
+		}
+	      
+	      eassert (vector->header.next.nbytes % roundup_size == 0);
+
+	      if (vector == (struct Lisp_Vector *) block->data &&
+		  (unsigned char *) next >= block->data + VECTOR_BLOCK_BYTES)
+		/* This block should be freed because all of it's
+		   space was coalesced into the only free vector.  */
+		free_this_block = 1;
+	      else
+		SETUP_ON_FREE_LIST (vector, vector->header.next.nbytes, i);
+	    }
+	}
+
+      if (free_this_block)
+	{
+	  if (bprev)
+	    bprev->next = block->next;
+	  else
+	    vector_blocks = block->next;
+	  bnext = block->next;
+#if GC_MARK_STACK && !defined GC_MALLOC_CHECK
+	  mem_delete (mem_find (block->data));
+#endif
+	  xfree (block);
+	  block = bnext;
+	}
+      else
+	bprev = block, block = block->next;
+    }
+
+  /* Sweep large vectors.  */
+
+  vector = large_vectors, prev = NULL;
+
+  while (vector)
+    if (VECTOR_MARKED_P (vector))
+      {
+	VECTOR_UNMARK (vector);
+	total_vector_size += VECTOR_SIZE (vector);
+	prev = vector, vector = vector->header.next.vector;
+      }
+    else
+      {
+	if (prev)
+	  prev->header.next = vector->header.next;
+	else
+	  large_vectors = vector->header.next.vector;
+	next = vector->header.next.vector;
+	lisp_free (vector);
+	vector = next;
+      }
+}
+
 /* Value is a pointer to a newly allocated Lisp_Vector structure
    with room for LEN Lisp_Objects.  */
 
@@ -2956,8 +3244,20 @@
   /* This gets triggered by code which I haven't bothered to fix.  --Stef  */
   /* eassert (!handling_signal); */
 
+  if (len == 0)
+    return zero_vector;
+
   nbytes = header_size + len * word_size;
-  p = (struct Lisp_Vector *) lisp_malloc (nbytes, MEM_TYPE_VECTORLIKE);
+
+  if (nbytes > VECTOR_BLOCK_BYTES)
+    {
+      p = (struct Lisp_Vector *) lisp_malloc (nbytes, MEM_TYPE_VECTORLIKE);
+      p->header.next.vector = large_vectors;
+      large_vectors = p;
+    }
+  else
+    /* Rounding is to preserve alignment.  */
+    p = allocate_vector_from_block (roundup_powof2 (nbytes, roundup_size));
 
 #ifdef DOUG_LEA_MALLOC
   /* Back to a reasonable maximum of mmap'ed areas.  */
@@ -2967,9 +3267,6 @@
   consing_since_gc += nbytes;
   vector_cells_consed += len;
 
-  p->header.next.vector = all_vectors;
-  all_vectors = p;
-
   MALLOC_UNBLOCK_INPUT;
 
   return p;
@@ -4068,7 +4365,39 @@
 static inline int
 live_vector_p (struct mem_node *m, void *p)
 {
-  return (p == m->start && m->type == MEM_TYPE_VECTORLIKE);
+  if (m->type == MEM_TYPE_VECTORLIKE)
+    {
+      if (m->end - m->start == VECTOR_BLOCK_BYTES)
+	{
+	  /* This memory node corresponds to a vector block.  */
+	  struct vector_block *block = (struct vector_block *) m->start;
+	  struct Lisp_Vector *vector = (struct Lisp_Vector *) block->data;
+
+	  /* P is in the block's allocation range.  Scan the block
+	     up to P and see whether P points to the start of some
+	     vector which is not on a free list.  FIXME: check whether
+	     some allocation patterns (probably a lot of short vectors)
+	     may cause a substantial overhead of this loop.  */
+	  while (vector <= (struct Lisp_Vector *) p)
+	    {
+	      if ((vector->header.size & VECTOR_FREE_LIST_FLAG)
+		  == VECTOR_FREE_LIST_FLAG)
+		vector = ADVANCE (vector, (vector->header.size & 
+					   (VECTOR_BLOCK_SIZE - 1)));
+	      else if (vector == p)
+		return 1;
+	      else
+		vector = ADVANCE (vector, vector->header.next.nbytes);
+	    }
+	}
+      else
+	{
+	  if (p == m->start)
+	    /* This memory node corresponds to a large vector.  */
+	    return 1;
+	}
+    }
+  return 0;
 }
 
 
@@ -6237,33 +6566,7 @@
 	}
   }
 
-  /* Free all unmarked vectors */
-  {
-    register struct Lisp_Vector *vector = all_vectors, *prev = 0, *next;
-    total_vector_size = 0;
-
-    while (vector)
-      if (!VECTOR_MARKED_P (vector))
-	{
-	  if (prev)
-	    prev->header.next = vector->header.next;
-	  else
-	    all_vectors = vector->header.next.vector;
-	  next = vector->header.next.vector;
-	  lisp_free (vector);
-	  vector = next;
-
-	}
-      else
-	{
-	  VECTOR_UNMARK (vector);
-	  if (vector->header.size & PSEUDOVECTOR_FLAG)
-	    total_vector_size += PSEUDOVECTOR_SIZE_MASK & vector->header.size;
-	  else
-	    total_vector_size += vector->header.size;
-	  prev = vector, vector = vector->header.next.vector;
-	}
-  }
+  sweep_vectors ();
 
 #ifdef GC_CHECK_STRING_BYTES
   if (!noninteractive)
@@ -6400,7 +6703,6 @@
   Vdead = make_pure_string ("DEAD", 4, 4, 0);
 #endif
 
-  all_vectors = 0;
   ignore_warnings = 1;
 #ifdef DOUG_LEA_MALLOC
   mallopt (M_TRIM_THRESHOLD, 128*1024); /* trim threshold */
@@ -6413,6 +6715,7 @@
   init_marker ();
   init_float ();
   init_intervals ();
+  init_vectors ();
   init_weak_hash_tables ();
 
 #ifdef REL_ALLOC

=== modified file 'src/lisp.h'
--- src/lisp.h	2012-05-21 15:36:54 +0000
+++ src/lisp.h	2012-05-22 07:13:18 +0000
@@ -899,12 +899,15 @@
 struct vectorlike_header
   {
     EMACS_INT size;
-
-    /* Pointer to the next vector-like object.  It is generally a buffer or a
+    /* When the vector is allocated from a vector block, NBYTES is used
+       if the vector is not on a free list, and VECTOR is used otherwise.
+       For large vector-like objects, BUFFER or VECTOR is used as a pointer
+       to the next vector-like object.  It is generally a buffer or a 
        Lisp_Vector alias, so for convenience it is a union instead of a
        pointer: this way, one can write P->next.vector instead of ((struct
        Lisp_Vector *) P->next).  */
     union {
+      EMACS_INT nbytes;
       struct buffer *buffer;
       struct Lisp_Vector *vector;
     } next;


  reply	other threads:[~2012-05-22  8:24 UTC|newest]

Thread overview: 62+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-12-06  5:22 Proposal: block-based vector allocator Dmitry Antipov
2011-12-06 13:35 ` Stefan Monnier
2011-12-06 15:14   ` Dmitry Antipov
2011-12-06 19:39     ` Stefan Monnier
2011-12-07  5:05       ` Dmitry Antipov
2011-12-07 12:27         ` Carsten Mattner
2011-12-07 13:52         ` Stefan Monnier
2011-12-07 16:08           ` Dmitry Antipov
2011-12-07 16:30             ` Stefan Monnier
2011-12-08  8:50               ` Dmitry Antipov
2011-12-08 13:52                 ` Stefan Monnier
2011-12-08  1:53             ` Stephen J. Turnbull
2011-12-08  4:41               ` Dmitry Antipov
2011-12-08 14:10                 ` Stefan Monnier
2011-12-08 16:48                   ` Dmitry Antipov
2011-12-08 19:58                     ` Stefan Monnier
2011-12-09  7:32                       ` Eli Zaretskii
2011-12-09  9:04                       ` Dmitry Antipov
2011-12-09 14:05                         ` Stefan Monnier
2011-12-09 16:15                           ` Dmitry Antipov
2011-12-09 21:04                             ` Stefan Monnier
2011-12-11 13:18                               ` Dmitry Antipov
2011-12-12  3:07                               ` Dmitry Antipov
2011-12-12 16:24                                 ` Stefan Monnier
2011-12-09  4:44                 ` Stephen J. Turnbull
     [not found] ` <jwvaa1yjs21.fsf-monnier+emacs@gnu.org>
2012-05-17  7:58   ` Dmitry Antipov
2012-05-18 17:40     ` Stefan Monnier
2012-05-21 12:19       ` Dmitry Antipov
2012-05-21 13:02         ` Andreas Schwab
2012-05-21 13:48           ` Dmitry Antipov
2012-05-21 15:07             ` Andreas Schwab
2012-05-22  5:23             ` Ken Raeburn
2012-05-21 20:12         ` Stefan Monnier
2012-05-22  8:24           ` Dmitry Antipov [this message]
2012-05-31 13:44           ` Dmitry Antipov
2012-05-31 15:43             ` Paul Eggert
2012-06-01  5:15               ` Dmitry Antipov
2012-06-01  5:44                 ` Paul Eggert
2012-06-01  9:06                   ` Dmitry Antipov
2012-06-01 17:36                     ` Stefan Monnier
2012-06-02  0:32                       ` Paul Eggert
2012-06-02  7:41                         ` Eli Zaretskii
2012-06-03  6:49                           ` Paul Eggert
2012-06-03 14:26                             ` Eli Zaretskii
2012-05-31 21:16             ` Stefan Monnier
2012-06-01  7:34               ` Dmitry Antipov
2012-06-01 17:40                 ` Stefan Monnier
2012-06-01 17:43                 ` Stefan Monnier
2012-06-06  7:02                   ` Dmitry Antipov
2012-06-06 13:13                     ` Stefan Monnier
2012-06-06 14:58                       ` Dmitry Antipov
2012-06-06 19:18                         ` Stefan Monnier
2012-06-07 10:03                           ` Dmitry Antipov
2012-06-07 14:07                             ` Stefan Monnier
2012-06-08  5:50                               ` Dmitry Antipov
2012-06-08  6:17                                 ` Stefan Monnier
2012-06-08  8:49                                   ` Dmitry Antipov
2012-06-08  8:53                                     ` Eli Zaretskii
2012-06-08  9:41                                       ` Eli Zaretskii
2012-06-08 10:00                                         ` Eli Zaretskii
2012-06-08  6:57                                 ` Eli Zaretskii
2012-06-08  6:38                             ` Paul Eggert

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://www.gnu.org/software/emacs/

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4FBB4D45.4070009@yandex.ru \
    --to=dmantipov@yandex.ru \
    --cc=emacs-devel@gnu.org \
    --cc=monnier@iro.umontreal.ca \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://git.savannah.gnu.org/cgit/emacs.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).