all messages for Emacs-related lists mirrored at yhetil.org
 help / color / mirror / code / Atom feed
* [RFC] allocating intervals as vectors
@ 2012-08-29  6:11 Dmitry Antipov
  2012-08-29 13:51 ` Stefan Monnier
  0 siblings, 1 reply; 2+ messages in thread
From: Dmitry Antipov @ 2012-08-29  6:11 UTC (permalink / raw)
  To: Emacs development discussions

[-- Attachment #1: Type: text/plain, Size: 797 bytes --]

This patch converts intervals to a pseudovectors, thus eliminating special
allocation and sweeping stuff.  Although I think that intervals are more
similar to misc objects rather than vectors, they're just too large for
union Lisp_Misc (largest misc objects has 4 full-sized slots and interval
has 6).  Since intervals aren't referenced via Lisp_Objects, we don't need
special pseudovector type and can use PVEC_OTHER.

Also I think that this change may be useful for further GC development.
It's very useful to have the ability to represent all collectable objects
as Lisp_Objects, for example, to avoid tricks when recording inter-generational
pointers - if intervals are usual pseudovectors, interval pointers may be
passed and recorded as Lisp_Objects after introducing PVEC_INTERVAL.

Dmitry

[-- Attachment #2: pvec_interval.patch --]
[-- Type: text/plain, Size: 13004 bytes --]

=== modified file 'src/alloc.c'
--- src/alloc.c	2012-08-27 09:30:26 +0000
+++ src/alloc.c	2012-08-29 05:17:06 +0000
@@ -260,7 +260,6 @@
 static Lisp_Object Qstrings;
 static Lisp_Object Qvectors;
 static Lisp_Object Qfloats;
-static Lisp_Object Qintervals;
 static Lisp_Object Qbuffers;
 static Lisp_Object Qstring_bytes, Qvector_slots, Qheap;
 static Lisp_Object Qgc_cons_threshold;
@@ -275,6 +274,7 @@
 static Lisp_Object make_pure_vector (ptrdiff_t);
 static void mark_glyph_matrix (struct glyph_matrix *);
 static void mark_face_cache (struct face_cache *);
+static void mark_vectorlike (struct Lisp_Vector *);
 
 #if !defined REL_ALLOC || defined SYSTEM_MALLOC
 static void refill_memory_reserve (void);
@@ -1458,88 +1458,6 @@
 #endif /* not SYNC_INPUT */
 #endif /* not SYSTEM_MALLOC */
 
-
-\f
-/***********************************************************************
-			 Interval Allocation
- ***********************************************************************/
-
-/* Number of intervals allocated in an interval_block structure.
-   The 1020 is 1024 minus malloc overhead.  */
-
-#define INTERVAL_BLOCK_SIZE \
-  ((1020 - sizeof (struct interval_block *)) / sizeof (struct interval))
-
-/* Intervals are allocated in chunks in form of an interval_block
-   structure.  */
-
-struct interval_block
-{
-  /* Place `intervals' first, to preserve alignment.  */
-  struct interval intervals[INTERVAL_BLOCK_SIZE];
-  struct interval_block *next;
-};
-
-/* Current interval block.  Its `next' pointer points to older
-   blocks.  */
-
-static struct interval_block *interval_block;
-
-/* Index in interval_block above of the next unused interval
-   structure.  */
-
-static int interval_block_index = INTERVAL_BLOCK_SIZE;
-
-/* Number of free and live intervals.  */
-
-static EMACS_INT total_free_intervals, total_intervals;
-
-/* List of free intervals.  */
-
-static INTERVAL interval_free_list;
-
-/* Return a new interval.  */
-
-INTERVAL
-make_interval (void)
-{
-  INTERVAL val;
-
-  /* eassert (!handling_signal); */
-
-  MALLOC_BLOCK_INPUT;
-
-  if (interval_free_list)
-    {
-      val = interval_free_list;
-      interval_free_list = INTERVAL_PARENT (interval_free_list);
-    }
-  else
-    {
-      if (interval_block_index == INTERVAL_BLOCK_SIZE)
-	{
-	  struct interval_block *newi
-	    = lisp_malloc (sizeof *newi, MEM_TYPE_NON_LISP);
-
-	  newi->next = interval_block;
-	  interval_block = newi;
-	  interval_block_index = 0;
-	  total_free_intervals += INTERVAL_BLOCK_SIZE;
-	}
-      val = &interval_block->intervals[interval_block_index++];
-    }
-
-  MALLOC_UNBLOCK_INPUT;
-
-  consing_since_gc += sizeof (struct interval);
-  intervals_consed++;
-  total_free_intervals--;
-  RESET_INTERVAL (val);
-  val->gcmarkbit = 0;
-  return val;
-}
-
-
 /* Mark Lisp objects in interval I.  */
 
 static void
@@ -1547,16 +1465,14 @@
 {
   /* Intervals should never be shared.  So, if extra internal checking is
      enabled, GC aborts if it seems to have visited an interval twice.  */
-  eassert (!i->gcmarkbit);
-  i->gcmarkbit = 1;
-  mark_object (i->plist);
+  mark_vectorlike ((struct Lisp_Vector *) i);
 }
 
 /* Mark the interval tree rooted in I.  */
 
 #define MARK_INTERVAL_TREE(i)					\
   do {								\
-    if (i && !i->gcmarkbit)					\
+    if (i && !VECTOR_MARKED_P (i))				\
       traverse_intervals_noorder (i, mark_interval, Qnil);	\
   } while (0)
 
@@ -3336,6 +3252,17 @@
   return p;
 }
 
+INTERVAL
+allocate_interval (void)
+{
+  INTERVAL i;
+
+  /* Intervals has no their own pseudovector type, but this may be changed.  */
+  i = ALLOCATE_PSEUDOVECTOR (struct interval, total_length, PVEC_OTHER);
+  RESET_INTERVAL (i);
+  return i;
+}
+
 DEFUN ("make-vector", Fmake_vector, Smake_vector, 2, 2, 0,
        doc: /* Return a newly created vector of length LENGTH, with each element being INIT.
 See also the function `vector'.  */)
@@ -5597,7 +5524,6 @@
       tot += total_string_bytes;
       tot += total_vector_slots * word_size;
       tot += total_floats  * sizeof (struct Lisp_Float);
-      tot += total_intervals * sizeof (struct interval);
       tot += total_strings * sizeof (struct Lisp_String);
 
       tot *= XFLOAT_DATA (Vgc_cons_percentage);
@@ -5620,8 +5546,8 @@
 
   unbind_to (count, Qnil);
   {
-    Lisp_Object total[11];
-    int total_size = 10;
+    Lisp_Object total[10];
+    int total_size = 9;
 
     total[0] = list4 (Qconses, make_number (sizeof (struct Lisp_Cons)),
 		      bounded_number (total_conses),
@@ -5653,16 +5579,12 @@
 		      bounded_number (total_floats),
 		      bounded_number (total_free_floats));
 
-    total[8] = list4 (Qintervals, make_number (sizeof (struct interval)),
-		      bounded_number (total_intervals),
-		      bounded_number (total_free_intervals));
-
-    total[9] = list3 (Qbuffers, make_number (sizeof (struct buffer)),
+    total[8] = list3 (Qbuffers, make_number (sizeof (struct buffer)),
 		      bounded_number (total_buffers));
 
 #ifdef DOUG_LEA_MALLOC
     total_size++;
-    total[10] = list4 (Qheap, make_number (1024),
+    total[9] = list4 (Qheap, make_number (1024),
                        bounded_number ((mallinfo ().uordblks + 1023) >> 10),
                        bounded_number ((mallinfo ().fordblks + 1023) >> 10));
 #endif
@@ -5674,7 +5596,7 @@
     /* Compute average percentage of zombies.  */
     double nlive
       = (total_conses + total_symbols + total_markers + total_strings
-         + total_vectors + total_floats + total_intervals + total_buffers);
+         + total_vectors + total_floats + total_buffers);
 
     avg_live = (avg_live * ngcs + nlive) / (ngcs + 1);
     max_live = max (nlive, max_live);
@@ -6391,55 +6313,6 @@
     total_free_floats = num_free;
   }
 
-  /* Put all unmarked intervals on free list */
-  {
-    register struct interval_block *iblk;
-    struct interval_block **iprev = &interval_block;
-    register int lim = interval_block_index;
-    EMACS_INT num_free = 0, num_used = 0;
-
-    interval_free_list = 0;
-
-    for (iblk = interval_block; iblk; iblk = *iprev)
-      {
-	register int i;
-	int this_free = 0;
-
-	for (i = 0; i < lim; i++)
-	  {
-	    if (!iblk->intervals[i].gcmarkbit)
-	      {
-		set_interval_parent (&iblk->intervals[i], interval_free_list);
-		interval_free_list = &iblk->intervals[i];
-		this_free++;
-	      }
-	    else
-	      {
-		num_used++;
-		iblk->intervals[i].gcmarkbit = 0;
-	      }
-	  }
-	lim = INTERVAL_BLOCK_SIZE;
-	/* If this block contains only free intervals and we have already
-	   seen more than two blocks worth of free intervals then
-	   deallocate this block.  */
-	if (this_free == INTERVAL_BLOCK_SIZE && num_free > INTERVAL_BLOCK_SIZE)
-	  {
-	    *iprev = iblk->next;
-	    /* Unhook from the free list.  */
-	    interval_free_list = INTERVAL_PARENT (&iblk->intervals[0]);
-	    lisp_free (iblk);
-	  }
-	else
-	  {
-	    num_free += this_free;
-	    iprev = &iblk->next;
-	  }
-      }
-    total_intervals = num_used;
-    total_free_intervals = num_free;
-  }
-
   /* Put all unmarked symbols on free list */
   {
     register struct symbol_block *sblk;
@@ -6614,7 +6487,7 @@
 The counters wrap around from the largest positive integer to zero.
 Garbage collection does not decrease them.
 The elements of the value are as follows:
-  (CONSES FLOATS VECTOR-CELLS SYMBOLS STRING-CHARS MISCS INTERVALS STRINGS)
+  (CONSES FLOATS VECTOR-CELLS SYMBOLS STRING-CHARS MISCS STRINGS)
 All are in units of 1 = one object consed
 except for VECTOR-CELLS and STRING-CHARS, which count the total length of
 objects consed.
@@ -6623,14 +6496,13 @@
   (but the contents of a buffer's text do not count here).  */)
   (void)
 {
-  return listn (CONSTYPE_HEAP, 8,
+  return listn (CONSTYPE_HEAP, 7,
 		bounded_number (cons_cells_consed),
 		bounded_number (floats_consed),
 		bounded_number (vector_cells_consed),
 		bounded_number (symbols_consed),
 		bounded_number (string_chars_consed),
 		bounded_number (misc_objects_consed),
-		bounded_number (intervals_consed),
 		bounded_number (strings_consed));
 }
 
@@ -6794,9 +6666,6 @@
 These include markers and overlays, plus certain objects not visible
 to users.  */);
 
-  DEFVAR_INT ("intervals-consed", intervals_consed,
-	      doc: /* Number of intervals that have been consed so far.  */);
-
   DEFVAR_INT ("strings-consed", strings_consed,
 	      doc: /* Number of strings that have been consed so far.  */);
 
@@ -6833,7 +6702,6 @@
   DEFSYM (Qstrings, "strings");
   DEFSYM (Qvectors, "vectors");
   DEFSYM (Qfloats, "floats");
-  DEFSYM (Qintervals, "intervals");
   DEFSYM (Qbuffers, "buffers");
   DEFSYM (Qstring_bytes, "string-bytes");
   DEFSYM (Qvector_slots, "vector-slots");

=== modified file 'src/intervals.c'
--- src/intervals.c	2012-08-18 06:06:39 +0000
+++ src/intervals.c	2012-08-29 04:59:49 +0000
@@ -104,7 +104,7 @@
 
   CHECK_IMPURE (parent);
 
-  new = make_interval ();
+  new = allocate_interval ();
 
   if (BUFFERP (parent))
     {
@@ -546,7 +546,7 @@
 INTERVAL
 split_interval_right (INTERVAL interval, ptrdiff_t offset)
 {
-  INTERVAL new = make_interval ();
+  INTERVAL new = allocate_interval ();
   ptrdiff_t position = interval->position;
   ptrdiff_t new_length = LENGTH (interval) - offset;
 
@@ -591,7 +591,7 @@
 INTERVAL
 split_interval_left (INTERVAL interval, ptrdiff_t offset)
 {
-  INTERVAL new = make_interval ();
+  INTERVAL new = allocate_interval ();
   ptrdiff_t new_length = offset;
 
   new->position = interval->position;
@@ -1534,7 +1534,7 @@
 static INTERVAL
 reproduce_interval (INTERVAL source)
 {
-  register INTERVAL target = make_interval ();
+  register INTERVAL target = allocate_interval ();
 
   target->total_length = source->total_length;
   target->position = source->position;
@@ -2270,7 +2270,7 @@
       && DEFAULT_INTERVAL_P (i))
     return NULL;
 
-  new = make_interval ();
+  new = allocate_interval ();
   new->position = 0;
   got = (LENGTH (i) - (start - i->position));
   new->total_length = length;

=== modified file 'src/intervals.h'
--- src/intervals.h	2012-08-17 21:12:11 +0000
+++ src/intervals.h	2012-08-29 05:36:27 +0000
@@ -27,17 +27,29 @@
 
 struct interval
 {
-  /* The first group of entries deal with the tree structure.  */
-
-  ptrdiff_t total_length;       /* Length of myself and both children.  */
-  ptrdiff_t position;	        /* Cache of interval's character position.  */
-				/* This field is usually updated
-				   simultaneously with an interval
-				   traversal, there is no guarantee
-				   that it is valid for a random
-				   interval.  */
-  struct interval *left;	/* Intervals which precede me.  */
-  struct interval *right;	/* Intervals which succeed me.  */
+  /* Although intervals are not used as Lisp_Objects, they are
+     allocated as pseudodectors and should have a standard header.  */
+  struct vectorlike_header header;
+
+  /* All properties of this interval.  This is
+     the only slot which is traced by GC.  */
+  Lisp_Object plist;
+
+  /* This group of entries deal with the tree structure.  */
+
+  /* Length of myself and both children.  */
+  ptrdiff_t total_length;
+
+  /* Cache of interval's character position.  This field is usually
+     updated simultaneously with an interval traversal, and there is
+     no guarantee that it is valid for a random interval.  */
+  ptrdiff_t position;
+
+  /* Intervals which precede me.  */
+  struct interval *left;
+
+  /* Intervals which succeed me.  */
+  struct interval *right;
 
   /* Parent in the tree, or the Lisp_Object containing this interval tree.  */
   union
@@ -45,20 +57,24 @@
     struct interval *interval;
     Lisp_Object obj;
   } up;
+
+  /* Non-zero if OBJ slot from the above is in use.
+     Even if this is so, OBJ is never traced by GC.  */
   unsigned int up_obj : 1;
 
-  unsigned gcmarkbit : 1;
-
-  /* The remaining components are `properties' of the interval.
-     The first four are duplicates for things which can be on the list,
-     for purposes of speed.  */
-
-  unsigned int write_protect : 1;   /* Non-zero means can't modify.  */
-  unsigned int visible : 1;	    /* Zero means don't display.  */
-  unsigned int front_sticky : 1;    /* Non-zero means text inserted just
-				       before this interval goes into it.  */
-  unsigned int rear_sticky : 1;	    /* Likewise for just after it.  */
-  Lisp_Object plist;		    /* Other properties.  */
+  /* These properties can be on the list, but cached for the sake of speed.  */
+
+  /* Non-zero means can't modify.  */
+  unsigned int write_protect : 1;
+
+  /* Zero means don't display.  */
+  unsigned int visible : 1;
+
+  /* Non-zero means text inserted just before this interval goes into it.  */
+  unsigned int front_sticky : 1;
+
+  /* Likewise for just after it.  */
+  unsigned int rear_sticky : 1;
 };
 
 /* These are macros for dealing with the interval tree.  */
@@ -221,7 +237,7 @@
 
 /* Declared in alloc.c.  */
 
-extern INTERVAL make_interval (void);
+extern INTERVAL allocate_interval (void);
 
 /* Declared in intervals.c.  */
 


^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [RFC] allocating intervals as vectors
  2012-08-29  6:11 [RFC] allocating intervals as vectors Dmitry Antipov
@ 2012-08-29 13:51 ` Stefan Monnier
  0 siblings, 0 replies; 2+ messages in thread
From: Stefan Monnier @ 2012-08-29 13:51 UTC (permalink / raw)
  To: Dmitry Antipov; +Cc: Emacs development discussions

If IIUC this adds 2 words for each interval object (from 7 to 9).
That seems high for the minor benefits we get in return.


        Stefan


> -  unsigned gcmarkbit : 1;
> -
> -  /* The remaining components are `properties' of the interval.
> -     The first four are duplicates for things which can be on the list,
> -     for purposes of speed.  */
> -
> -  unsigned int write_protect : 1;   /* Non-zero means can't modify.  */
> -  unsigned int visible : 1;	    /* Zero means don't display.  */
> -  unsigned int front_sticky : 1;    /* Non-zero means text inserted just
> -				       before this interval goes into it.  */
> -  unsigned int rear_sticky : 1;	    /* Likewise for just after it.  */
> -  Lisp_Object plist;		    /* Other properties.  */
> +  /* These properties can be on the list, but cached for the sake of speed.  */
> +
> +  /* Non-zero means can't modify.  */
> +  unsigned int write_protect : 1;
> +
> +  /* Zero means don't display.  */
> +  unsigned int visible : 1;
> +
> +  /* Non-zero means text inserted just before this interval goes into it.  */
> +  unsigned int front_sticky : 1;
> +
> +  /* Likewise for just after it.  */
> +  unsigned int rear_sticky : 1;
>  };

Could you please try to avoid such cosmetic changes in your patches?
Especially since they make the code more difficult to read in my
opinion (the whole struct doesn't fit as comfortably on a single screen,
and there's more comments interspersed with the field declarations).



^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2012-08-29 13:51 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2012-08-29  6:11 [RFC] allocating intervals as vectors Dmitry Antipov
2012-08-29 13:51 ` Stefan Monnier

Code repositories for project(s) associated with this external index

	https://git.savannah.gnu.org/cgit/emacs.git
	https://git.savannah.gnu.org/cgit/emacs/org-mode.git

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.