unofficial mirror of emacs-devel@gnu.org 
 help / color / mirror / code / Atom feed
* [RFC] position caches
@ 2013-03-12  7:56 Dmitry Antipov
  2013-03-12 17:08 ` Eli Zaretskii
  0 siblings, 1 reply; 4+ messages in thread
From: Dmitry Antipov @ 2013-03-12  7:56 UTC (permalink / raw)
  To: Emacs development discussions; +Cc: Eli Zaretskii

[-- Attachment #1: Type: text/plain, Size: 390 bytes --]

This was designed in attempt to avoid long buffer scans in bidi_find_paragraph_start.
The same stuff may be used to implement simple per-buffer charpos <-> bytepos cache.
Caching the result of bidi_find_paragraph_start may improve the speed of backward
scrolling/movement (I've seen ~6x speedup for 60M buffer with average string of 5K
characters). Comments are highly appreciated.

Dmitry

[-- Attachment #2: pos_cache.patch --]
[-- Type: text/plain, Size: 9997 bytes --]

=== modified file 'src/bidi.c'
--- src/bidi.c	2013-03-08 09:34:35 +0000
+++ src/bidi.c	2013-03-12 07:39:27 +0000
@@ -1097,24 +1097,33 @@
 static ptrdiff_t
 bidi_find_paragraph_start (ptrdiff_t pos, ptrdiff_t pos_byte)
 {
-  Lisp_Object re = paragraph_start_re;
-  ptrdiff_t limit = ZV, limit_byte = ZV_BYTE;
-  ptrdiff_t n = 0;
+  struct buffer *b = current_buffer;
 
-  while (pos_byte > BEGV_BYTE
-	 && n++ < MAX_PARAGRAPH_SEARCH
-	 && fast_looking_at (re, pos, pos_byte, limit, limit_byte, Qnil) < 0)
+  if (valid_pos_cache (b, &b->bidi_paragraph_cache)
+      && b->bidi_paragraph_cache.bufpos == pos_byte)
+    return b->bidi_paragraph_cache.posval;
+  else
     {
-      /* FIXME: What if the paragraph beginning is covered by a
-	 display string?  And what if a display string covering some
-	 of the text over which we scan back includes
-	 paragraph_start_re?  */
-      DEC_BOTH (pos, pos_byte);
-      pos = find_newline_no_quit (pos, pos_byte, -1, &pos_byte);
+      ptrdiff_t n = 0, savedpos = pos_byte;
+
+      while (pos_byte > BEGV_BYTE
+	     && n++ < MAX_PARAGRAPH_SEARCH
+	     && fast_looking_at (paragraph_start_re, pos, pos_byte,
+				 ZV, ZV_BYTE, Qnil) < 0)
+	{
+	  /* FIXME: What if the paragraph beginning is covered by a
+	     display string?  And what if a display string covering some
+	     of the text over which we scan back includes
+	     paragraph_start_re?  */
+	  DEC_BOTH (pos, pos_byte);
+	  pos = find_newline_no_quit (pos, pos_byte, -1, &pos_byte);
+	}
+      if (n >= MAX_PARAGRAPH_SEARCH)
+	pos_byte = BEGV_BYTE;
+
+      update_pos_cache (b, &b->bidi_paragraph_cache, savedpos, pos_byte);
+      return pos_byte;
     }
-  if (n >= MAX_PARAGRAPH_SEARCH)
-    pos_byte = BEGV_BYTE;
-  return pos_byte;
 }
 
 /* On a 3.4 GHz machine, searching forward for a strong directional

=== modified file 'src/buffer.c'
--- src/buffer.c	2013-01-19 20:04:33 +0000
+++ src/buffer.c	2013-03-12 07:39:37 +0000
@@ -931,6 +931,8 @@
 {
   bset_filename (b, Qnil);
   bset_file_truename (b, Qnil);
+  init_pos_cache (&b->char_byte_cache);
+  init_pos_cache (&b->bidi_paragraph_cache);
   bset_directory (b, current_buffer ? BVAR (current_buffer, directory) : Qnil);
   b->modtime = make_emacs_time (0, UNKNOWN_MODTIME_NSECS);
   b->modtime_size = -1;
@@ -1844,8 +1846,6 @@
      insisted on circular lists) so allow quitting here.  */
   frames_discard_buffer (buffer);
 
-  clear_charpos_cache (b);
-
   tem = Vinhibit_quit;
   Vinhibit_quit = Qt;
   /* Remove the buffer from the list of all buffers.  */
@@ -2352,6 +2352,8 @@
   current_buffer->clip_changed = 1;	other_buffer->clip_changed = 1;
   swapfield (newline_cache, struct region_cache *);
   swapfield (width_run_cache, struct region_cache *);
+  swapfield (char_byte_cache, struct pos_cache);
+  swapfield (bidi_paragraph_cache, struct pos_cache);
   current_buffer->prevent_redisplay_optimizations_p = 1;
   other_buffer->prevent_redisplay_optimizations_p = 1;
   swapfield (overlays_before, struct Lisp_Overlay *);
@@ -2459,9 +2461,6 @@
      instead.  */
   bset_undo_list (current_buffer, Qt);
 
-  /* If the cached position is for this buffer, clear it out.  */
-  clear_charpos_cache (current_buffer);
-
   if (NILP (flag))
     begv = BEGV_BYTE, zv = ZV_BYTE;
   else

=== modified file 'src/buffer.h'
--- src/buffer.h	2013-01-17 05:52:13 +0000
+++ src/buffer.h	2013-03-12 07:39:54 +0000
@@ -412,7 +412,18 @@
 
 #define BUF_FETCH_BYTE(buf, n) \
   *(BUF_BYTE_ADDRESS ((buf), (n)))
-\f
+
+/* Convenient method to cache the correspondence between two
+   buffer positions.  Unlike markers, the cache is not updated
+   when the buffer text is changed but just invalidated.  */
+
+struct pos_cache
+  {
+    EMACS_INT modiff;
+    ptrdiff_t bufpos;
+    ptrdiff_t posval;
+  };
+
 /* Define the actual buffer data structures.  */
 
 /* This data structure describes the actual text contents of a buffer.
@@ -861,6 +872,14 @@
   /* Position where the overlay lists are centered.  */
   ptrdiff_t overlay_center;
 
+  /* Used by buf_charpos_to_bytepos and buf_bytepos_to_charpos.
+     Also updated with INC_BOTH and DEC_BOTH macros.  */
+  struct pos_cache char_byte_cache;
+
+  /* Used by bidi_find_paragraph_start in attempt to avoid
+     long buffer scans.  */
+  struct pos_cache bidi_paragraph_cache;
+
   /* Changes in the buffer are recorded here for undo, and t means
      don't record anything.  This information belongs to the base
      buffer of an indirect buffer.  But we can't store it in the
@@ -869,6 +888,30 @@
   Lisp_Object INTERNAL_FIELD (undo_list);
 };
 
+/* Simple position cache API.  */
+
+BUFFER_INLINE void
+init_pos_cache (struct pos_cache *pc)
+{
+  pc->modiff = 1;
+  pc->bufpos = pc->posval = 0;
+}
+
+BUFFER_INLINE bool
+valid_pos_cache (struct buffer *b, struct pos_cache *pc)
+{
+  return BUF_MODIFF (b) == pc->modiff;
+}
+
+BUFFER_INLINE void
+update_pos_cache (struct buffer *b, struct pos_cache *pc,
+		  ptrdiff_t pos, ptrdiff_t val)
+{
+  pc->modiff = BUF_MODIFF (b);
+  pc->bufpos = pos;
+  pc->posval = val;
+}
+
 /* Most code should use these functions to set Lisp fields in struct
    buffer.  */
 BUFFER_INLINE void

=== modified file 'src/character.h'
--- src/character.h	2012-09-26 20:00:29 +0000
+++ src/character.h	2013-03-12 07:40:09 +0000
@@ -501,10 +501,14 @@
   do								\
     {								\
       (charpos)++;						\
-      if (NILP (BVAR (current_buffer, enable_multibyte_characters)))	\
+      if (NILP (BVAR (current_buffer,				\
+		      enable_multibyte_characters)))		\
 	(bytepos)++;						\
       else							\
 	INC_POS ((bytepos));					\
+      update_pos_cache (current_buffer,				\
+			&current_buffer->char_byte_cache,	\
+			charpos, bytepos);			\
     }								\
   while (0)
 
@@ -515,10 +519,14 @@
   do								\
     {								\
       (charpos)--;						\
-      if (NILP (BVAR (current_buffer, enable_multibyte_characters)))	\
+      if (NILP (BVAR (current_buffer,				\
+		      enable_multibyte_characters)))		\
 	(bytepos)--;						\
       else							\
 	DEC_POS ((bytepos));					\
+      update_pos_cache (current_buffer,				\
+			&current_buffer->char_byte_cache,	\
+			charpos, bytepos);			\
     }								\
   while (0)
 

=== modified file 'src/lisp.h'
--- src/lisp.h	2013-03-11 04:02:06 +0000
+++ src/lisp.h	2013-03-12 07:14:22 +0000
@@ -3319,7 +3319,6 @@
 
 extern ptrdiff_t marker_position (Lisp_Object);
 extern ptrdiff_t marker_byte_position (Lisp_Object);
-extern void clear_charpos_cache (struct buffer *);
 extern ptrdiff_t buf_charpos_to_bytepos (struct buffer *, ptrdiff_t);
 extern ptrdiff_t buf_bytepos_to_charpos (struct buffer *, ptrdiff_t);
 extern void unchain_marker (struct Lisp_Marker *marker);

=== modified file 'src/marker.c'
--- src/marker.c	2013-02-19 14:44:03 +0000
+++ src/marker.c	2013-03-12 07:40:43 +0000
@@ -24,14 +24,6 @@
 #include "character.h"
 #include "buffer.h"
 
-/* Record one cached position found recently by
-   buf_charpos_to_bytepos or buf_bytepos_to_charpos.  */
-
-static ptrdiff_t cached_charpos;
-static ptrdiff_t cached_bytepos;
-static struct buffer *cached_buffer;
-static EMACS_INT cached_modiff;
-
 /* Juanma Barranquero <lekktu@gmail.com> reported ~3x increased
    bootstrap time when byte_char_debug_check is enabled; so this
    is never turned on by --enable-checking configure option.  */
@@ -69,13 +61,6 @@
 
 #endif /* MARKER_DEBUG */
 
-void
-clear_charpos_cache (struct buffer *b)
-{
-  if (cached_buffer == b)
-    cached_buffer = 0;
-}
-\f
 /* Converting between character positions and byte positions.  */
 
 /* There are several places in the buffer where we know
@@ -164,8 +149,8 @@
   CONSIDER (BUF_BEGV (b), BUF_BEGV_BYTE (b));
   CONSIDER (BUF_ZV (b), BUF_ZV_BYTE (b));
 
-  if (b == cached_buffer && BUF_MODIFF (b) == cached_modiff)
-    CONSIDER (cached_charpos, cached_bytepos);
+  if (valid_pos_cache (b, &b->char_byte_cache))
+    CONSIDER (b->char_byte_cache.bufpos, b->char_byte_cache.posval);
 
   for (tail = BUF_MARKERS (b); tail; tail = tail->next)
     {
@@ -199,11 +184,7 @@
 	build_marker (b, best_below, best_below_byte);
 
       byte_char_debug_check (b, best_below, best_below_byte);
-
-      cached_buffer = b;
-      cached_modiff = BUF_MODIFF (b);
-      cached_charpos = best_below;
-      cached_bytepos = best_below_byte;
+      update_pos_cache (b, &b->char_byte_cache, best_below, best_below_byte);
 
       return best_below_byte;
     }
@@ -224,11 +205,7 @@
 	build_marker (b, best_above, best_above_byte);
 
       byte_char_debug_check (b, best_above, best_above_byte);
-
-      cached_buffer = b;
-      cached_modiff = BUF_MODIFF (b);
-      cached_charpos = best_above;
-      cached_bytepos = best_above_byte;
+      update_pos_cache (b, &b->char_byte_cache, best_above, best_above_byte);
 
       return best_above_byte;
     }
@@ -307,8 +284,8 @@
   CONSIDER (BUF_BEGV_BYTE (b), BUF_BEGV (b));
   CONSIDER (BUF_ZV_BYTE (b), BUF_ZV (b));
 
-  if (b == cached_buffer && BUF_MODIFF (b) == cached_modiff)
-    CONSIDER (cached_bytepos, cached_charpos);
+  if (valid_pos_cache (b, &b->char_byte_cache))
+    CONSIDER (b->char_byte_cache.posval, b->char_byte_cache.bufpos);
 
   for (tail = BUF_MARKERS (b); tail; tail = tail->next)
     {
@@ -344,11 +321,7 @@
 	build_marker (b, best_below, best_below_byte);
 
       byte_char_debug_check (b, best_below, best_below_byte);
-
-      cached_buffer = b;
-      cached_modiff = BUF_MODIFF (b);
-      cached_charpos = best_below;
-      cached_bytepos = best_below_byte;
+      update_pos_cache (b, &b->char_byte_cache, best_below, best_below_byte);
 
       return best_below;
     }
@@ -371,11 +344,7 @@
 	build_marker (b, best_above, best_above_byte);
 
       byte_char_debug_check (b, best_above, best_above_byte);
-
-      cached_buffer = b;
-      cached_modiff = BUF_MODIFF (b);
-      cached_charpos = best_above;
-      cached_bytepos = best_above_byte;
+      update_pos_cache (b, &b->char_byte_cache, best_above, best_above_byte);
 
       return best_above;
     }


^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2013-03-14 18:23 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2013-03-12  7:56 [RFC] position caches Dmitry Antipov
2013-03-12 17:08 ` Eli Zaretskii
2013-03-14  5:40   ` Dmitry Antipov
2013-03-14 18:23     ` Eli Zaretskii

Code repositories for project(s) associated with this public inbox

	https://git.savannah.gnu.org/cgit/emacs.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).