unofficial mirror of emacs-devel@gnu.org 
 help / color / mirror / code / Atom feed
* [RFC] position caches
@ 2013-03-12  7:56 Dmitry Antipov
  2013-03-12 17:08 ` Eli Zaretskii
  0 siblings, 1 reply; 4+ messages in thread
From: Dmitry Antipov @ 2013-03-12  7:56 UTC (permalink / raw)
  To: Emacs development discussions; +Cc: Eli Zaretskii

[-- Attachment #1: Type: text/plain, Size: 390 bytes --]

This was designed in attempt to avoid long buffer scans in bidi_find_paragraph_start.
The same stuff may be used to implement simple per-buffer charpos <-> bytepos cache.
Caching the result of bidi_find_paragraph_start may improve the speed of backward
scrolling/movement (I've seen ~6x speedup for 60M buffer with average string of 5K
characters). Comments are highly appreciated.

Dmitry

[-- Attachment #2: pos_cache.patch --]
[-- Type: text/plain, Size: 9997 bytes --]

=== modified file 'src/bidi.c'
--- src/bidi.c	2013-03-08 09:34:35 +0000
+++ src/bidi.c	2013-03-12 07:39:27 +0000
@@ -1097,24 +1097,33 @@
 static ptrdiff_t
 bidi_find_paragraph_start (ptrdiff_t pos, ptrdiff_t pos_byte)
 {
-  Lisp_Object re = paragraph_start_re;
-  ptrdiff_t limit = ZV, limit_byte = ZV_BYTE;
-  ptrdiff_t n = 0;
+  struct buffer *b = current_buffer;
 
-  while (pos_byte > BEGV_BYTE
-	 && n++ < MAX_PARAGRAPH_SEARCH
-	 && fast_looking_at (re, pos, pos_byte, limit, limit_byte, Qnil) < 0)
+  if (valid_pos_cache (b, &b->bidi_paragraph_cache)
+      && b->bidi_paragraph_cache.bufpos == pos_byte)
+    return b->bidi_paragraph_cache.posval;
+  else
     {
-      /* FIXME: What if the paragraph beginning is covered by a
-	 display string?  And what if a display string covering some
-	 of the text over which we scan back includes
-	 paragraph_start_re?  */
-      DEC_BOTH (pos, pos_byte);
-      pos = find_newline_no_quit (pos, pos_byte, -1, &pos_byte);
+      ptrdiff_t n = 0, savedpos = pos_byte;
+
+      while (pos_byte > BEGV_BYTE
+	     && n++ < MAX_PARAGRAPH_SEARCH
+	     && fast_looking_at (paragraph_start_re, pos, pos_byte,
+				 ZV, ZV_BYTE, Qnil) < 0)
+	{
+	  /* FIXME: What if the paragraph beginning is covered by a
+	     display string?  And what if a display string covering some
+	     of the text over which we scan back includes
+	     paragraph_start_re?  */
+	  DEC_BOTH (pos, pos_byte);
+	  pos = find_newline_no_quit (pos, pos_byte, -1, &pos_byte);
+	}
+      if (n >= MAX_PARAGRAPH_SEARCH)
+	pos_byte = BEGV_BYTE;
+
+      update_pos_cache (b, &b->bidi_paragraph_cache, savedpos, pos_byte);
+      return pos_byte;
     }
-  if (n >= MAX_PARAGRAPH_SEARCH)
-    pos_byte = BEGV_BYTE;
-  return pos_byte;
 }
 
 /* On a 3.4 GHz machine, searching forward for a strong directional

=== modified file 'src/buffer.c'
--- src/buffer.c	2013-01-19 20:04:33 +0000
+++ src/buffer.c	2013-03-12 07:39:37 +0000
@@ -931,6 +931,8 @@
 {
   bset_filename (b, Qnil);
   bset_file_truename (b, Qnil);
+  init_pos_cache (&b->char_byte_cache);
+  init_pos_cache (&b->bidi_paragraph_cache);
   bset_directory (b, current_buffer ? BVAR (current_buffer, directory) : Qnil);
   b->modtime = make_emacs_time (0, UNKNOWN_MODTIME_NSECS);
   b->modtime_size = -1;
@@ -1844,8 +1846,6 @@
      insisted on circular lists) so allow quitting here.  */
   frames_discard_buffer (buffer);
 
-  clear_charpos_cache (b);
-
   tem = Vinhibit_quit;
   Vinhibit_quit = Qt;
   /* Remove the buffer from the list of all buffers.  */
@@ -2352,6 +2352,8 @@
   current_buffer->clip_changed = 1;	other_buffer->clip_changed = 1;
   swapfield (newline_cache, struct region_cache *);
   swapfield (width_run_cache, struct region_cache *);
+  swapfield (char_byte_cache, struct pos_cache);
+  swapfield (bidi_paragraph_cache, struct pos_cache);
   current_buffer->prevent_redisplay_optimizations_p = 1;
   other_buffer->prevent_redisplay_optimizations_p = 1;
   swapfield (overlays_before, struct Lisp_Overlay *);
@@ -2459,9 +2461,6 @@
      instead.  */
   bset_undo_list (current_buffer, Qt);
 
-  /* If the cached position is for this buffer, clear it out.  */
-  clear_charpos_cache (current_buffer);
-
   if (NILP (flag))
     begv = BEGV_BYTE, zv = ZV_BYTE;
   else

=== modified file 'src/buffer.h'
--- src/buffer.h	2013-01-17 05:52:13 +0000
+++ src/buffer.h	2013-03-12 07:39:54 +0000
@@ -412,7 +412,18 @@
 
 #define BUF_FETCH_BYTE(buf, n) \
   *(BUF_BYTE_ADDRESS ((buf), (n)))
-\f
+
+/* Convenient method to cache the correspondence between two
+   buffer positions.  Unlike markers, the cache is not updated
+   when the buffer text is changed but just invalidated.  */
+
+struct pos_cache
+  {
+    EMACS_INT modiff;
+    ptrdiff_t bufpos;
+    ptrdiff_t posval;
+  };
+
 /* Define the actual buffer data structures.  */
 
 /* This data structure describes the actual text contents of a buffer.
@@ -861,6 +872,14 @@
   /* Position where the overlay lists are centered.  */
   ptrdiff_t overlay_center;
 
+  /* Used by buf_charpos_to_bytepos and buf_bytepos_to_charpos.
+     Also updated with INC_BOTH and DEC_BOTH macros.  */
+  struct pos_cache char_byte_cache;
+
+  /* Used by bidi_find_paragraph_start in attempt to avoid
+     long buffer scans.  */
+  struct pos_cache bidi_paragraph_cache;
+
   /* Changes in the buffer are recorded here for undo, and t means
      don't record anything.  This information belongs to the base
      buffer of an indirect buffer.  But we can't store it in the
@@ -869,6 +888,30 @@
   Lisp_Object INTERNAL_FIELD (undo_list);
 };
 
+/* Simple position cache API.  */
+
+BUFFER_INLINE void
+init_pos_cache (struct pos_cache *pc)
+{
+  pc->modiff = 1;
+  pc->bufpos = pc->posval = 0;
+}
+
+BUFFER_INLINE bool
+valid_pos_cache (struct buffer *b, struct pos_cache *pc)
+{
+  return BUF_MODIFF (b) == pc->modiff;
+}
+
+BUFFER_INLINE void
+update_pos_cache (struct buffer *b, struct pos_cache *pc,
+		  ptrdiff_t pos, ptrdiff_t val)
+{
+  pc->modiff = BUF_MODIFF (b);
+  pc->bufpos = pos;
+  pc->posval = val;
+}
+
 /* Most code should use these functions to set Lisp fields in struct
    buffer.  */
 BUFFER_INLINE void

=== modified file 'src/character.h'
--- src/character.h	2012-09-26 20:00:29 +0000
+++ src/character.h	2013-03-12 07:40:09 +0000
@@ -501,10 +501,14 @@
   do								\
     {								\
       (charpos)++;						\
-      if (NILP (BVAR (current_buffer, enable_multibyte_characters)))	\
+      if (NILP (BVAR (current_buffer,				\
+		      enable_multibyte_characters)))		\
 	(bytepos)++;						\
       else							\
 	INC_POS ((bytepos));					\
+      update_pos_cache (current_buffer,				\
+			&current_buffer->char_byte_cache,	\
+			charpos, bytepos);			\
     }								\
   while (0)
 
@@ -515,10 +519,14 @@
   do								\
     {								\
       (charpos)--;						\
-      if (NILP (BVAR (current_buffer, enable_multibyte_characters)))	\
+      if (NILP (BVAR (current_buffer,				\
+		      enable_multibyte_characters)))		\
 	(bytepos)--;						\
       else							\
 	DEC_POS ((bytepos));					\
+      update_pos_cache (current_buffer,				\
+			&current_buffer->char_byte_cache,	\
+			charpos, bytepos);			\
     }								\
   while (0)
 

=== modified file 'src/lisp.h'
--- src/lisp.h	2013-03-11 04:02:06 +0000
+++ src/lisp.h	2013-03-12 07:14:22 +0000
@@ -3319,7 +3319,6 @@
 
 extern ptrdiff_t marker_position (Lisp_Object);
 extern ptrdiff_t marker_byte_position (Lisp_Object);
-extern void clear_charpos_cache (struct buffer *);
 extern ptrdiff_t buf_charpos_to_bytepos (struct buffer *, ptrdiff_t);
 extern ptrdiff_t buf_bytepos_to_charpos (struct buffer *, ptrdiff_t);
 extern void unchain_marker (struct Lisp_Marker *marker);

=== modified file 'src/marker.c'
--- src/marker.c	2013-02-19 14:44:03 +0000
+++ src/marker.c	2013-03-12 07:40:43 +0000
@@ -24,14 +24,6 @@
 #include "character.h"
 #include "buffer.h"
 
-/* Record one cached position found recently by
-   buf_charpos_to_bytepos or buf_bytepos_to_charpos.  */
-
-static ptrdiff_t cached_charpos;
-static ptrdiff_t cached_bytepos;
-static struct buffer *cached_buffer;
-static EMACS_INT cached_modiff;
-
 /* Juanma Barranquero <lekktu@gmail.com> reported ~3x increased
    bootstrap time when byte_char_debug_check is enabled; so this
    is never turned on by --enable-checking configure option.  */
@@ -69,13 +61,6 @@
 
 #endif /* MARKER_DEBUG */
 
-void
-clear_charpos_cache (struct buffer *b)
-{
-  if (cached_buffer == b)
-    cached_buffer = 0;
-}
-\f
 /* Converting between character positions and byte positions.  */
 
 /* There are several places in the buffer where we know
@@ -164,8 +149,8 @@
   CONSIDER (BUF_BEGV (b), BUF_BEGV_BYTE (b));
   CONSIDER (BUF_ZV (b), BUF_ZV_BYTE (b));
 
-  if (b == cached_buffer && BUF_MODIFF (b) == cached_modiff)
-    CONSIDER (cached_charpos, cached_bytepos);
+  if (valid_pos_cache (b, &b->char_byte_cache))
+    CONSIDER (b->char_byte_cache.bufpos, b->char_byte_cache.posval);
 
   for (tail = BUF_MARKERS (b); tail; tail = tail->next)
     {
@@ -199,11 +184,7 @@
 	build_marker (b, best_below, best_below_byte);
 
       byte_char_debug_check (b, best_below, best_below_byte);
-
-      cached_buffer = b;
-      cached_modiff = BUF_MODIFF (b);
-      cached_charpos = best_below;
-      cached_bytepos = best_below_byte;
+      update_pos_cache (b, &b->char_byte_cache, best_below, best_below_byte);
 
       return best_below_byte;
     }
@@ -224,11 +205,7 @@
 	build_marker (b, best_above, best_above_byte);
 
       byte_char_debug_check (b, best_above, best_above_byte);
-
-      cached_buffer = b;
-      cached_modiff = BUF_MODIFF (b);
-      cached_charpos = best_above;
-      cached_bytepos = best_above_byte;
+      update_pos_cache (b, &b->char_byte_cache, best_above, best_above_byte);
 
       return best_above_byte;
     }
@@ -307,8 +284,8 @@
   CONSIDER (BUF_BEGV_BYTE (b), BUF_BEGV (b));
   CONSIDER (BUF_ZV_BYTE (b), BUF_ZV (b));
 
-  if (b == cached_buffer && BUF_MODIFF (b) == cached_modiff)
-    CONSIDER (cached_bytepos, cached_charpos);
+  if (valid_pos_cache (b, &b->char_byte_cache))
+    CONSIDER (b->char_byte_cache.posval, b->char_byte_cache.bufpos);
 
   for (tail = BUF_MARKERS (b); tail; tail = tail->next)
     {
@@ -344,11 +321,7 @@
 	build_marker (b, best_below, best_below_byte);
 
       byte_char_debug_check (b, best_below, best_below_byte);
-
-      cached_buffer = b;
-      cached_modiff = BUF_MODIFF (b);
-      cached_charpos = best_below;
-      cached_bytepos = best_below_byte;
+      update_pos_cache (b, &b->char_byte_cache, best_below, best_below_byte);
 
       return best_below;
     }
@@ -371,11 +344,7 @@
 	build_marker (b, best_above, best_above_byte);
 
       byte_char_debug_check (b, best_above, best_above_byte);
-
-      cached_buffer = b;
-      cached_modiff = BUF_MODIFF (b);
-      cached_charpos = best_above;
-      cached_bytepos = best_above_byte;
+      update_pos_cache (b, &b->char_byte_cache, best_above, best_above_byte);
 
       return best_above;
     }


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [RFC] position caches
  2013-03-12  7:56 [RFC] position caches Dmitry Antipov
@ 2013-03-12 17:08 ` Eli Zaretskii
  2013-03-14  5:40   ` Dmitry Antipov
  0 siblings, 1 reply; 4+ messages in thread
From: Eli Zaretskii @ 2013-03-12 17:08 UTC (permalink / raw)
  To: Dmitry Antipov; +Cc: emacs-devel

> Date: Tue, 12 Mar 2013 11:56:26 +0400
> From: Dmitry Antipov <dmantipov@yandex.ru>
> CC: Eli Zaretskii <eliz@gnu.org>
> 
> This was designed in attempt to avoid long buffer scans in bidi_find_paragraph_start.
> The same stuff may be used to implement simple per-buffer charpos <-> bytepos cache.
> Caching the result of bidi_find_paragraph_start may improve the speed of backward
> scrolling/movement (I've seen ~6x speedup for 60M buffer with average string of 5K
> characters). Comments are highly appreciated.

Thank you for doing this.

> +BUFFER_INLINE bool
> +valid_pos_cache (struct buffer *b, struct pos_cache *pc)
> +{
> +  return BUF_MODIFF (b) == pc->modiff;
> +}

I think there's a fundamental design problem here, when this cache is
used for holding the paragraph start.  bidi_paragraph_cache is
intended to be used by bidi.c, which is invoked as part of iterating
through all the visible parts of a buffer in order to display them.
(Sometimes, it iterates across characters immediately before and after
the window, and very rarely it does that for positions that are very
far from the displayed portion of text.)  The visible portion of a
buffer can include more than one paragraph (each empty line starts a
new paragraph), and each one of these paragraphs can have a different
base direction, depending on the first strong directional character of
each paragraph.

Therefore, bidi.c needs to determine the paragraph start each time it
enters a new paragraph.  It needs to do that even if the buffer didn't
change a bit, because a new paragraph can have a different base
direction.  The bidi_it->new_paragraph flag is set when the iteration
goes out of a paragraph or when the iterator is re-seated far away of
its current position.  In those cases, xdisp.c calls
bidi_paragraph_init, which in turn calls bidi_find_paragraph_start.
You cannot return the beginning of previously found paragraph just
because the buffer didn't change.

So I think the cache needs to track both the beginning and the end
position of a paragraph, and invalidate the cache whenever the
iterator position (bidi_it->charpos etc.) is outside of these limits.

OTOH, I think you can avoid the need to invalidate the cache on every
buffer change, if you use markers for storing the paragraph beginning
and end.  Then you only need to invalidate if the change is at the
marker position itself, or if the paragraph was emptied.  This might
hold the cache valid for quite some time.

Thanks.



^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [RFC] position caches
  2013-03-12 17:08 ` Eli Zaretskii
@ 2013-03-14  5:40   ` Dmitry Antipov
  2013-03-14 18:23     ` Eli Zaretskii
  0 siblings, 1 reply; 4+ messages in thread
From: Dmitry Antipov @ 2013-03-14  5:40 UTC (permalink / raw)
  To: Eli Zaretskii; +Cc: emacs-devel

On 03/12/2013 09:08 PM, Eli Zaretskii wrote:

> Therefore, bidi.c needs to determine the paragraph start each time it
> enters a new paragraph.  It needs to do that even if the buffer didn't
> change a bit, because a new paragraph can have a different base
> direction.  The bidi_it->new_paragraph flag is set when the iteration
> goes out of a paragraph or when the iterator is re-seated far away of
> its current position.  In those cases, xdisp.c calls
> bidi_paragraph_init, which in turn calls bidi_find_paragraph_start.
> You cannot return the beginning of previously found paragraph just
> because the buffer didn't change.

I do not understand this. I'm trying to cache not just the previously
find paragraph start, but the previously find paragraph start against
[bidi_it->charpos, bidi_it->bytepos] buffer position; so if bidi_it is
moved in any way, cache becomes invalid even if the buffer is not changed.

Dmitry





^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [RFC] position caches
  2013-03-14  5:40   ` Dmitry Antipov
@ 2013-03-14 18:23     ` Eli Zaretskii
  0 siblings, 0 replies; 4+ messages in thread
From: Eli Zaretskii @ 2013-03-14 18:23 UTC (permalink / raw)
  To: Dmitry Antipov; +Cc: emacs-devel

> Date: Thu, 14 Mar 2013 09:40:17 +0400
> From: Dmitry Antipov <dmantipov@yandex.ru>
> CC: emacs-devel@gnu.org
> 
> On 03/12/2013 09:08 PM, Eli Zaretskii wrote:
> 
> > Therefore, bidi.c needs to determine the paragraph start each time it
> > enters a new paragraph.  It needs to do that even if the buffer didn't
> > change a bit, because a new paragraph can have a different base
> > direction.  The bidi_it->new_paragraph flag is set when the iteration
> > goes out of a paragraph or when the iterator is re-seated far away of
> > its current position.  In those cases, xdisp.c calls
> > bidi_paragraph_init, which in turn calls bidi_find_paragraph_start.
> > You cannot return the beginning of previously found paragraph just
> > because the buffer didn't change.
> 
> I do not understand this.

Sorry, this is entirely my fault: I failed to write something without
which the above indeed makes no sense.

> I'm trying to cache not just the previously
> find paragraph start, but the previously find paragraph start against
> [bidi_it->charpos, bidi_it->bytepos] buffer position; so if bidi_it is
> moved in any way, cache becomes invalid even if the buffer is not changed.

You are talking about the line marked below:

> -  while (pos_byte > BEGV_BYTE
> -	 && n++ < MAX_PARAGRAPH_SEARCH
> -	 && fast_looking_at (re, pos, pos_byte, limit, limit_byte, Qnil) < 0)
> +  if (valid_pos_cache (b, &b->bidi_paragraph_cache)
> +      && b->bidi_paragraph_cache.bufpos == pos_byte)  <<<<<<<<<<<<<<<<<
> +    return b->bidi_paragraph_cache.posval;
> +  else

Yes, this does invalidate the cache when the iterator moves, but it
also makes the cache almost entirely useless, because the iterator
moves all the time (that's why it's called "iterator").  It almost
never needs to find the paragraph start starting at the same buffer
position.  I intended to tell you that this condition will render the
caching much less useful than it could have been, but somehow forgot.

This code runs when the iterator is "re-seated" to a different
position in the buffer.  An example which is frequently met, and thus
might benefit from caching, is when the display code calls
move_it_vertically and/or move_it_vertically_backward, which start by
moving backwards one or more lines, and then start working from there.
If the line we end up on is still within the same paragraph, being
able to not search for paragraph start again is a big win.

Another relevant example is the implementation of vertical-motion.
Each time you press C-n or C-p, we call start_display, which forces
the move_it_* routines called after that to look for the beginning of
the current paragraph, starting at a line that is not the current one.
Again per-buffer cache of the current paragraph would help here a lot,
especially when lines in the buffer are very long.

IOW, if you invalidate the cache whenever the iterator changes its
position, keeping a separate cache for each buffer is redundant,
because the next time we will display the same buffer, we will almost
surely begin from a different buffer position.

So to be useful, the cache cannot be invalidated so frequently.  It
should only be invalidated when we exit the paragraph, i.e. find
ourselves before its beginning or after its end.



^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2013-03-14 18:23 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2013-03-12  7:56 [RFC] position caches Dmitry Antipov
2013-03-12 17:08 ` Eli Zaretskii
2013-03-14  5:40   ` Dmitry Antipov
2013-03-14 18:23     ` Eli Zaretskii

Code repositories for project(s) associated with this public inbox

	https://git.savannah.gnu.org/cgit/emacs.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).