From: Dmitry Antipov <dmantipov@yandex.ru>
To: Emacs development discussions <emacs-devel@gnu.org>
Cc: Eli Zaretskii <eliz@gnu.org>
Subject: [RFC] position caches
Date: Tue, 12 Mar 2013 11:56:26 +0400 [thread overview]
Message-ID: <513EDFAA.8030803@yandex.ru> (raw)
[-- Attachment #1: Type: text/plain, Size: 390 bytes --]
This was designed in attempt to avoid long buffer scans in bidi_find_paragraph_start.
The same stuff may be used to implement simple per-buffer charpos <-> bytepos cache.
Caching the result of bidi_find_paragraph_start may improve the speed of backward
scrolling/movement (I've seen ~6x speedup for 60M buffer with average string of 5K
characters). Comments are highly appreciated.
Dmitry
[-- Attachment #2: pos_cache.patch --]
[-- Type: text/plain, Size: 9997 bytes --]
=== modified file 'src/bidi.c'
--- src/bidi.c 2013-03-08 09:34:35 +0000
+++ src/bidi.c 2013-03-12 07:39:27 +0000
@@ -1097,24 +1097,33 @@
static ptrdiff_t
bidi_find_paragraph_start (ptrdiff_t pos, ptrdiff_t pos_byte)
{
- Lisp_Object re = paragraph_start_re;
- ptrdiff_t limit = ZV, limit_byte = ZV_BYTE;
- ptrdiff_t n = 0;
+ struct buffer *b = current_buffer;
- while (pos_byte > BEGV_BYTE
- && n++ < MAX_PARAGRAPH_SEARCH
- && fast_looking_at (re, pos, pos_byte, limit, limit_byte, Qnil) < 0)
+ if (valid_pos_cache (b, &b->bidi_paragraph_cache)
+ && b->bidi_paragraph_cache.bufpos == pos_byte)
+ return b->bidi_paragraph_cache.posval;
+ else
{
- /* FIXME: What if the paragraph beginning is covered by a
- display string? And what if a display string covering some
- of the text over which we scan back includes
- paragraph_start_re? */
- DEC_BOTH (pos, pos_byte);
- pos = find_newline_no_quit (pos, pos_byte, -1, &pos_byte);
+ ptrdiff_t n = 0, savedpos = pos_byte;
+
+ while (pos_byte > BEGV_BYTE
+ && n++ < MAX_PARAGRAPH_SEARCH
+ && fast_looking_at (paragraph_start_re, pos, pos_byte,
+ ZV, ZV_BYTE, Qnil) < 0)
+ {
+ /* FIXME: What if the paragraph beginning is covered by a
+ display string? And what if a display string covering some
+ of the text over which we scan back includes
+ paragraph_start_re? */
+ DEC_BOTH (pos, pos_byte);
+ pos = find_newline_no_quit (pos, pos_byte, -1, &pos_byte);
+ }
+ if (n >= MAX_PARAGRAPH_SEARCH)
+ pos_byte = BEGV_BYTE;
+
+ update_pos_cache (b, &b->bidi_paragraph_cache, savedpos, pos_byte);
+ return pos_byte;
}
- if (n >= MAX_PARAGRAPH_SEARCH)
- pos_byte = BEGV_BYTE;
- return pos_byte;
}
/* On a 3.4 GHz machine, searching forward for a strong directional
=== modified file 'src/buffer.c'
--- src/buffer.c 2013-01-19 20:04:33 +0000
+++ src/buffer.c 2013-03-12 07:39:37 +0000
@@ -931,6 +931,8 @@
{
bset_filename (b, Qnil);
bset_file_truename (b, Qnil);
+ init_pos_cache (&b->char_byte_cache);
+ init_pos_cache (&b->bidi_paragraph_cache);
bset_directory (b, current_buffer ? BVAR (current_buffer, directory) : Qnil);
b->modtime = make_emacs_time (0, UNKNOWN_MODTIME_NSECS);
b->modtime_size = -1;
@@ -1844,8 +1846,6 @@
insisted on circular lists) so allow quitting here. */
frames_discard_buffer (buffer);
- clear_charpos_cache (b);
-
tem = Vinhibit_quit;
Vinhibit_quit = Qt;
/* Remove the buffer from the list of all buffers. */
@@ -2352,6 +2352,8 @@
current_buffer->clip_changed = 1; other_buffer->clip_changed = 1;
swapfield (newline_cache, struct region_cache *);
swapfield (width_run_cache, struct region_cache *);
+ swapfield (char_byte_cache, struct pos_cache);
+ swapfield (bidi_paragraph_cache, struct pos_cache);
current_buffer->prevent_redisplay_optimizations_p = 1;
other_buffer->prevent_redisplay_optimizations_p = 1;
swapfield (overlays_before, struct Lisp_Overlay *);
@@ -2459,9 +2461,6 @@
instead. */
bset_undo_list (current_buffer, Qt);
- /* If the cached position is for this buffer, clear it out. */
- clear_charpos_cache (current_buffer);
-
if (NILP (flag))
begv = BEGV_BYTE, zv = ZV_BYTE;
else
=== modified file 'src/buffer.h'
--- src/buffer.h 2013-01-17 05:52:13 +0000
+++ src/buffer.h 2013-03-12 07:39:54 +0000
@@ -412,7 +412,18 @@
#define BUF_FETCH_BYTE(buf, n) \
*(BUF_BYTE_ADDRESS ((buf), (n)))
-\f
+
+/* Convenient method to cache the correspondence between two
+ buffer positions. Unlike markers, the cache is not updated
+ when the buffer text is changed but just invalidated. */
+
+struct pos_cache
+ {
+ EMACS_INT modiff;
+ ptrdiff_t bufpos;
+ ptrdiff_t posval;
+ };
+
/* Define the actual buffer data structures. */
/* This data structure describes the actual text contents of a buffer.
@@ -861,6 +872,14 @@
/* Position where the overlay lists are centered. */
ptrdiff_t overlay_center;
+ /* Used by buf_charpos_to_bytepos and buf_bytepos_to_charpos.
+ Also updated with INC_BOTH and DEC_BOTH macros. */
+ struct pos_cache char_byte_cache;
+
+ /* Used by bidi_find_paragraph_start in attempt to avoid
+ long buffer scans. */
+ struct pos_cache bidi_paragraph_cache;
+
/* Changes in the buffer are recorded here for undo, and t means
don't record anything. This information belongs to the base
buffer of an indirect buffer. But we can't store it in the
@@ -869,6 +888,30 @@
Lisp_Object INTERNAL_FIELD (undo_list);
};
+/* Simple position cache API. */
+
+BUFFER_INLINE void
+init_pos_cache (struct pos_cache *pc)
+{
+ pc->modiff = 1;
+ pc->bufpos = pc->posval = 0;
+}
+
+BUFFER_INLINE bool
+valid_pos_cache (struct buffer *b, struct pos_cache *pc)
+{
+ return BUF_MODIFF (b) == pc->modiff;
+}
+
+BUFFER_INLINE void
+update_pos_cache (struct buffer *b, struct pos_cache *pc,
+ ptrdiff_t pos, ptrdiff_t val)
+{
+ pc->modiff = BUF_MODIFF (b);
+ pc->bufpos = pos;
+ pc->posval = val;
+}
+
/* Most code should use these functions to set Lisp fields in struct
buffer. */
BUFFER_INLINE void
=== modified file 'src/character.h'
--- src/character.h 2012-09-26 20:00:29 +0000
+++ src/character.h 2013-03-12 07:40:09 +0000
@@ -501,10 +501,14 @@
do \
{ \
(charpos)++; \
- if (NILP (BVAR (current_buffer, enable_multibyte_characters))) \
+ if (NILP (BVAR (current_buffer, \
+ enable_multibyte_characters))) \
(bytepos)++; \
else \
INC_POS ((bytepos)); \
+ update_pos_cache (current_buffer, \
+ ¤t_buffer->char_byte_cache, \
+ charpos, bytepos); \
} \
while (0)
@@ -515,10 +519,14 @@
do \
{ \
(charpos)--; \
- if (NILP (BVAR (current_buffer, enable_multibyte_characters))) \
+ if (NILP (BVAR (current_buffer, \
+ enable_multibyte_characters))) \
(bytepos)--; \
else \
DEC_POS ((bytepos)); \
+ update_pos_cache (current_buffer, \
+ ¤t_buffer->char_byte_cache, \
+ charpos, bytepos); \
} \
while (0)
=== modified file 'src/lisp.h'
--- src/lisp.h 2013-03-11 04:02:06 +0000
+++ src/lisp.h 2013-03-12 07:14:22 +0000
@@ -3319,7 +3319,6 @@
extern ptrdiff_t marker_position (Lisp_Object);
extern ptrdiff_t marker_byte_position (Lisp_Object);
-extern void clear_charpos_cache (struct buffer *);
extern ptrdiff_t buf_charpos_to_bytepos (struct buffer *, ptrdiff_t);
extern ptrdiff_t buf_bytepos_to_charpos (struct buffer *, ptrdiff_t);
extern void unchain_marker (struct Lisp_Marker *marker);
=== modified file 'src/marker.c'
--- src/marker.c 2013-02-19 14:44:03 +0000
+++ src/marker.c 2013-03-12 07:40:43 +0000
@@ -24,14 +24,6 @@
#include "character.h"
#include "buffer.h"
-/* Record one cached position found recently by
- buf_charpos_to_bytepos or buf_bytepos_to_charpos. */
-
-static ptrdiff_t cached_charpos;
-static ptrdiff_t cached_bytepos;
-static struct buffer *cached_buffer;
-static EMACS_INT cached_modiff;
-
/* Juanma Barranquero <lekktu@gmail.com> reported ~3x increased
bootstrap time when byte_char_debug_check is enabled; so this
is never turned on by --enable-checking configure option. */
@@ -69,13 +61,6 @@
#endif /* MARKER_DEBUG */
-void
-clear_charpos_cache (struct buffer *b)
-{
- if (cached_buffer == b)
- cached_buffer = 0;
-}
-\f
/* Converting between character positions and byte positions. */
/* There are several places in the buffer where we know
@@ -164,8 +149,8 @@
CONSIDER (BUF_BEGV (b), BUF_BEGV_BYTE (b));
CONSIDER (BUF_ZV (b), BUF_ZV_BYTE (b));
- if (b == cached_buffer && BUF_MODIFF (b) == cached_modiff)
- CONSIDER (cached_charpos, cached_bytepos);
+ if (valid_pos_cache (b, &b->char_byte_cache))
+ CONSIDER (b->char_byte_cache.bufpos, b->char_byte_cache.posval);
for (tail = BUF_MARKERS (b); tail; tail = tail->next)
{
@@ -199,11 +184,7 @@
build_marker (b, best_below, best_below_byte);
byte_char_debug_check (b, best_below, best_below_byte);
-
- cached_buffer = b;
- cached_modiff = BUF_MODIFF (b);
- cached_charpos = best_below;
- cached_bytepos = best_below_byte;
+ update_pos_cache (b, &b->char_byte_cache, best_below, best_below_byte);
return best_below_byte;
}
@@ -224,11 +205,7 @@
build_marker (b, best_above, best_above_byte);
byte_char_debug_check (b, best_above, best_above_byte);
-
- cached_buffer = b;
- cached_modiff = BUF_MODIFF (b);
- cached_charpos = best_above;
- cached_bytepos = best_above_byte;
+ update_pos_cache (b, &b->char_byte_cache, best_above, best_above_byte);
return best_above_byte;
}
@@ -307,8 +284,8 @@
CONSIDER (BUF_BEGV_BYTE (b), BUF_BEGV (b));
CONSIDER (BUF_ZV_BYTE (b), BUF_ZV (b));
- if (b == cached_buffer && BUF_MODIFF (b) == cached_modiff)
- CONSIDER (cached_bytepos, cached_charpos);
+ if (valid_pos_cache (b, &b->char_byte_cache))
+ CONSIDER (b->char_byte_cache.posval, b->char_byte_cache.bufpos);
for (tail = BUF_MARKERS (b); tail; tail = tail->next)
{
@@ -344,11 +321,7 @@
build_marker (b, best_below, best_below_byte);
byte_char_debug_check (b, best_below, best_below_byte);
-
- cached_buffer = b;
- cached_modiff = BUF_MODIFF (b);
- cached_charpos = best_below;
- cached_bytepos = best_below_byte;
+ update_pos_cache (b, &b->char_byte_cache, best_below, best_below_byte);
return best_below;
}
@@ -371,11 +344,7 @@
build_marker (b, best_above, best_above_byte);
byte_char_debug_check (b, best_above, best_above_byte);
-
- cached_buffer = b;
- cached_modiff = BUF_MODIFF (b);
- cached_charpos = best_above;
- cached_bytepos = best_above_byte;
+ update_pos_cache (b, &b->char_byte_cache, best_above, best_above_byte);
return best_above;
}
next reply other threads:[~2013-03-12 7:56 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-03-12 7:56 Dmitry Antipov [this message]
2013-03-12 17:08 ` [RFC] position caches Eli Zaretskii
2013-03-14 5:40 ` Dmitry Antipov
2013-03-14 18:23 ` Eli Zaretskii
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: https://www.gnu.org/software/emacs/
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=513EDFAA.8030803@yandex.ru \
--to=dmantipov@yandex.ru \
--cc=eliz@gnu.org \
--cc=emacs-devel@gnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://git.savannah.gnu.org/cgit/emacs.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).