From: Stefan Monnier <monnier@IRO.UMontreal.CA>
To: Eli Zaretskii <eliz@gnu.org>
Cc: emacs-devel@gnu.org
Subject: Re: State of the overlay tree branch?
Date: Fri, 23 Mar 2018 15:39:35 -0400 [thread overview]
Message-ID: <jwvzi2yio5f.fsf-monnier+emacs@gnu.org> (raw)
In-Reply-To: <837eq2j2i5.fsf@gnu.org> (Eli Zaretskii's message of "Fri, 23 Mar 2018 17:22:10 +0300")
OK, I think I'm tired of these experiments.
Here's my current test, along with the patches I use with it.
You can test it with something like
src/emacs -Q --batch -l ~/tmp/foo.el --eval '(setq internal--bytechar-distance-increment 50 internal--randomize-markers t)' --eval '(bytechar-test 3000 nil)'
Shuffling the markers can make a noticeable difference in some cases but
we're only talking about a factor of 2 or 3. It doesn't have much
negative impact, so it's not a bad option, but the algorithmic
problem remains anyway.
Stefan
(defun bytechar-test (buffer-kb &optional forward)
(random "seed")
(with-temp-buffer
(dotimes (i (* buffer-kb 33))
(insert "lksajflahalskjdféefawrgfrüegf\n"))
(message "buffer-size = %SkB" (/ (buffer-size) 1024))
(let ((txtbuf (current-buffer))
(goto-iterations (/ 10000000 buffer-kb))
(line-iterations (/ 20000 buffer-kb))
(markers ()))
(dotimes (s 4)
(with-temp-buffer
(insert-buffer-substring txtbuf)
(let ((stepsize (lsh 10 (* 4 s))))
(if forward
(cl-loop for n from (point-min) upto (point-max) by stepsize do
(push (copy-marker n) markers))
(cl-loop for n from (point-max) downto (point-min) by stepsize do
(push (copy-marker n) markers))))
;; The GC's internal--randomize-markers just brings-to-front every
;; 8th marker, so when starting with an ordered list of markers (like
;; in our case), we need to run the GC at least 8 times before the
;; whole list starts to look somewhat shuffled.
(dotimes (i 20) (garbage-collect))
(let ((timing
(benchmark-run goto-iterations
(goto-char (+ (point-min) (random (buffer-size)))))))
(message "ols=%S goto-random time=%.4f (+ %S)"
(/ (buffer-size) (lsh 10 (* 4 s)))
(car timing) (cdr timing)))
(garbage-collect) ;throw away the transient markers
(let ((timing
(benchmark-run line-iterations
(dotimes (i 5)
(line-number-at-pos
(+ (point-min) (* i (/ (buffer-size) 4))))))))
(message "nbmks=%S pos=*/4 time=%.4f (+ %S)"
(/ (buffer-size) (lsh 10 (* 4 s)))
(car timing) (cdr timing)))
(dotimes (i 5)
(let ((timing
(benchmark-run line-iterations
(line-number-at-pos
(+ (point-min) (* i (/ (buffer-size) 4)))))))
(message "nbmks=%S pos=%S/4 time=%.4f (+ %S)"
(/ (buffer-size) (lsh 10 (* 4 s))) i
(car timing) (cdr timing))))
)))))
diff --git a/lisp/emacs-lisp/benchmark.el b/lisp/emacs-lisp/benchmark.el
index b86b56b81e..2f4e38fe35 100644
--- a/lisp/emacs-lisp/benchmark.el
+++ b/lisp/emacs-lisp/benchmark.el
@@ -50,7 +50,7 @@ benchmark-run
garbage collections that ran, and the time taken by garbage collection.
See also `benchmark-run-compiled'."
(declare (indent 1) (debug t))
- (unless (natnump repetitions)
+ (unless (or (natnump repetitions) (symbolp repetitions))
(setq forms (cons repetitions forms)
repetitions 1))
(let ((i (make-symbol "i"))
@@ -58,7 +58,7 @@ benchmark-run
(gc (make-symbol "gc")))
`(let ((,gc gc-elapsed)
(,gcs gcs-done))
- (list ,(if (> repetitions 1)
+ (list ,(if (or (symbolp repetitions) (> repetitions 1))
;; Take account of the loop overhead.
`(- (benchmark-elapse (dotimes (,i ,repetitions)
,@forms))
@@ -101,7 +101,7 @@ benchmark
For non-interactive use see also `benchmark-run' and
`benchmark-run-compiled'."
(interactive "p\nxForm: ")
- (let ((result (eval `(benchmark-run ,repetitions ,form))))
+ (let ((result (eval `(benchmark-run ,repetitions ,form) t)))
(if (zerop (nth 1 result))
(message "Elapsed time: %fs" (car result))
(message "Elapsed time: %fs (%fs in %d GCs)" (car result)
diff --git a/src/alloc.c b/src/alloc.c
index 7ba872aaee..16d11e34cd 100644
--- a/src/alloc.c
+++ b/src/alloc.c
@@ -7270,10 +7270,22 @@ static void
unchain_dead_markers (struct buffer *buffer)
{
struct Lisp_Marker *this, **prev = &BUF_MARKERS (buffer);
+ /* In order to try and avoid worst case behaviors in buf_charpos_to_bytepos
+ we try and randomize the order of markers here. */
+ unsigned i = 4;
while ((this = *prev))
if (this->gcmarkbit)
- prev = &this->next;
+ {
+ if (!randomize_markers || i++ % 8)
+ prev = &this->next;
+ else
+ { /* Move this one to front, just to randomize things a bit. */
+ *prev = this->next;
+ this->next = BUF_MARKERS (buffer);
+ BUF_MARKERS (buffer) = this;
+ }
+ }
else
{
this->buffer = NULL;
@@ -7752,6 +7764,9 @@ The time is in seconds as a floating point value. */);
DEFVAR_INT ("gcs-done", gcs_done,
doc: /* Accumulated number of garbage collections done. */);
+ DEFVAR_BOOL ("internal--randomize-markers", randomize_markers, doc: /* */);
+ randomize_markers = true;
+
defsubr (&Scons);
defsubr (&Slist);
defsubr (&Svector);
diff --git a/src/marker.c b/src/marker.c
index 3d808fd6fa..7c1d164927 100644
--- a/src/marker.c
+++ b/src/marker.c
@@ -133,6 +133,28 @@ CHECK_MARKER (Lisp_Object x)
CHECK_TYPE (MARKERP (x), Qmarkerp, x);
}
+/* When converting bytes from/to chars, we look through the list of
+ markers to try and find a good starting point (since markers keep
+ track of both bytepos and charpos at the same time).
+ But if there are many markers, it can take too much time to find a "good"
+ marker from which to start. Worse yet: if it takes a long time and we end
+ up finding a nearby markers, we won't add a new marker to cache this
+ result, so next time around we'll have to go through this same long list
+ to (re)find this best marker. So the further down the list of
+ markers we go, the less demanding we are w.r.t what is a good marker.
+
+ The previous code used INITIAL=50 and INCREMENT=0 and this lead to
+ really poor performance when there are many markers.
+ I haven't tried to tweak INITIAL, but my experiments on my trusty Thinkpad
+ T61 using various artificial test cases seem to suggest that INCREMENT=50
+ might be "the best compromise": it significantly improved the
+ worst case and it was rarely slower and never by much.
+
+ The asymptotic behavior is still poor, tho, so in largish buffers with many
+ overlays (e.g. 300KB and 30K overlays), it can still be a bottlneck. */
+#define BYTECHAR_DISTANCE_INITIAL 50
+#define BYTECHAR_DISTANCE_INCREMENT bytechar_distance_increment
+
/* Return the byte position corresponding to CHARPOS in B. */
ptrdiff_t
@@ -141,7 +163,7 @@ buf_charpos_to_bytepos (struct buffer *b, ptrdiff_t charpos)
struct Lisp_Marker *tail;
ptrdiff_t best_above, best_above_byte;
ptrdiff_t best_below, best_below_byte;
- ptrdiff_t distance = 50;
+ ptrdiff_t distance = BYTECHAR_DISTANCE_INITIAL;
eassert (BUF_BEG (b) <= charpos && charpos <= BUF_Z (b));
@@ -184,7 +206,7 @@ buf_charpos_to_bytepos (struct buffer *b, ptrdiff_t charpos)
if (best_above - best_below < distance)
break;
else
- distance++;
+ distance += BYTECHAR_DISTANCE_INCREMENT;
}
/* We get here if we did not exactly hit one of the known places.
@@ -296,7 +318,7 @@ buf_bytepos_to_charpos (struct buffer *b, ptrdiff_t bytepos)
struct Lisp_Marker *tail;
ptrdiff_t best_above, best_above_byte;
ptrdiff_t best_below, best_below_byte;
- ptrdiff_t distance = 50;
+ ptrdiff_t distance = BYTECHAR_DISTANCE_INITIAL;
eassert (BUF_BEG_BYTE (b) <= bytepos && bytepos <= BUF_Z_BYTE (b));
@@ -330,7 +352,7 @@ buf_bytepos_to_charpos (struct buffer *b, ptrdiff_t bytepos)
if (best_above - best_below < distance)
break;
else
- distance++;
+ distance += BYTECHAR_DISTANCE_INCREMENT;
}
/* We get here if we did not exactly hit one of the known places.
@@ -756,4 +778,9 @@ syms_of_marker (void)
defsubr (&Scopy_marker);
defsubr (&Smarker_insertion_type);
defsubr (&Sset_marker_insertion_type);
+
+ DEFVAR_INT ("internal--bytechar-distance-increment",
+ bytechar_distance_increment,
+ doc: /* Haha */);
+ bytechar_distance_increment = 50;
}
next prev parent reply other threads:[~2018-03-23 19:39 UTC|newest]
Thread overview: 54+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-03-18 20:14 State of the overlay tree branch? Sebastian Sturm
2018-03-18 20:39 ` Eli Zaretskii
2018-03-18 21:04 ` Sebastian Sturm
2018-03-18 23:03 ` Sebastian Sturm
2018-03-18 23:20 ` Sebastian Sturm
2018-03-19 6:43 ` Eli Zaretskii
2018-03-19 9:53 ` Sebastian Sturm
2018-03-19 12:57 ` Eli Zaretskii
2018-03-19 14:56 ` Stefan Monnier
2018-03-19 15:07 ` Sebastian Sturm
2018-03-19 15:13 ` Stefan Monnier
2018-03-20 1:23 ` Sebastian Sturm
2018-03-20 6:30 ` Eli Zaretskii
2018-03-21 0:36 ` Sebastian Sturm
2018-03-21 6:47 ` Eli Zaretskii
2018-03-22 13:16 ` Stefan Monnier
2018-03-22 19:54 ` Sebastian Sturm
2018-03-22 20:04 ` Sebastian Sturm
2018-03-22 20:52 ` Stefan Monnier
2018-03-22 23:11 ` Sebastian Sturm
2018-03-23 5:03 ` Stefan Monnier
2018-03-23 12:25 ` Sebastian Sturm
2018-03-23 12:47 ` Eli Zaretskii
2018-03-23 13:19 ` Stefan Monnier
2018-03-23 13:37 ` Noam Postavsky
2018-03-23 13:55 ` Stefan Monnier
2018-03-23 14:22 ` Eli Zaretskii
2018-03-23 14:39 ` Stefan Monnier
2018-03-23 19:39 ` Stefan Monnier [this message]
2018-03-25 15:11 ` Stefan Monnier
2018-03-25 16:39 ` Eli Zaretskii
2018-03-25 17:35 ` Stefan Monnier
2018-03-23 8:07 ` Eli Zaretskii
2018-03-23 9:08 ` Eli Zaretskii
2018-03-23 10:15 ` Sebastian Sturm
2018-03-23 12:39 ` Eli Zaretskii
2018-03-23 12:12 ` Stefan Monnier
2018-03-23 12:40 ` Eli Zaretskii
2018-03-23 12:55 ` Stefan Monnier
2018-03-19 6:36 ` Eli Zaretskii
2018-03-19 6:28 ` Eli Zaretskii
2018-03-21 14:14 ` Sebastien Chapuis
2018-03-21 15:35 ` Eli Zaretskii
2018-03-26 13:06 ` Stefan Monnier
2018-03-27 20:59 ` Sebastian Sturm
[not found] <<c24f8534-5245-026e-da18-f6be7b9702bf@arkona-technologies.de>
[not found] ` <<834lldp18f.fsf@gnu.org>
2018-03-18 21:37 ` Drew Adams
2018-03-19 1:33 ` Stefan Monnier
2018-03-19 6:50 ` Eli Zaretskii
2018-03-19 12:29 ` Stefan Monnier
2018-03-19 13:02 ` Eli Zaretskii
2018-03-19 13:43 ` Stefan Monnier
2018-03-19 14:28 ` Eli Zaretskii
2018-03-19 14:39 ` Stefan Monnier
2018-03-19 6:33 ` Eli Zaretskii
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: https://www.gnu.org/software/emacs/
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=jwvzi2yio5f.fsf-monnier+emacs@gnu.org \
--to=monnier@iro.umontreal.ca \
--cc=eliz@gnu.org \
--cc=emacs-devel@gnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://git.savannah.gnu.org/cgit/emacs.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).