all messages for Emacs-related lists mirrored at yhetil.org
 help / color / mirror / code / Atom feed
From: Stefan Monnier <monnier@IRO.UMontreal.CA>
To: Eli Zaretskii <eliz@gnu.org>
Cc: emacs-devel@gnu.org
Subject: Re: State of the overlay tree branch?
Date: Fri, 23 Mar 2018 15:39:35 -0400	[thread overview]
Message-ID: <jwvzi2yio5f.fsf-monnier+emacs@gnu.org> (raw)
In-Reply-To: <837eq2j2i5.fsf@gnu.org> (Eli Zaretskii's message of "Fri, 23 Mar 2018 17:22:10 +0300")

OK, I think I'm tired of these experiments.
Here's my current test, along with the patches I use with it.

You can test it with something like

    src/emacs -Q --batch -l ~/tmp/foo.el --eval '(setq internal--bytechar-distance-increment 50 internal--randomize-markers t)' --eval '(bytechar-test 3000 nil)' 

Shuffling the markers can make a noticeable difference in some cases but
we're only talking about a factor of 2 or 3.  It doesn't have much
negative impact, so it's not a bad option, but the algorithmic
problem remains anyway.


        Stefan


(defun bytechar-test (buffer-kb &optional forward)
  (random "seed")
  (with-temp-buffer
    (dotimes (i (* buffer-kb 33))
      (insert "lksajflahalskjdféefawrgfrüegf\n"))
    (message "buffer-size = %SkB" (/ (buffer-size) 1024))
    (let ((txtbuf (current-buffer))
          (goto-iterations (/ 10000000 buffer-kb))
          (line-iterations (/ 20000 buffer-kb))
          (markers ()))
      (dotimes (s 4)
        (with-temp-buffer
          (insert-buffer-substring txtbuf)
          (let ((stepsize (lsh 10 (* 4 s))))
            (if forward
                (cl-loop for n from (point-min) upto (point-max) by stepsize do
                         (push (copy-marker n) markers))
              (cl-loop for n from (point-max) downto (point-min) by stepsize do
                       (push (copy-marker n) markers))))
          ;; The GC's internal--randomize-markers just brings-to-front every
          ;; 8th marker, so when starting with an ordered list of markers (like
          ;; in our case), we need to run the GC at least 8 times before the
          ;; whole list starts to look somewhat shuffled.
          (dotimes (i 20) (garbage-collect))
          (let ((timing
                 (benchmark-run goto-iterations
                   (goto-char (+ (point-min) (random (buffer-size)))))))
            (message "ols=%S goto-random time=%.4f (+ %S)"
                     (/ (buffer-size) (lsh 10 (* 4 s)))
                     (car timing) (cdr timing)))
          (garbage-collect)             ;throw away the transient markers
          (let ((timing
                 (benchmark-run line-iterations
                   (dotimes (i 5)
                     (line-number-at-pos
                      (+ (point-min) (* i (/ (buffer-size) 4))))))))
            (message "nbmks=%S pos=*/4 time=%.4f (+ %S)"
                     (/ (buffer-size) (lsh 10 (* 4 s)))
                     (car timing) (cdr timing)))
          (dotimes (i 5)
            (let ((timing
                   (benchmark-run line-iterations
                     (line-number-at-pos
                      (+ (point-min) (* i (/ (buffer-size) 4)))))))
              (message "nbmks=%S pos=%S/4 time=%.4f (+ %S)"
                       (/ (buffer-size) (lsh 10 (* 4 s))) i
                       (car timing) (cdr timing))))
          )))))

diff --git a/lisp/emacs-lisp/benchmark.el b/lisp/emacs-lisp/benchmark.el
index b86b56b81e..2f4e38fe35 100644
--- a/lisp/emacs-lisp/benchmark.el
+++ b/lisp/emacs-lisp/benchmark.el
@@ -50,7 +50,7 @@ benchmark-run
 garbage collections that ran, and the time taken by garbage collection.
 See also `benchmark-run-compiled'."
   (declare (indent 1) (debug t))
-  (unless (natnump repetitions)
+  (unless (or (natnump repetitions) (symbolp repetitions))
     (setq forms (cons repetitions forms)
 	  repetitions 1))
   (let ((i (make-symbol "i"))
@@ -58,7 +58,7 @@ benchmark-run
 	(gc (make-symbol "gc")))
     `(let ((,gc gc-elapsed)
 	   (,gcs gcs-done))
-       (list ,(if (> repetitions 1)
+       (list ,(if (or (symbolp repetitions) (> repetitions 1))
 		  ;; Take account of the loop overhead.
 		  `(- (benchmark-elapse (dotimes (,i ,repetitions)
 					  ,@forms))
@@ -101,7 +101,7 @@ benchmark
 For non-interactive use see also `benchmark-run' and
 `benchmark-run-compiled'."
   (interactive "p\nxForm: ")
-  (let ((result (eval `(benchmark-run ,repetitions ,form))))
+  (let ((result (eval `(benchmark-run ,repetitions ,form) t)))
     (if (zerop (nth 1 result))
 	(message "Elapsed time: %fs" (car result))
       (message "Elapsed time: %fs (%fs in %d GCs)" (car result)
diff --git a/src/alloc.c b/src/alloc.c
index 7ba872aaee..16d11e34cd 100644
--- a/src/alloc.c
+++ b/src/alloc.c
@@ -7270,10 +7270,22 @@ static void
 unchain_dead_markers (struct buffer *buffer)
 {
   struct Lisp_Marker *this, **prev = &BUF_MARKERS (buffer);
+  /* In order to try and avoid worst case behaviors in buf_charpos_to_bytepos
+     we try and randomize the order of markers here.  */
+  unsigned i = 4;
 
   while ((this = *prev))
     if (this->gcmarkbit)
-      prev = &this->next;
+      {
+        if (!randomize_markers || i++ % 8)
+          prev = &this->next;
+        else
+          { /* Move this one to front, just to randomize things a bit.  */
+            *prev = this->next;
+            this->next = BUF_MARKERS (buffer);
+            BUF_MARKERS (buffer) = this;
+          }
+      }
     else
       {
         this->buffer = NULL;
@@ -7752,6 +7764,9 @@ The time is in seconds as a floating point value.  */);
   DEFVAR_INT ("gcs-done", gcs_done,
               doc: /* Accumulated number of garbage collections done.  */);
 
+  DEFVAR_BOOL ("internal--randomize-markers", randomize_markers, doc: /*  */);
+  randomize_markers = true;
+  
   defsubr (&Scons);
   defsubr (&Slist);
   defsubr (&Svector);
diff --git a/src/marker.c b/src/marker.c
index 3d808fd6fa..7c1d164927 100644
--- a/src/marker.c
+++ b/src/marker.c
@@ -133,6 +133,28 @@ CHECK_MARKER (Lisp_Object x)
   CHECK_TYPE (MARKERP (x), Qmarkerp, x);
 }
 
+/* When converting bytes from/to chars, we look through the list of
+   markers to try and find a good starting point (since markers keep
+   track of both bytepos and charpos at the same time).
+   But if there are many markers, it can take too much time to find a "good"
+   marker from which to start.  Worse yet: if it takes a long time and we end
+   up finding a nearby markers, we won't add a new marker to cache this
+   result, so next time around we'll have to go through this same long list
+   to (re)find this best marker.  So the further down the list of
+   markers we go, the less demanding we are w.r.t what is a good marker.
+
+   The previous code used INITIAL=50 and INCREMENT=0 and this lead to
+   really poor performance when there are many markers.
+   I haven't tried to tweak INITIAL, but my experiments on my trusty Thinkpad
+   T61 using various artificial test cases seem to suggest that INCREMENT=50
+   might be "the best compromise": it significantly improved the
+   worst case and it was rarely slower and never by much.
+
+   The asymptotic behavior is still poor, tho, so in largish buffers with many
+   overlays (e.g. 300KB and 30K overlays), it can still be a bottlneck.  */
+#define BYTECHAR_DISTANCE_INITIAL 50
+#define BYTECHAR_DISTANCE_INCREMENT bytechar_distance_increment
+
 /* Return the byte position corresponding to CHARPOS in B.  */
 
 ptrdiff_t
@@ -141,7 +163,7 @@ buf_charpos_to_bytepos (struct buffer *b, ptrdiff_t charpos)
   struct Lisp_Marker *tail;
   ptrdiff_t best_above, best_above_byte;
   ptrdiff_t best_below, best_below_byte;
-  ptrdiff_t distance = 50;
+  ptrdiff_t distance = BYTECHAR_DISTANCE_INITIAL;
 
   eassert (BUF_BEG (b) <= charpos && charpos <= BUF_Z (b));
 
@@ -184,7 +206,7 @@ buf_charpos_to_bytepos (struct buffer *b, ptrdiff_t charpos)
       if (best_above - best_below < distance)
 	break;
       else
-        distance++;
+        distance += BYTECHAR_DISTANCE_INCREMENT;
     }
 
   /* We get here if we did not exactly hit one of the known places.
@@ -296,7 +318,7 @@ buf_bytepos_to_charpos (struct buffer *b, ptrdiff_t bytepos)
   struct Lisp_Marker *tail;
   ptrdiff_t best_above, best_above_byte;
   ptrdiff_t best_below, best_below_byte;
-  ptrdiff_t distance = 50;
+  ptrdiff_t distance = BYTECHAR_DISTANCE_INITIAL;
 
   eassert (BUF_BEG_BYTE (b) <= bytepos && bytepos <= BUF_Z_BYTE (b));
 
@@ -330,7 +352,7 @@ buf_bytepos_to_charpos (struct buffer *b, ptrdiff_t bytepos)
       if (best_above - best_below < distance)
 	break;
       else
-        distance++;
+        distance += BYTECHAR_DISTANCE_INCREMENT;
     }
 
   /* We get here if we did not exactly hit one of the known places.
@@ -756,4 +778,9 @@ syms_of_marker (void)
   defsubr (&Scopy_marker);
   defsubr (&Smarker_insertion_type);
   defsubr (&Sset_marker_insertion_type);
+
+  DEFVAR_INT ("internal--bytechar-distance-increment",
+              bytechar_distance_increment,
+              doc: /* Haha */);
+  bytechar_distance_increment = 50;
 }



  parent reply	other threads:[~2018-03-23 19:39 UTC|newest]

Thread overview: 54+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-03-18 20:14 State of the overlay tree branch? Sebastian Sturm
2018-03-18 20:39 ` Eli Zaretskii
2018-03-18 21:04   ` Sebastian Sturm
2018-03-18 23:03     ` Sebastian Sturm
2018-03-18 23:20       ` Sebastian Sturm
2018-03-19  6:43         ` Eli Zaretskii
2018-03-19  9:53           ` Sebastian Sturm
2018-03-19 12:57             ` Eli Zaretskii
2018-03-19 14:56             ` Stefan Monnier
2018-03-19 15:07               ` Sebastian Sturm
2018-03-19 15:13                 ` Stefan Monnier
2018-03-20  1:23                 ` Sebastian Sturm
2018-03-20  6:30                   ` Eli Zaretskii
2018-03-21  0:36                     ` Sebastian Sturm
2018-03-21  6:47                       ` Eli Zaretskii
2018-03-22 13:16                       ` Stefan Monnier
2018-03-22 19:54                         ` Sebastian Sturm
2018-03-22 20:04                           ` Sebastian Sturm
2018-03-22 20:52                   ` Stefan Monnier
2018-03-22 23:11                     ` Sebastian Sturm
2018-03-23  5:03                       ` Stefan Monnier
2018-03-23 12:25                         ` Sebastian Sturm
2018-03-23 12:47                           ` Eli Zaretskii
2018-03-23 13:19                             ` Stefan Monnier
2018-03-23 13:37                               ` Noam Postavsky
2018-03-23 13:55                                 ` Stefan Monnier
2018-03-23 14:22                               ` Eli Zaretskii
2018-03-23 14:39                                 ` Stefan Monnier
2018-03-23 19:39                                 ` Stefan Monnier [this message]
2018-03-25 15:11                                   ` Stefan Monnier
2018-03-25 16:39                                     ` Eli Zaretskii
2018-03-25 17:35                                       ` Stefan Monnier
2018-03-23  8:07                       ` Eli Zaretskii
2018-03-23  9:08                         ` Eli Zaretskii
2018-03-23 10:15                           ` Sebastian Sturm
2018-03-23 12:39                             ` Eli Zaretskii
2018-03-23 12:12                           ` Stefan Monnier
2018-03-23 12:40                             ` Eli Zaretskii
2018-03-23 12:55                               ` Stefan Monnier
2018-03-19  6:36       ` Eli Zaretskii
2018-03-19  6:28     ` Eli Zaretskii
2018-03-21 14:14   ` Sebastien Chapuis
2018-03-21 15:35     ` Eli Zaretskii
2018-03-26 13:06 ` Stefan Monnier
2018-03-27 20:59   ` Sebastian Sturm
     [not found] <<c24f8534-5245-026e-da18-f6be7b9702bf@arkona-technologies.de>
     [not found] ` <<834lldp18f.fsf@gnu.org>
2018-03-18 21:37   ` Drew Adams
2018-03-19  1:33     ` Stefan Monnier
2018-03-19  6:50       ` Eli Zaretskii
2018-03-19 12:29         ` Stefan Monnier
2018-03-19 13:02           ` Eli Zaretskii
2018-03-19 13:43             ` Stefan Monnier
2018-03-19 14:28               ` Eli Zaretskii
2018-03-19 14:39                 ` Stefan Monnier
2018-03-19  6:33     ` Eli Zaretskii

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=jwvzi2yio5f.fsf-monnier+emacs@gnu.org \
    --to=monnier@iro.umontreal.ca \
    --cc=eliz@gnu.org \
    --cc=emacs-devel@gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this external index

	https://git.savannah.gnu.org/cgit/emacs.git
	https://git.savannah.gnu.org/cgit/emacs/org-mode.git

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.