unofficial mirror of bug-gnu-emacs@gnu.org 
 help / color / mirror / code / Atom feed
* bug#54532: [PATCH] sorting
@ 2022-03-22 23:59 Andrew Cohen
  2022-03-23 12:02 ` Lars Ingebrigtsen
                   ` (4 more replies)
  0 siblings, 5 replies; 19+ messages in thread
From: Andrew Cohen @ 2022-03-22 23:59 UTC (permalink / raw)
  To: 54532

[-- Attachment #1: Type: text/plain, Size: 2395 bytes --]

Tags: patch

As discussed on emacs-devel I have been working on a replacement for the
current sorting algorithm in Emacs. With the help of Mattias Engdegård
we have made a lot of progress, and the attached patch implements
TIMSORT, the sorting algorithm introduced 20 years ago in python and now
used in Android and many other places. This implementation is pretty
much always 20% to 30% faster than the current version, and in many
cases is an order of magnitude faster. The current Emacs code treats
lists and vectors differently, while the new implementation uses a
common code path. Some benchmarks (times in microseconds; all lists are
length 10K):

|                                     | oldlist | oldvec |  tim |
| (make-random-list 10000)            |    2790 |   2123 | 1557 |
| (nreverse (make-sorted-list 10000)) |    1417 |    987 |  118 |
| (make-sorted-list 10000)            |    1310 |    899 |  116 |
| (make-swapped-list 10000 3)         |    1457 |   1019 |  122 |
| (make-plus-list 10000)              |    1309 |    899 |  119 |
| (make-onepercent-list 10000)        |    1764 |   1272 |  183 |
| (make-constant-list 10000)          |    1292 |    888 |  116 |
| (make-evil-list 10000)              |    1374 |    946 |  398 |
| (make-block-list 10000 100)         |    2235 |   1646 |  919 |
| (make-block-list 10000 10)          |    2598 |   1962 | 1451 |

The patch has 4 parts:

1. Add a new `record_unwind_protect_ptr_mark` function for use with C data
    structures that use the specpdl for clean-up but also contain possibly
    unique references to Lisp objects. This is needed for the dynamic
    memory management that the new algorithm uses.
2. The actual sorting change. This removes the old sorting routines and
    puts the new implementation in a separate file, sort.c
3. A bunch of new unit tests.
4. An optimization that resolves the sorting comparison symbol into the
    corresponding function before starting the sort.


In GNU Emacs 29.0.50 (build 5, x86_64-pc-linux-gnu, GTK+ Version 3.24.33, cairo version 1.16.0)
 of 2022-03-22 built on clove
Repository revision: c3c1ee56a44541e1eb2fd7e523f7508fd330d635
Repository branch: scratch/local
System Description: Debian GNU/Linux bookworm/sid

Configured using:
 'configure --with-x-toolkit=gtk3 --with-native-compilation --with-pgtk
 --with-xwidgets'


[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: patch.diff --]
[-- Type: text/patch, Size: 43863 bytes --]

From daf46703ce83cc652667e89aa50161a36e9a8575 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mattias=20Engdeg=C3=A5rd?= <mattiase@acm.org>
Date: Sat, 5 Mar 2022 11:12:54 +0100
Subject: [PATCH 1/4] Add optional GC marking function to specpdl unwind_ptr
 record

Add a new `record_unwind_protect_ptr_mark` function for use with C data
structures that use the specpdl for clean-up but also contain possibly
unique references to Lisp objects.

* src/eval.c (record_unwind_protect_ptr_mark): New.
(record_unwind_protect_module, set_unwind_protect_ptr):
Set the mark function to NULL.
(mark_specpdl): Call the mark function if present.
* src/lisp.h (unwind_ptr): Add a mark function pointer to the
SPECPDL_UNWIND_PTR case.
---
 src/eval.c | 20 ++++++++++++++++++++
 src/lisp.h |  5 ++++-
 2 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/src/eval.c b/src/eval.c
index 294d79e67a..593cbaba98 100644
--- a/src/eval.c
+++ b/src/eval.c
@@ -3612,6 +3612,20 @@ record_unwind_protect_ptr (void (*function) (void *), void *arg)
   specpdl_ptr->unwind_ptr.kind = SPECPDL_UNWIND_PTR;
   specpdl_ptr->unwind_ptr.func = function;
   specpdl_ptr->unwind_ptr.arg = arg;
+  specpdl_ptr->unwind_ptr.mark = NULL;
+  grow_specpdl ();
+}
+
+/* Like `record_unwind_protect_ptr', but also specifies a function
+   for GC-marking Lisp objects only reachable through ARG.  */
+void
+record_unwind_protect_ptr_mark (void (*function) (void *), void *arg,
+				void (*mark) (void *))
+{
+  specpdl_ptr->unwind_ptr.kind = SPECPDL_UNWIND_PTR;
+  specpdl_ptr->unwind_ptr.func = function;
+  specpdl_ptr->unwind_ptr.arg = arg;
+  specpdl_ptr->unwind_ptr.mark = mark;
   grow_specpdl ();
 }
 
@@ -3655,6 +3669,7 @@ record_unwind_protect_module (enum specbind_tag kind, void *ptr)
   specpdl_ptr->kind = kind;
   specpdl_ptr->unwind_ptr.func = NULL;
   specpdl_ptr->unwind_ptr.arg = ptr;
+  specpdl_ptr->unwind_ptr.mark = NULL;
   grow_specpdl ();
 }
 
@@ -3783,6 +3798,7 @@ set_unwind_protect_ptr (specpdl_ref count, void (*func) (void *), void *arg)
   p->unwind_ptr.kind = SPECPDL_UNWIND_PTR;
   p->unwind_ptr.func = func;
   p->unwind_ptr.arg = arg;
+  p->unwind_ptr.mark = NULL;
 }
 
 /* Pop and execute entries from the unwind-protect stack until the
@@ -4216,6 +4232,10 @@ mark_specpdl (union specbinding *first, union specbinding *ptr)
 	  break;
 
 	case SPECPDL_UNWIND_PTR:
+	  if (pdl->unwind_ptr.mark)
+	    pdl->unwind_ptr.mark (pdl->unwind_ptr.arg);
+	  break;
+
 	case SPECPDL_UNWIND_INT:
 	case SPECPDL_UNWIND_INTMAX:
         case SPECPDL_UNWIND_VOID:
diff --git a/src/lisp.h b/src/lisp.h
index deeca9bc86..315fb03fe6 100644
--- a/src/lisp.h
+++ b/src/lisp.h
@@ -3313,8 +3313,9 @@ #define DEFVAR_KBOARD(lname, vname, doc)			\
     } unwind_array;
     struct {
       ENUM_BF (specbind_tag) kind : CHAR_BIT;
-      void (*func) (void *);
+      void (*func) (void *);	/* Unwind function.  */
       void *arg;
+      void (*mark) (void *);	/* GC mark function (if non-null).  */
     } unwind_ptr;
     struct {
       ENUM_BF (specbind_tag) kind : CHAR_BIT;
@@ -4440,6 +4441,8 @@ xsignal (Lisp_Object error_symbol, Lisp_Object data)
 extern void record_unwind_protect (void (*) (Lisp_Object), Lisp_Object);
 extern void record_unwind_protect_array (Lisp_Object *, ptrdiff_t);
 extern void record_unwind_protect_ptr (void (*) (void *), void *);
+extern void record_unwind_protect_ptr_mark (void (*function) (void *),
+					    void *arg, void (*mark) (void *));
 extern void record_unwind_protect_int (void (*) (int), int);
 extern void record_unwind_protect_intmax (void (*) (intmax_t), intmax_t);
 extern void record_unwind_protect_void (void (*) (void));
-- 
2.34.1.575.g55b058a8bb


From d31e7628dfeb111f201161f1363a909123c14a43 Mon Sep 17 00:00:00 2001
From: Andrew G Cohen <cohen@andy.bu.edu>
Date: Thu, 10 Mar 2022 09:30:00 +0800
Subject: [PATCH 2/4] Replace list and vector sorting with TIMSORT algorithm

* src/Makefile.in (base_obj): Add sort.o.
* src/deps.mk (fns.o): Add sort.c.
* src/lisp.h: Add prototypes for inorder, tim_sort.
* src/sort.c: New file providing tim_sort.
* src/fns.c:  Remove prototypes for removed routines.
(merge_vectors, sort_vector_inplace, sort_vector_copy): Remove.
(sort_list, sort_vector): Use tim_sort.
---
 src/Makefile.in |   2 +-
 src/deps.mk     |   2 +-
 src/fns.c       | 129 ++-----
 src/lisp.h      |   3 +
 src/sort.c      | 961 ++++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 996 insertions(+), 101 deletions(-)
 create mode 100644 src/sort.c

diff --git a/src/Makefile.in b/src/Makefile.in
index 3353fb16d7..e0f18dc352 100644
--- a/src/Makefile.in
+++ b/src/Makefile.in
@@ -427,7 +427,7 @@ base_obj =
 	minibuf.o fileio.o dired.o \
 	cmds.o casetab.o casefiddle.o indent.o search.o regex-emacs.o undo.o \
 	alloc.o pdumper.o data.o doc.o editfns.o callint.o \
-	eval.o floatfns.o fns.o font.o print.o lread.o $(MODULES_OBJ) \
+	eval.o floatfns.o fns.o sort.o font.o print.o lread.o $(MODULES_OBJ) \
 	syntax.o $(UNEXEC_OBJ) bytecode.o comp.o $(DYNLIB_OBJ) \
 	process.o gnutls.o callproc.o \
 	region-cache.o sound.o timefns.o atimer.o \
diff --git a/src/deps.mk b/src/deps.mk
index deffab93ec..39edd5c1dd 100644
--- a/src/deps.mk
+++ b/src/deps.mk
@@ -279,7 +279,7 @@ eval.o:
    dispextern.h lisp.h globals.h $(config_h) coding.h composite.h xterm.h \
    msdos.h
 floatfns.o: floatfns.c syssignal.h lisp.h globals.h $(config_h)
-fns.o: fns.c commands.h lisp.h $(config_h) frame.h buffer.h character.h \
+fns.o: fns.c sort.c commands.h lisp.h $(config_h) frame.h buffer.h character.h \
    keyboard.h keymap.h window.h $(INTERVALS_H) coding.h ../lib/md5.h \
    ../lib/sha1.h ../lib/sha256.h ../lib/sha512.h blockinput.h atimer.h \
    systime.h xterm.h ../lib/unistd.h globals.h
diff --git a/src/fns.c b/src/fns.c
index 06a6456380..a064e02eac 100644
--- a/src/fns.c
+++ b/src/fns.c
@@ -39,9 +39,6 @@ Copyright (C) 1985-1987, 1993-1995, 1997-2022 Free Software Foundation,
 #include "puresize.h"
 #include "gnutls.h"
 
-static void sort_vector_copy (Lisp_Object pred, ptrdiff_t len,
-			      Lisp_Object src[restrict VLA_ELEMS (len)],
-			      Lisp_Object dest[restrict VLA_ELEMS (len)]);
 enum equal_kind { EQUAL_NO_QUIT, EQUAL_PLAIN, EQUAL_INCLUDING_PROPERTIES };
 static bool internal_equal (Lisp_Object, Lisp_Object,
 			    enum equal_kind, int, Lisp_Object);
@@ -2166,8 +2163,11 @@ DEFUN ("reverse", Freverse, Sreverse, 1, 1, 0,
   return new;
 }
 
-/* Sort LIST using PREDICATE, preserving original order of elements
-   considered as equal.  */
+
+/* Stably sort LIST using PREDICATE. This converts the list to a
+   vector, sorts the vector using the TIMSORT algorithm, and returns
+   the result converted back to a list.  The input list is
+   destructively reused to hold the sorted result.*/
 
 static Lisp_Object
 sort_list (Lisp_Object list, Lisp_Object predicate)
@@ -2175,112 +2175,43 @@ sort_list (Lisp_Object list, Lisp_Object predicate)
   ptrdiff_t length = list_length (list);
   if (length < 2)
     return list;
-
-  Lisp_Object tem = Fnthcdr (make_fixnum (length / 2 - 1), list);
-  Lisp_Object back = Fcdr (tem);
-  Fsetcdr (tem, Qnil);
-
-  return merge (Fsort (list, predicate), Fsort (back, predicate), predicate);
-}
-
-/* Using PRED to compare, return whether A and B are in order.
-   Compare stably when A appeared before B in the input.  */
-static bool
-inorder (Lisp_Object pred, Lisp_Object a, Lisp_Object b)
-{
-  return NILP (call2 (pred, b, a));
-}
-
-/* Using PRED to compare, merge from ALEN-length A and BLEN-length B
-   into DEST.  Argument arrays must be nonempty and must not overlap,
-   except that B might be the last part of DEST.  */
-static void
-merge_vectors (Lisp_Object pred,
-	       ptrdiff_t alen, Lisp_Object const a[restrict VLA_ELEMS (alen)],
-	       ptrdiff_t blen, Lisp_Object const b[VLA_ELEMS (blen)],
-	       Lisp_Object dest[VLA_ELEMS (alen + blen)])
-{
-  eassume (0 < alen && 0 < blen);
-  Lisp_Object const *alim = a + alen;
-  Lisp_Object const *blim = b + blen;
-
-  while (true)
+  else
     {
-      if (inorder (pred, a[0], b[0]))
+      Lisp_Object *result;
+      USE_SAFE_ALLOCA;
+      SAFE_ALLOCA_LISP (result, length);
+      Lisp_Object tail = list;
+      for (ptrdiff_t i = 0; i < length; i++)
 	{
-	  *dest++ = *a++;
-	  if (a == alim)
-	    {
-	      if (dest != b)
-		memcpy (dest, b, (blim - b) * sizeof *dest);
-	      return;
-	    }
+	  result[i] = Fcar (tail);
+	  tail = XCDR (tail);
 	}
-      else
+      tim_sort (predicate, result, length);
+
+      ptrdiff_t i = 0;
+      tail = list;
+      while (CONSP (tail))
 	{
-	  *dest++ = *b++;
-	  if (b == blim)
-	    {
-	      memcpy (dest, a, (alim - a) * sizeof *dest);
-	      return;
-	    }
+	  XSETCAR (tail, result[i]);
+	  tail = XCDR (tail);
+	  i++;
 	}
+      SAFE_FREE ();
+      return list;
     }
 }
 
-/* Using PRED to compare, sort LEN-length VEC in place, using TMP for
-   temporary storage.  LEN must be at least 2.  */
-static void
-sort_vector_inplace (Lisp_Object pred, ptrdiff_t len,
-		     Lisp_Object vec[restrict VLA_ELEMS (len)],
-		     Lisp_Object tmp[restrict VLA_ELEMS (len >> 1)])
-{
-  eassume (2 <= len);
-  ptrdiff_t halflen = len >> 1;
-  sort_vector_copy (pred, halflen, vec, tmp);
-  if (1 < len - halflen)
-    sort_vector_inplace (pred, len - halflen, vec + halflen, vec);
-  merge_vectors (pred, halflen, tmp, len - halflen, vec + halflen, vec);
-}
-
-/* Using PRED to compare, sort from LEN-length SRC into DST.
-   Len must be positive.  */
-static void
-sort_vector_copy (Lisp_Object pred, ptrdiff_t len,
-		  Lisp_Object src[restrict VLA_ELEMS (len)],
-		  Lisp_Object dest[restrict VLA_ELEMS (len)])
-{
-  eassume (0 < len);
-  ptrdiff_t halflen = len >> 1;
-  if (halflen < 1)
-    dest[0] = src[0];
-  else
-    {
-      if (1 < halflen)
-	sort_vector_inplace (pred, halflen, src, dest);
-      if (1 < len - halflen)
-	sort_vector_inplace (pred, len - halflen, src + halflen, dest);
-      merge_vectors (pred, halflen, src, len - halflen, src + halflen, dest);
-    }
-}
-
-/* Sort VECTOR in place using PREDICATE, preserving original order of
-   elements considered as equal.  */
+/* Stably sort VECTOR ordered by PREDICATE using the TIMSORT
+   algorithm.  */
 
 static void
 sort_vector (Lisp_Object vector, Lisp_Object predicate)
 {
-  ptrdiff_t len = ASIZE (vector);
-  if (len < 2)
+  ptrdiff_t length = ASIZE (vector);
+  if (length < 2)
     return;
-  ptrdiff_t halflen = len >> 1;
-  Lisp_Object *tmp;
-  USE_SAFE_ALLOCA;
-  SAFE_ALLOCA_LISP (tmp, halflen);
-  for (ptrdiff_t i = 0; i < halflen; i++)
-    tmp[i] = make_fixnum (0);
-  sort_vector_inplace (predicate, len, XVECTOR (vector)->contents, tmp);
-  SAFE_FREE ();
+
+  tim_sort (predicate, XVECTOR (vector)->contents, length);
 }
 
 DEFUN ("sort", Fsort, Ssort, 2, 2, 0,
@@ -2326,7 +2257,7 @@ merge (Lisp_Object org_l1, Lisp_Object org_l2, Lisp_Object pred)
 	}
 
       Lisp_Object tem;
-      if (inorder (pred, Fcar (l1), Fcar (l2)))
+      if (!NILP (call2 (pred, Fcar (l1), Fcar (l2))))
 	{
 	  tem = l1;
 	  l1 = Fcdr (l1);
diff --git a/src/lisp.h b/src/lisp.h
index 315fb03fe6..3801aeec3d 100644
--- a/src/lisp.h
+++ b/src/lisp.h
@@ -3903,6 +3903,9 @@ #define CONS_TO_INTEGER(cons, type, var)				\
 extern Lisp_Object string_make_unibyte (Lisp_Object);
 extern void syms_of_fns (void);
 
+/* Defined in sort.c  */
+extern void tim_sort (Lisp_Object, Lisp_Object *, const ptrdiff_t);
+
 /* Defined in floatfns.c.  */
 verify (FLT_RADIX == 2 || FLT_RADIX == 16);
 enum { LOG2_FLT_RADIX = FLT_RADIX == 2 ? 1 : 4 };
diff --git a/src/sort.c b/src/sort.c
new file mode 100644
index 0000000000..e7ccc1c052
--- /dev/null
+++ b/src/sort.c
@@ -0,0 +1,961 @@
+/* Timsort for sequences.
+
+Copyright (C) 2022 Free Software Foundation, Inc.
+
+This file is part of GNU Emacs.
+
+GNU Emacs is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or (at
+your option) any later version.
+
+GNU Emacs is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Emacs.  If not, see <https://www.gnu.org/licenses/>.  */
+
+/* This is a version of the cpython code implementing the TIMSORT
+   sorting algorithm described in
+   https://github.com/python/cpython/blob/main/Objects/listsort.txt.
+   This algorithm identifies and pushes naturally ordered sublists of
+   the original list, or "runs", onto a stack, and merges them
+   periodically according to a merge strategy called "powersort".
+   State is maintained during the sort in a merge_state structure,
+   which is passed around as an argument to all the subroutines.  A
+   "stretch" structure includes a pointer to the run BASE of length
+   LEN along with its POWER (a computed integer used by the powersort
+   merge strategy that depends on this run and the succeeding run.)  */
+
+
+#include <config.h>
+#include "lisp.h"
+
+
+/* MAX_MERGE_PENDING is the maximum number of entries in merge_state's
+   pending-stretch stack.  For a list with n elements, this needs at most
+   floor(log2(n)) + 1 entries even if we didn't force runs to a
+   minimal length.  So the number of bits in a ptrdiff_t is plenty large
+   enough for all cases.  */
+
+#define MAX_MERGE_PENDING (sizeof (ptrdiff_t)  * 8)
+
+/* Once we get into galloping mode, we stay there as long as both runs
+   win at least GALLOP_WIN_MIN consecutive times.  */
+
+#define GALLOP_WIN_MIN 7
+
+/* A small temp array of size MERGESTATE_TEMP_SIZE is used to avoid
+   malloc when merging small lists.  */
+
+#define MERGESTATE_TEMP_SIZE 256
+
+struct stretch
+{
+  Lisp_Object *base;
+  ptrdiff_t len;
+  int power;
+};
+
+struct reloc
+{
+  Lisp_Object **src;
+  Lisp_Object **dst;
+  ptrdiff_t *size;
+  int order; /* -1 while in merge_lo; +1 while in merg_hi; 0 otherwise.  */
+};
+
+
+typedef struct
+{
+  Lisp_Object *listbase;
+  ptrdiff_t listlen;
+
+  /* PENDING is a stack of N pending stretches yet to be merged.
+     Stretch #i starts at address base[i] and extends for len[i]
+     elements.  */
+
+  int n;
+  struct stretch pending[MAX_MERGE_PENDING];
+
+  /* The variable MIN_GALLOP, initialized to GALLOP_WIN_MIN, controls
+     when we get *into* galloping mode.  merge_lo and merge_hi tend to
+     nudge it higher for random data, and lower for highly structured
+     data.  */
+
+  ptrdiff_t min_gallop;
+
+  /* 'A' is temporary storage, able to hold ALLOCED elements, to help
+     with merges.  'A' initially points to TEMPARRAY, and subsequently
+     to newly allocated memory if needed.  */
+
+  Lisp_Object *a;
+  ptrdiff_t alloced;
+  specpdl_ref count;
+  Lisp_Object temparray[MERGESTATE_TEMP_SIZE];
+
+  /* If an exception is thrown while merging we might have to relocate
+     some list elements from temporary storage back into the list.
+     RELOC keeps track of the information needed to do this.  */
+
+  struct reloc reloc;
+
+  /* PREDICATE is the lisp comparison predicate for the sort.  */
+
+  Lisp_Object predicate;
+} merge_state;
+
+
+/* INORDER returns true iff (PREDICATE A B) is non-nil.  */
+
+static inline bool
+inorder (const Lisp_Object predicate, const Lisp_Object a, const Lisp_Object b)
+{
+  return !NILP (call2 (predicate, a, b));
+}
+
+
+/* BINARYSORT() is a stable binary insertion sort used for sorting the
+   list starting at LO and ending at HI.  On entry, LO <= START <= HI,
+   and [LO, START) is already sorted (pass START == LO if you don't
+   know!).  Even in case of error, the output slice will be some
+   permutation of the input (nothing is lost or duplicated).  */
+
+static void
+binarysort (merge_state *ms, Lisp_Object *lo, const Lisp_Object *hi,
+	    Lisp_Object *start)
+{
+  Lisp_Object pred = ms->predicate;
+
+  eassume (lo <= start && start <= hi);
+  if (lo == start)
+    ++start;
+  for (; start < hi; ++start)
+    {
+      Lisp_Object *l = lo;
+      Lisp_Object *r = start;
+      Lisp_Object pivot = *r;
+
+      eassume (l < r);
+      do {
+	Lisp_Object *p = l + ((r - l) >> 1);
+	if (inorder (pred, pivot, *p))
+	  r = p;
+	else
+	  l = p + 1;
+      } while (l < r);
+      eassume (l == r);
+      for (Lisp_Object *p = start; p > l; --p)
+	p[0] = p[-1];
+      *l = pivot;
+    }
+}
+
+
+/*  COUNT_RUN() returns the length of the run beginning at LO, in the
+    slice [LO, HI) with LO < HI.  A "run" is the longest
+    non-decreasing sequence or the longest strictly decreasing
+    sequence, with the Boolean *DESCENDING set to 0 in the former
+    case, or to 1 in the latter.  The strictness of the definition of
+    "descending" is needed so that the caller can safely reverse a
+    descending sequence without violating stability (strict > ensures
+    there are no equal elements to get out of order).  */
+
+static ptrdiff_t
+count_run (merge_state *ms, Lisp_Object *lo, const Lisp_Object *hi, bool *descending)
+{
+  Lisp_Object pred = ms->predicate;
+
+  eassume (lo < hi);
+  *descending = 0;
+  ++lo;
+  ptrdiff_t n = 1;
+  if (lo == hi)
+    return n;
+
+  n = 2;
+  if (inorder (pred, lo[0], lo[-1]))
+    {
+      *descending = 1;
+      for (lo = lo + 1; lo < hi; ++lo, ++n)
+	{
+	  if (!inorder (pred, lo[0], lo[-1]))
+	    break;
+	}
+    }
+  else
+    {
+      for (lo = lo + 1; lo < hi; ++lo, ++n)
+	{
+	  if (inorder (pred, lo[0], lo[-1]))
+	    break;
+	}
+    }
+
+  return n;
+}
+
+
+/*  GALLOP_LEFT() locates the proper position of KEY in a sorted
+    vector: if the vector contains an element equal to KEY, return the
+    position immediately to the left of the leftmost equal element.
+    [GALLOP_RIGHT() does the same except returns the position to the
+    right of the rightmost equal element (if any).]
+
+    'A' is a sorted vector with N elements, starting at A[0].  N must be > 0.
+
+    HINT is an index at which to begin the search, 0 <= HINT < N.  The closer
+    HINT is to the final result, the faster this runs.
+
+    The return value is the int k in [0, N] such that
+
+    A[k-1] < KEY <= a[k]
+
+    pretending that *(A-1) is minus infinity and A[N] is plus infinity.  IOW,
+    KEY belongs at index k; or, IOW, the first k elements of A should precede
+    KEY, and the last N-k should follow KEY.  */
+
+static ptrdiff_t
+gallop_left (merge_state *ms, const Lisp_Object key, Lisp_Object *a,
+	     const ptrdiff_t n, const ptrdiff_t hint)
+{
+  Lisp_Object pred = ms->predicate;
+
+  eassume (a && n > 0 && hint >= 0 && hint < n);
+
+  a += hint;
+  ptrdiff_t lastofs = 0;
+  ptrdiff_t ofs = 1;
+  if (inorder (pred, *a, key))
+    {
+      /* When a[hint] < key, gallop right until
+	 a[hint + lastofs] < key <= a[hint + ofs].  */
+      const ptrdiff_t maxofs = n - hint; /* This is one after the end of a.  */
+      while (ofs < maxofs)
+	{
+	  if (inorder (pred, a[ofs], key))
+	    {
+	      lastofs = ofs;
+	      eassume (ofs <= (PTRDIFF_MAX - 1) / 2);
+	      ofs = (ofs << 1) + 1;
+	    }
+	  else
+	    break; /* Here key <= a[hint+ofs].  */
+	}
+      if (ofs > maxofs)
+	ofs = maxofs;
+      /* Translate back to offsets relative to &a[0].  */
+      lastofs += hint;
+      ofs += hint;
+    }
+  else
+    {
+      /* When key <= a[hint], gallop left, until
+	 a[hint - ofs] < key <= a[hint - lastofs].  */
+      const ptrdiff_t maxofs = hint + 1;             /* Here &a[0] is lowest.  */
+      while (ofs < maxofs)
+	{
+	  if (inorder (pred, a[-ofs], key))
+	    break;
+	  /* Here key <= a[hint - ofs].  */
+	  lastofs = ofs;
+	  eassume (ofs <= (PTRDIFF_MAX - 1) / 2);
+	  ofs = (ofs << 1) + 1;
+	}
+      if (ofs > maxofs)
+	ofs = maxofs;
+      /* Translate back to use positive offsets relative to &a[0].  */
+      ptrdiff_t k = lastofs;
+      lastofs = hint - ofs;
+      ofs = hint - k;
+    }
+  a -= hint;
+
+  eassume (-1 <= lastofs && lastofs < ofs && ofs <= n);
+  /* Now a[lastofs] < key <= a[ofs], so key belongs somewhere to the
+     right of lastofs but no farther right than ofs.  Do a binary
+     search, with invariant a[lastofs-1] < key <= a[ofs].  */
+  ++lastofs;
+  while (lastofs < ofs)
+    {
+      ptrdiff_t m = lastofs + ((ofs - lastofs) >> 1);
+
+      if (inorder (pred, a[m], key))
+	lastofs = m + 1;              /* Here a[m] < key.  */
+      else
+	ofs = m;                    /* Here key <= a[m].  */
+    }
+  eassume (lastofs == ofs);             /* Then a[ofs-1] < key <= a[ofs].  */
+  return ofs;
+}
+
+
+/*  GALLOP_RIGHT() is exactly like GALLOP_LEFT(), except that if KEY
+    already exists in A[0:N], it finds the position immediately to the
+    right of the rightmost equal value.
+
+    The return value is the int k in [0, N] such that
+
+    A[k-1] <= KEY < A[k].  */
+
+static ptrdiff_t
+gallop_right (merge_state *ms, const Lisp_Object key, Lisp_Object *a,
+	      const ptrdiff_t n, const ptrdiff_t hint)
+{
+  Lisp_Object pred = ms->predicate;
+
+  eassume (a && n > 0 && hint >= 0 && hint < n);
+
+  a += hint;
+  ptrdiff_t lastofs = 0;
+  ptrdiff_t ofs = 1;
+  if (inorder (pred, key, *a))
+    {
+      /* When key < a[hint], gallop left until
+	 a[hint - ofs] <= key < a[hint - lastofs].  */
+      const ptrdiff_t maxofs = hint + 1;             /* Here &a[0] is lowest.  */
+      while (ofs < maxofs)
+	{
+	  if (inorder (pred, key, a[-ofs]))
+	    {
+	      lastofs = ofs;
+	      eassume (ofs <= (PTRDIFF_MAX - 1) / 2);
+	      ofs = (ofs << 1) + 1;
+	    }
+	  else                /* Here a[hint - ofs] <= key.  */
+	    break;
+	}
+      if (ofs > maxofs)
+	ofs = maxofs;
+      /* Translate back to use positive offsets relative to &a[0].  */
+      ptrdiff_t k = lastofs;
+      lastofs = hint - ofs;
+      ofs = hint - k;
+    }
+  else
+    {
+      /* When a[hint] <= key, gallop right, until
+	 a[hint + lastofs] <= key < a[hint + ofs].  */
+      const ptrdiff_t maxofs = n - hint;             /* Here &a[n-1] is highest.  */
+      while (ofs < maxofs)
+	{
+	  if (inorder (pred, key, a[ofs]))
+	    break;
+	  /* Here a[hint + ofs] <= key.  */
+	  lastofs = ofs;
+	  eassume (ofs <= (PTRDIFF_MAX - 1) / 2);
+	  ofs = (ofs << 1) + 1;
+	}
+      if (ofs > maxofs)
+	ofs = maxofs;
+      /* Translate back to use offsets relative to &a[0].  */
+      lastofs += hint;
+      ofs += hint;
+    }
+  a -= hint;
+
+  eassume (-1 <= lastofs && lastofs < ofs && ofs <= n);
+  /* Now a[lastofs] <= key < a[ofs], so key belongs somewhere to the
+     right of lastofs but no farther right than ofs.  Do a binary
+     search, with invariant a[lastofs-1] <= key < a[ofs].  */
+  ++lastofs;
+  while (lastofs < ofs)
+    {
+      ptrdiff_t m = lastofs + ((ofs - lastofs) >> 1);
+
+      if (inorder (pred, key, a[m]))
+	ofs = m;                    /* Here key < a[m].  */
+      else
+	lastofs = m + 1;              /* Here a[m] <= key.  */
+    }
+  eassume (lastofs == ofs);             /* Now  a[ofs-1] <= key < a[ofs].  */
+  return ofs;
+}
+
+
+static void
+merge_init (merge_state *ms, const ptrdiff_t list_size, Lisp_Object *lo,
+	    const Lisp_Object predicate)
+{
+  eassume (ms != NULL);
+
+  ms->a = ms->temparray;
+  ms->alloced = MERGESTATE_TEMP_SIZE;
+
+  ms->n = 0;
+  ms->min_gallop = GALLOP_WIN_MIN;
+  ms->listlen = list_size;
+  ms->listbase = lo;
+  ms->predicate = predicate;
+  ms->reloc = (struct reloc){NULL, NULL, NULL, 0};
+}
+
+
+/* The dynamically allocated memory may hold lisp objects during
+   merging.  MERGE_MARKMEM marks them so they aren't reaped during
+   GC.  */
+
+static void
+merge_markmem (void *arg)
+{
+  merge_state *ms = arg;
+  eassume (ms != NULL);
+
+  if (ms->reloc.size != NULL && *ms->reloc.size > 0)
+    {
+      eassume (ms->reloc.src != NULL);
+      mark_objects (*ms->reloc.src, *ms->reloc.size);
+    }
+}
+
+
+/* CLEANUP_MEM frees all temp storage.  If an exception occurs while
+   merging it will first relocate any lisp elements in temp storage
+   back to the original array.  */
+
+static void
+cleanup_mem (void *arg)
+{
+  merge_state *ms = arg;
+  eassume (ms != NULL);
+
+  /* If we have an exception while merging, some of the list elements
+     might only live in temp storage; we copy everything remaining in
+     the temp storage back into the original list.  This ensures that
+     the original list has all of the original elements, although
+     their order is unpredictable.  */
+
+  if (ms->reloc.order != 0 && *ms->reloc.size > 0)
+    {
+      eassume (*ms->reloc.src != NULL && *ms->reloc.dst != NULL);
+      ptrdiff_t n = *ms->reloc.size;
+      ptrdiff_t shift = ms->reloc.order == -1 ? 0 : n - 1;
+      memcpy (*ms->reloc.dst - shift, *ms->reloc.src, n * word_size);
+    }
+
+  /* Free any remaining temp storage.  */
+  xfree (ms->a);
+}
+
+
+/* MERGE_GETMEM() ensures availability of enough temp memory for NEED
+   array slots.  Any previously allocated memory is first freed, and a
+   cleanup routine is registered to free memory at the very end, or on
+   exception.  */
+
+static void
+merge_getmem (merge_state *ms, const ptrdiff_t need)
+{
+  eassume (ms != NULL);
+
+  if (ms->a == ms->temparray)
+    {
+      /* We only get here if alloc is needed and this is the first
+	 time, so we set up the unwind.  */
+      specpdl_ref count = SPECPDL_INDEX ();
+      record_unwind_protect_ptr_mark (cleanup_mem, ms, merge_markmem);
+      ms->count = count;
+    }
+  else
+    {
+      /* We have previously alloced storage.  Since we don't care
+         what's in the block we don't use realloc which would waste
+         cycles copying the old data.  We just free and alloc
+         again.  */
+      xfree (ms->a);
+    }
+  ms->a = xmalloc (need * word_size);
+  ms->alloced = need;
+}
+
+
+static inline void
+needmem (merge_state *ms, ptrdiff_t na)
+{
+  if (na > ms->alloced)
+    merge_getmem (ms, na);
+}
+
+
+/* MERGE_LO() stably merges the NA elements starting at SSA with the
+   NB elements starting at SSB = SSA + NA, in-place.  NA and NB must
+   be positive.  We also require that SSA[NA-1] belongs at the end of
+   the merge, and should have NA <= NB.  */
+
+static void
+merge_lo (merge_state *ms, Lisp_Object *ssa, ptrdiff_t na, Lisp_Object *ssb,
+	  ptrdiff_t nb)
+{
+  Lisp_Object pred = ms->predicate;
+
+  eassume (ms && ssa && ssb && na > 0 && nb > 0);
+  eassume (ssa + na == ssb);
+  needmem (ms, na);
+  memcpy (ms->a, ssa, na * word_size);
+  Lisp_Object *dest = ssa;
+  ssa = ms->a;
+
+  ms->reloc = (struct reloc){&ssa, &dest, &na, -1};
+
+  *dest++ = *ssb++;
+  --nb;
+  if (nb == 0)
+    goto Succeed;
+  if (na == 1)
+    goto CopyB;
+
+  ptrdiff_t min_gallop = ms->min_gallop;
+  for (;;)
+    {
+      ptrdiff_t acount = 0;  /* This holds the # of consecutive times A won.  */
+
+      ptrdiff_t bcount = 0;  /* This holds the # of consecutive times B won.  */
+
+      for (;;)
+	{
+	  eassume (na > 1 && nb > 0);
+	  if (inorder (pred, *ssb, *ssa))
+	    {
+	      *dest++ = *ssb++ ;
+	      ++bcount;
+	      acount = 0;
+	      --nb;
+	      if (nb == 0)
+		goto Succeed;
+	      if (bcount >= min_gallop)
+		break;
+	    }
+	  else
+	    {
+	      *dest++ = *ssa++;
+	      ++acount;
+	      bcount = 0;
+	      --na;
+	      if (na == 1)
+		goto CopyB;
+	      if (acount >= min_gallop)
+		break;
+	    }
+	}
+
+      /* One run is winning so consistently that galloping may be a huge
+	 win.  We try that, and continue galloping until (if ever)
+	 neither run appears to be winning consistently anymore.  */
+      ++min_gallop;
+      do {
+	eassume (na > 1 && nb > 0);
+	min_gallop -= min_gallop > 1;
+	ms->min_gallop = min_gallop;
+	ptrdiff_t k = gallop_right (ms, ssb[0], ssa, na, 0);
+	acount = k;
+	if (k)
+	  {
+	    memcpy (dest, ssa, k * word_size);
+	    dest += k;
+	    ssa += k;
+	    na -= k;
+	    if (na == 1)
+	      goto CopyB;
+	    /* While na==0 is impossible now if the comparison function is
+	       consistent, we shouldn't assume that it is.  */
+	    if (na == 0)
+	      goto Succeed;
+	  }
+	*dest++ = *ssb++ ;
+	--nb;
+	if (nb == 0)
+	  goto Succeed;
+
+	k = gallop_left (ms, ssa[0], ssb, nb, 0);
+	bcount = k;
+	if (k)
+	  {
+	    memmove (dest, ssb, k * word_size);
+	    dest += k;
+	    ssb += k;
+	    nb -= k;
+	    if (nb == 0)
+	      goto Succeed;
+	  }
+	*dest++ = *ssa++;
+	--na;
+	if (na == 1)
+	  goto CopyB;
+      } while (acount >= GALLOP_WIN_MIN || bcount >= GALLOP_WIN_MIN);
+      ++min_gallop;   /* Apply a penalty for leaving galloping mode.  */
+      ms->min_gallop = min_gallop;
+    }
+ Succeed:
+  ms->reloc = (struct reloc){NULL, NULL, NULL, 0};
+
+  if (na)
+    memcpy (dest, ssa, na * word_size);
+  return;
+ CopyB:
+  eassume (na == 1 && nb > 0);
+  ms->reloc = (struct reloc){NULL, NULL, NULL, 0};
+
+  /* The last element of ssa belongs at the end of the merge.  */
+  memmove (dest, ssb, nb * word_size);
+  dest[nb] = ssa[0];
+}
+
+
+/* MERGE_HI() stably merges the NA elements starting at SSA with the
+   NB elements starting at SSB = SSA + NA, in-place.  NA and NB must
+   be positive.  We also require that SSA[NA-1] belongs at the end of
+   the merge, and should have NA >= NB.  */
+
+static void
+merge_hi (merge_state *ms, Lisp_Object *ssa, ptrdiff_t na,
+	  Lisp_Object *ssb, ptrdiff_t nb)
+{
+  Lisp_Object pred = ms->predicate;
+
+  eassume (ms && ssa && ssb && na > 0 && nb > 0);
+  eassume (ssa + na == ssb);
+  needmem (ms, nb);
+  Lisp_Object *dest = ssb;
+  dest += nb - 1;
+  memcpy(ms->a, ssb, nb * word_size);
+  Lisp_Object *basea = ssa;
+  Lisp_Object *baseb = ms->a;
+  ssb = ms->a + nb - 1;
+  ssa += na - 1;
+
+  ms->reloc = (struct reloc){&baseb, &dest, &nb, 1};
+
+  *dest-- = *ssa--;
+  --na;
+  if (na == 0)
+    goto Succeed;
+  if (nb == 1)
+    goto CopyA;
+
+  ptrdiff_t min_gallop = ms->min_gallop;
+  for (;;) {
+    ptrdiff_t acount = 0;   /* This holds the # of consecutive times A won.  */
+    ptrdiff_t bcount = 0;   /* This holds the # of consecutive times B won.  */
+
+    for (;;) {
+      eassume (na > 0 && nb > 1);
+      if (inorder (pred, *ssb, *ssa))
+	{
+	  *dest-- = *ssa--;
+	  ++acount;
+	  bcount = 0;
+	  --na;
+	  if (na == 0)
+	    goto Succeed;
+	  if (acount >= min_gallop)
+	    break;
+	}
+      else
+	{
+	  *dest-- = *ssb--;
+	  ++bcount;
+	  acount = 0;
+	  --nb;
+	  if (nb == 1)
+	    goto CopyA;
+	  if (bcount >= min_gallop)
+	    break;
+	}
+    }
+
+    /* One run is winning so consistently that galloping may be a huge
+       win.  Try that, and continue galloping until (if ever) neither
+       run appears to be winning consistently anymore.  */
+    ++min_gallop;
+    do {
+      eassume (na > 0 && nb > 1);
+      min_gallop -= min_gallop > 1;
+      ms->min_gallop = min_gallop;
+      ptrdiff_t k = gallop_right (ms, ssb[0], basea, na, na - 1);
+      k = na - k;
+      acount = k;
+      if (k)
+	{
+	  dest += -k;
+	  ssa += -k;
+	  memmove(dest + 1, ssa + 1, k * word_size);
+	  na -= k;
+	  if (na == 0)
+	    goto Succeed;
+	}
+      *dest-- = *ssb--;
+      --nb;
+      if (nb == 1)
+	goto CopyA;
+
+      k = gallop_left (ms, ssa[0], baseb, nb, nb - 1);
+      k = nb - k;
+      bcount = k;
+      if (k)
+	{
+	  dest += -k;
+	  ssb += -k;
+	  memcpy(dest + 1, ssb + 1, k * word_size);
+	  nb -= k;
+	  if (nb == 1)
+	    goto CopyA;
+	  /* While nb==0 is impossible now if the comparison function
+	     is consistent, we shouldn't assume that it is.  */
+	  if (nb == 0)
+	    goto Succeed;
+	}
+      *dest-- = *ssa--;
+      --na;
+      if (na == 0)
+	goto Succeed;
+    } while (acount >= GALLOP_WIN_MIN || bcount >= GALLOP_WIN_MIN);
+    ++min_gallop;      /* Apply a penalty for leaving galloping mode.  */
+    ms->min_gallop = min_gallop;
+  }
+ Succeed:
+  ms->reloc = (struct reloc){NULL, NULL, NULL, 0};
+  if (nb)
+    memcpy (dest - nb + 1, baseb, nb * word_size);
+  return;
+ CopyA:
+  eassume (nb == 1 && na > 0);
+  ms->reloc = (struct reloc){NULL, NULL, NULL, 0};
+  /* The first element of ssb belongs at the front of the merge.  */
+  memmove (dest + 1 - na, ssa + 1 - na, na * word_size);
+  dest += -na;
+  ssa += -na;
+  dest[0] = ssb[0];
+}
+
+
+/* MERGE_AT() merges the two runs at stack indices I and I+1.  */
+
+static void
+merge_at (merge_state *ms, const ptrdiff_t i)
+{
+  eassume (ms != NULL);
+  eassume (ms->n >= 2);
+  eassume (i >= 0);
+  eassume (i == ms->n - 2 || i == ms->n - 3);
+
+  Lisp_Object *ssa = ms->pending[i].base;
+  ptrdiff_t na = ms->pending[i].len;
+  Lisp_Object *ssb = ms->pending[i + 1].base;
+  ptrdiff_t nb = ms->pending[i + 1].len;
+  eassume (na > 0 && nb > 0);
+  eassume (ssa + na == ssb);
+
+  /* Record the length of the combined runs; if i is the 3rd-last run
+     now, also slide over the last run (which isn't involved in this
+     merge).  The current run i+1 goes away in any case.  */
+  ms->pending[i].len = na + nb;
+  if (i == ms->n - 3)
+    ms->pending[i + 1] = ms->pending[i + 2];
+  --ms->n;
+
+  /* Where does b start in a?  Elements in a before that can be
+     ignored (they are already in place).  */
+  ptrdiff_t k = gallop_right (ms, *ssb, ssa, na, 0);
+  eassume (k >= 0);
+  ssa += k;
+  na -= k;
+  if (na == 0)
+    return;
+
+  /* Where does a end in b?  Elements in b after that can be ignored
+     (they are already in place).  */
+  nb = gallop_left (ms, ssa[na - 1], ssb, nb, nb - 1);
+  if (nb == 0)
+    return;
+  eassume (nb > 0);
+  /* Merge what remains of the runs using a temp array with size
+     min(na, nb) elements.  */
+  if (na <= nb)
+    merge_lo (ms, ssa, na, ssb, nb);
+  else
+    merge_hi (ms, ssa, na, ssb, nb);
+}
+
+
+/* POWERLOOP() computes the "power" of the first of two adjacent runs
+   begining at index S1, with the first having length N1 and the
+   second (starting at index S1+N1) having length N2.  The list has
+   total length N.  */
+
+static int
+powerloop (const ptrdiff_t s1, const ptrdiff_t n1, const ptrdiff_t n2,
+	   const ptrdiff_t n)
+{
+  eassume (s1 >= 0);
+  eassume (n1 > 0 && n2 > 0);
+  eassume (s1 + n1 + n2 <= n);
+  /* The midpoints a and b are
+     a = s1 + n1/2
+     b = s1 + n1 + n2/2 = a + (n1 + n2)/2
+
+     These may not be integers because of the "/2", so we work with
+     2*a and 2*b instead.  It makes no difference to the outcome,
+     since the bits in the expansion of (2*i)/n are merely shifted one
+     position from those of i/n.  */
+  ptrdiff_t a = 2 * s1 + n1;
+  ptrdiff_t b = a + n1 + n2;
+  int result = 0;
+  /* Emulate a/n and b/n one bit a time, until their bits differ.  */
+  for (;;)
+    {
+      ++result;
+      if (a >= n)
+	{  /* Both quotient bits are now 1.  */
+	  eassume (b >= a);
+	  a -= n;
+	  b -= n;
+	}
+      else if (b >= n)
+	{  /* a/n bit is 0 and b/n bit is 1.  */
+	  break;
+	} /* Otherwise both quotient bits are 0.  */
+      eassume (a < b && b < n);
+      a <<= 1;
+      b <<= 1;
+    }
+  return result;
+}
+
+
+/* FOUND_NEW_RUN() updates the state when a run of length N2 has been
+   identified.  If there's already a stretch on the stack, apply the
+   "powersort" merge strategy: compute the topmost stretch's "power"
+   (depth in a conceptual binary merge tree) and merge adjacent runs
+   on the stack with greater power.  */
+
+static void
+found_new_run (merge_state *ms, const ptrdiff_t n2)
+{
+  eassume (ms != NULL);
+  if (ms->n)
+    {
+      eassume (ms->n > 0);
+      struct stretch *p = ms->pending;
+      ptrdiff_t s1 = p[ms->n - 1].base - ms->listbase;
+      ptrdiff_t n1 = p[ms->n - 1].len;
+      int power = powerloop (s1, n1, n2, ms->listlen);
+      while (ms->n > 1 && p[ms->n - 2].power > power)
+	{
+	  merge_at (ms, ms->n - 2);
+	}
+      eassume (ms->n < 2 || p[ms->n - 2].power < power);
+      p[ms->n - 1].power = power;
+    }
+}
+
+
+/* MERGE_FORCE_COLLAPSE() unconditionally merges all stretches on the
+   stack until only one remains, and returns 0 on success.  This is
+   used at the end of the mergesort.  */
+
+static void
+merge_force_collapse (merge_state *ms)
+{
+  struct stretch *p = ms->pending;
+
+  eassume (ms != NULL);
+  while (ms->n > 1)
+    {
+      ptrdiff_t n = ms->n - 2;
+      if (n > 0 && p[n - 1].len < p[n + 1].len)
+	--n;
+      merge_at (ms, n);
+    }
+}
+
+
+/* MERGE_COMPUTE_MINRUN() computes a good value for the minimum run
+   length; natural runs shorter than this are boosted artificially via
+   binary insertion.
+
+   If N < 64, return N (it's too small to bother with fancy stuff).
+   Otherwise if N is an exact power of 2, return 32.  Finally, return
+   an int k, 32 <= k <= 64, such that N/k is close to, but strictly
+   less than, an exact power of 2.  */
+
+static ptrdiff_t
+merge_compute_minrun (ptrdiff_t n)
+{
+  ptrdiff_t r = 0;           /* r will become 1 if any non-zero bits are
+				shifted off.  */
+
+  eassume (n >= 0);
+  while (n >= 64)
+    {
+      r |= n & 1;
+      n >>= 1;
+    }
+  return n + r;
+}
+
+
+static void
+reverse_vector (Lisp_Object *s, const ptrdiff_t n)
+{
+  for (ptrdiff_t i = 0; i < n >> 1; i++)
+    {
+      Lisp_Object tem = s[i];
+      s[i] =  s[n - i - 1];
+      s[n - i - 1] = tem;
+    }
+}
+
+/* TIM_SORT sorts the array SEQ with LENGTH elements in the order
+   determined by PREDICATE.  */
+
+void
+tim_sort (Lisp_Object predicate, Lisp_Object *seq, const ptrdiff_t length)
+{
+  merge_state ms;
+  Lisp_Object *lo = seq;
+
+  merge_init (&ms, length, lo, predicate);
+
+
+  /* March over the array once, left to right, finding natural runs,
+     and extending short natural runs to minrun elements.  */
+  const ptrdiff_t minrun = merge_compute_minrun (length);
+  ptrdiff_t nremaining = length;
+  do {
+    bool descending;
+
+    /* Identify the next run.  */
+    ptrdiff_t n = count_run (&ms, lo, lo + nremaining, &descending);
+    if (descending)
+      reverse_vector (lo, n);
+    /* If the run is short, extend it to min(minrun, nremaining).  */
+    if (n < minrun)
+      {
+	const ptrdiff_t force = nremaining <= minrun ?
+	  nremaining : minrun;
+	binarysort (&ms, lo, lo + force, lo + n);
+	n = force;
+      }
+    eassume (ms.n == 0 || ms.pending[ms.n - 1].base +
+	     ms.pending[ms.n - 1].len == lo);
+    found_new_run (&ms, n);
+    /* Push the new run on to the stack.  */
+    eassume (ms.n < MAX_MERGE_PENDING);
+    ms.pending[ms.n].base = lo;
+    ms.pending[ms.n].len = n;
+    ++ms.n;
+    /* Advance to find the next run.  */
+    lo += n;
+    nremaining -= n;
+  } while (nremaining);
+
+  merge_force_collapse (&ms);
+  eassume (ms.n == 1);
+  eassume (ms.pending[0].len == length);
+  lo = ms.pending[0].base;
+
+  if (ms.a != ms.temparray)
+    unbind_to (ms.count, Qnil);
+}
-- 
2.34.1.575.g55b058a8bb


From 2aaa288f8209bb8d55244e1499c8da40ab3f77f4 Mon Sep 17 00:00:00 2001
From: Andrew G Cohen <cohen@andy.bu.edu>
Date: Thu, 17 Mar 2022 16:50:11 +0800
Subject: [PATCH 3/4] Add more sorting unit tests

* test/src/fns-tests.el (fns-tests-sort): New sorting unit tests.
---
 test/src/fns-tests.el | 70 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 70 insertions(+)

diff --git a/test/src/fns-tests.el b/test/src/fns-tests.el
index 723ef4c710..5b252e184f 100644
--- a/test/src/fns-tests.el
+++ b/test/src/fns-tests.el
@@ -204,6 +204,76 @@ fns-tests-sort
 		 [-1 2 3 4 5 5 7 8 9]))
   (should (equal (sort (vector 9 5 2 -1 5 3 8 7 4) (lambda (x y) (> x y)))
 		 [9 8 7 5 5 4 3 2 -1]))
+  ;; Sort a reversed list and vector.
+  (should (equal
+	 (sort (reverse (number-sequence 1 1000)) (lambda (x y) (< x y)))
+	 (number-sequence 1 1000)))
+  (should (equal
+	   (sort (reverse (vconcat (number-sequence 1 1000)))
+                 (lambda (x y) (< x y)))
+	 (vconcat (number-sequence 1 1000))))
+  ;; Sort a constant list and vector.
+  (should (equal
+           (sort (make-vector 100 1) (lambda (x y) (> x y)))
+           (make-vector 100 1)))
+  (should (equal
+           (sort (append (make-vector 100 1) nil) (lambda (x y) (> x y)))
+           (append (make-vector 100 1) nil)))
+  ;; Sort a long list and vector with every pair reversed.
+  (let ((vec (make-vector 100000 nil))
+        (logxor-vec (make-vector 100000 nil)))
+    (dotimes (i 100000)
+      (aset logxor-vec i  (logxor i 1))
+      (aset vec i i))
+    (should (equal
+             (sort logxor-vec (lambda (x y) (< x y)))
+             vec))
+    (should (equal
+             (sort (append logxor-vec nil) (lambda (x y) (< x y)))
+             (append vec nil))))
+  ;; Sort a list and vector with seven swaps.
+  (let ((vec (make-vector 100 nil))
+        (swap-vec (make-vector 100 nil)))
+    (dotimes (i 100)
+      (aset vec i (- i 50))
+      (aset swap-vec i (- i 50)))
+    (mapc (lambda (p)
+	(let ((tmp (elt swap-vec (car p))))
+	  (aset swap-vec (car p) (elt swap-vec (cdr p)))
+	  (aset swap-vec (cdr p) tmp)))
+          '((48 . 94) (75 . 77) (33 . 41) (92 . 52)
+            (10 . 96) (1 . 14) (43 . 81)))
+    (should (equal
+             (sort (copy-sequence swap-vec) (lambda (x y) (< x y)))
+             vec))
+    (should (equal
+             (sort (append swap-vec nil) (lambda (x y) (< x y)))
+             (append vec nil))))
+  ;; Check for possible corruption after GC.
+  (let* ((size 3000)
+         (complex-vec (make-vector size nil))
+         (vec (make-vector size nil))
+         (counter 0)
+         (my-counter (lambda ()
+                       (if (< counter 500)
+                           (cl-incf counter)
+                         (setq counter 0)
+                         (garbage-collect))))
+         (rand 1)
+         (generate-random
+	  (lambda () (setq rand
+                           (logand (+ (* rand 1103515245) 12345)  2147483647)))))
+    ;; Make a complex vector and its sorted version.
+    (dotimes (i size)
+      (let ((r (funcall generate-random)))
+        (aset complex-vec i (cons r "a"))
+        (aset vec i (cons r "a"))))
+    ;; Sort it.
+    (should (equal
+             (sort complex-vec
+                   (lambda (x y) (funcall my-counter) (< (car x) (car y))))
+             (sort vec 'car-less-than-car))))
+  ;; Check for sorting stability.
   (should (equal
 	   (sort
 	    (vector
-- 
2.34.1.575.g55b058a8bb


From e0443500e7d4b8e50a5e5af01eda1ac48f0b0f7e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mattias=20Engdeg=C3=A5rd?= <mattiase@acm.org>
Date: Wed, 16 Mar 2022 17:33:07 +0100
Subject: [PATCH 4/4] Resolve sort predicate ahead of time

* src/sort.c (tim_sort): If the sort predicate is a symbol, find the
corresponding function before starting the sort.  This is especially
beneficial if the predicate was an alias (`string<`, for example).
---
 src/sort.c | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/src/sort.c b/src/sort.c
index e7ccc1c052..48e106e92d 100644
--- a/src/sort.c
+++ b/src/sort.c
@@ -913,12 +913,24 @@ reverse_vector (Lisp_Object *s, const ptrdiff_t n)
 void
 tim_sort (Lisp_Object predicate, Lisp_Object *seq, const ptrdiff_t length)
 {
+  if (SYMBOLP (predicate))
+    {
+      /* Attempt to resolve the function as far as possible ahead of time,
+	 to avoid having to do it for each call.  */
+      Lisp_Object fun = XSYMBOL (predicate)->u.s.function;
+      if (SYMBOLP (fun))
+	/* Function was an alias; use slow-path resolution.  */
+	fun = indirect_function (fun);
+      /* Don't resolve to an autoload spec; that would be very slow.  */
+      if (!NILP (fun) && !(CONSP (fun) && EQ (XCAR (fun), Qautoload)))
+	predicate = fun;
+    }
+
   merge_state ms;
   Lisp_Object *lo = seq;
 
   merge_init (&ms, length, lo, predicate);
 
-
   /* March over the array once, left to right, finding natural runs,
      and extending short natural runs to minrun elements.  */
   const ptrdiff_t minrun = merge_compute_minrun (length);
-- 
2.34.1.575.g55b058a8bb


^ permalink raw reply related	[flat|nested] 19+ messages in thread

end of thread, other threads:[~2022-06-07  9:07 UTC | newest]

Thread overview: 19+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-03-22 23:59 bug#54532: [PATCH] sorting Andrew Cohen
2022-03-23 12:02 ` Lars Ingebrigtsen
2022-03-23 13:30 ` Eli Zaretskii
2022-03-23 23:43   ` Andrew Cohen
2022-03-23 13:46 ` Eli Zaretskii
2022-03-23 23:31   ` Andrew Cohen
2022-03-23 20:24 ` Mattias Engdegård
2022-03-24  6:42   ` Eli Zaretskii
2022-03-24  7:22     ` Andrew Cohen
2022-03-24  8:55       ` Eli Zaretskii
2022-03-24  9:17         ` Andrew Cohen
2022-03-24  9:55           ` Mattias Engdegård
2022-03-24  9:36     ` Mattias Engdegård
2022-03-31 12:03 ` Lars Ingebrigtsen
2022-03-31 13:58   ` Eli Zaretskii
2022-03-31 23:47     ` Andrew Cohen
2022-04-01  6:26       ` Eli Zaretskii
2022-06-07  7:06         ` Stefan Kangas
     [not found]           ` <877d5tgd11.fsf@ust.hk>
2022-06-07  9:07             ` Stefan Kangas

Code repositories for project(s) associated with this public inbox

	https://git.savannah.gnu.org/cgit/emacs.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).