unofficial mirror of emacs-devel@gnu.org 
 help / color / mirror / code / Atom feed
From: Andrew Cohen <acohen@ust.hk>
To: emacs-devel@gnu.org
Subject: sorting in C
Date: Tue, 22 Feb 2022 10:52:06 +0800	[thread overview]
Message-ID: <87ilt7bokp.fsf@ust.hk> (raw)

[-- Attachment #1: Type: text/plain, Size: 1683 bytes --]

I have been trying to speed up some things in gnus, with some
success. But it inspired me to take a look at the sorting algorithm
in C.  I have done some simple tests and have made some (simple)
improvements which I hope to push. But I am a novice at the C parts of
emacs (and C in general) so am sharing here.

Currently, lists and vectors have different code paths, but both use a
recursive merge sort (I haven't actually looked at the vector code, I'm
just guessing from the function names). There are some minor
inefficiencies in the list code (it computes the length of the list on
each recursion, for example, rather than passing it as an
argument---changing this alone gives a 10% speedup) which I played
with. I also tried an iterative rather than recursive merge (no real
improvement). But in the end I found that the simplest large improvement
was to:

1. Use an iterative insertion sort for small lists (length < 64). I
   wrote one that is in-place and does minimal work when the list is
   sorted or reverse sorted, which is the case for much of the sorting
   in gnus.
2. Convert longer lists to a vector, hand it off to the vector code, and
   convert back to a list. This gives a 35% improvement in all cases I
   was able to test.

These are the parts I propose pushing (patch attached). But then I:

3. Replaced the current vector sorting algorithm with TIMSORT. This did
   exactly what TIMSORT is supposed to do: on random data it is
   marginally slower than the current mergesort. But on partially
   ordered data it is 5 times faster.

I plan to continue playing with changes to the vector algorithm to see
what other changes might be worthwhile.

Best,
Andy


[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: sorting improvements --]
[-- Type: text/x-patch, Size: 2699 bytes --]

diff --git a/src/fns.c b/src/fns.c
index ea8428fd98..913c3f3489 100644
--- a/src/fns.c
+++ b/src/fns.c
@@ -2166,29 +2166,73 @@ DEFUN ("reverse", Freverse, Sreverse, 1, 1, 0,
   return new;
 }
 
+
+/* Using PRED to compare, return whether A and B are in order.
+   Compare stably when A appeared before B in the input.  */
+static bool
+inorder (Lisp_Object pred, Lisp_Object a, Lisp_Object b)
+{
+  return NILP (call2 (pred, b, a));
+}
+
 /* Sort LIST using PREDICATE, preserving original order of elements
    considered as equal.  */
 
+/* This converts the list to a vector, sorts the vector, and converts
+   back to a list. In simple benchmarks this is 35% faster. It
+   allocates a new vector so it uses space of order the length */
+
 static Lisp_Object
 sort_list (Lisp_Object list, Lisp_Object predicate)
 {
   ptrdiff_t length = list_length (list);
-  if (length < 2)
+  if (length < 2) {
     return list;
+  } else if (length < 64) {
+    /* use an iterative insertion sort for short lists */
+    Lisp_Object old = Qnil, new = list, before, after;
+    while (CONSP (new)) {
+      Lisp_Object next = Fcdr (new);
+      if (NILP (old) || inorder (predicate, Fcar (old), Fcar (new))) {
+        old = new;
+        new = next;
+        continue;
+      }
+      XSETCDR(old, next);
+      before = Qnil, after = list;
+      while (CONSP (after)) {
+        if (inorder (predicate, Fcar (after), Fcar (new))) {
+          before = after;  after = XCDR (after); continue;
+        } else if (NILP (before)) {
+          XSETCDR(new, list); list = new; break;}
+        XSETCDR(before, new);
+        XSETCDR(new, after);
+        break;
+      }
+      new = next;
+    }
+    return list;
+  } else {
+    Lisp_Object result = make_nil_vector (length), tail = list;
+    int i;
+    /* convert list to vector */
+    for (i=0; i < length; i++) {
+      ASET (result, i, Fcar (tail));
+      tail = XCDR (tail);
+    }
 
-  Lisp_Object tem = Fnthcdr (make_fixnum (length / 2 - 1), list);
-  Lisp_Object back = Fcdr (tem);
-  Fsetcdr (tem, Qnil);
-
-  return merge (Fsort (list, predicate), Fsort (back, predicate), predicate);
-}
+    result = Fsort (result, predicate);
 
-/* Using PRED to compare, return whether A and B are in order.
-   Compare stably when A appeared before B in the input.  */
-static bool
-inorder (Lisp_Object pred, Lisp_Object a, Lisp_Object b)
-{
-  return NILP (call2 (pred, b, a));
+    /* convert vector to list */
+    i = 0;
+    tail = list;
+    while (CONSP (tail)) {
+      XSETCAR (tail, AREF (result, i));
+      tail = XCDR (tail);
+      i++;
+    }
+    return list;
+  }
 }
 
 /* Using PRED to compare, merge from ALEN-length A and BLEN-length B

[-- Attachment #3: Type: text/plain, Size: 19 bytes --]



-- 
Andrew Cohen

             reply	other threads:[~2022-02-22  2:52 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-02-22  2:52 Andrew Cohen [this message]
2022-02-22 12:30 ` sorting in C Eli Zaretskii
2022-02-22 12:54   ` Andrew Cohen
2022-02-22 13:11     ` Eli Zaretskii
2022-02-23  4:14   ` Andrew Cohen
2022-02-23 12:34     ` Eli Zaretskii
2022-02-23 12:53       ` Andrew Cohen
2022-02-23 13:14         ` Eli Zaretskii
2022-02-23 13:52           ` Andrew Cohen
2022-02-23 14:06           ` Andrew Cohen
2022-02-23 14:18             ` Eli Zaretskii
2022-02-26 23:54               ` Andrew Cohen
2022-02-27  2:27                 ` Andrew Cohen
2022-02-27  7:28                   ` Eli Zaretskii
2022-02-27  9:11                     ` Andrew Cohen
2022-02-27  9:29                       ` Eli Zaretskii
2022-02-27 10:42                         ` Andrew Cohen
2022-03-04  0:13                           ` Andrew Cohen
2022-03-04  7:05                             ` Eli Zaretskii
2022-02-23 13:19         ` Yuri Khan
2022-02-23 14:12           ` Andrew Cohen
2022-02-22 13:12 ` Lars Ingebrigtsen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://www.gnu.org/software/emacs/

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=87ilt7bokp.fsf@ust.hk \
    --to=acohen@ust.hk \
    --cc=emacs-devel@gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://git.savannah.gnu.org/cgit/emacs.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).