unofficial mirror of emacs-devel@gnu.org 
 help / color / mirror / code / Atom feed
From: Andy Sonnenburg <andy22286@gmail.com>
To: Stefan Monnier <monnier@iro.umontreal.ca>
Cc: emacs-devel@gnu.org
Subject: Re: Binary Search Tree and Treap Functions bst-assq and treap-put
Date: Sun, 4 Dec 2016 12:39:53 -0500	[thread overview]
Message-ID: <CAHtDYY-iQPWFi0fd5Ld71tUHdydBZy0_Q4LgvHZs8o-9HVOj8g@mail.gmail.com> (raw)
In-Reply-To: <CAHtDYY9-Jrh3TEQd+AEvpAkcc6-n=TARfx6-qkRusRrSuosOSA@mail.gmail.com>


[-- Attachment #1.1: Type: text/plain, Size: 7602 bytes --]

I've attached the patch file and reproduced the diff below (I'm not sure
which form is preferred).

diff --git src/fns.c src/fns.c
index dfc7842..5f3cacc 100644
--- src/fns.c
+++ src/fns.c
@@ -21,6 +21,7 @@ along with GNU Emacs.  If not, see <
http://www.gnu.org/licenses/>.  */
 #include <config.h>

 #include <stdlib.h>
+#include <string.h>
 #include <unistd.h>
 #include <filevercmp.h>
 #include <intprops.h>
@@ -4751,7 +4752,181 @@ returns nil, then (funcall TEST x1 x2) also returns
nil.  */)
   return Fput (name, Qhash_table_test, list2 (test, hash));
 }

+#define EMACS_CAT_I(a, b) a ## b
+#define EMACS_CAT(a, b) EMACS_CAT_I(a, b)
+#define EMACS_LT(x, y) (XLI (x) < XLI (y))
+#define EMACS_BST_VECTOR_SIZE 4
+#define EMACS_FNV_PRIME 1099511628211
+#define EMACS_FNV_OFFSET_BASIS 14695981039346656037
+
+DEFUN ("bst-assq", Fbst_assq, Sbst_assq,
+       2, 2, 0,
+       doc: /* Return the value whose key is `eq' to KEY in BST.
+BST is either a vector whose first element is a key, second element is a
value,
+and third and fourth elements are left and right children; or a
non-vector.  The
+left child's key must be less than BST's key.  The right child's key must
be
+greater than BST's key.  Non-vector descendants of BST are ignored.  */)
+  (Lisp_Object new_key, Lisp_Object bst)
+{
+  const struct Lisp_Vector *vector;
+  const Lisp_Object *contents;
+  Lisp_Object key;
+  for (;;)
+    {
+      if (!VECTORP (bst))
+        {
+          return Qnil;
+        }
+      vector = XVECTOR (bst);
+      if (vector->header.size != EMACS_BST_VECTOR_SIZE)
+        {
+          args_out_of_range_3  (bst,
+                                make_number (0),
+                                make_number (EMACS_BST_VECTOR_SIZE));
+        }
+      contents = vector->contents;
+      key = contents[0];
+      if (EQ (key, new_key))
+        {
+          return contents[1];
+        }
+      bst = contents[2 + EMACS_LT (key, new_key)];
+    }
+}
+
+static uint64_t
+fnv_1a_hash (const void *first, const void *last)
+{
+  const unsigned char *i;
+  uint64_t hash;
+  hash = (uint64_t) EMACS_CAT(EMACS_FNV_OFFSET_BASIS, ull);
+  for (i = (const unsigned char *) first; i != last; ++i)
+    {
+      hash ^= *i;
+      hash *= (uint64_t) EMACS_CAT(EMACS_FNV_PRIME, ull);
+    }
+  return hash;
+}
+
+static Lisp_Object
+treap_singleton (Lisp_Object key, Lisp_Object value, Lisp_Object nil)
+{
+  Lisp_Object result, *contents;
+  result = make_uninit_vector (EMACS_BST_VECTOR_SIZE);
+  contents = XVECTOR (result)->contents;
+  contents[0] = key;
+  contents[1] = value;
+  contents[2] = nil;
+  contents[3] = nil;
+  return result;
+}
+
+static void
+rotate (Lisp_Object *a, Lisp_Object *b, Lisp_Object *c)
+{
+  Lisp_Object d;
+  d = *a;
+  *a = *b;
+  *b = *c;
+  *c = d;
+}
+
+static void
+tree_rotate (Lisp_Object *root, int rotation_index, int opposite_index)
+{
+  Lisp_Object *opposite, *rotation;
+  opposite = XVECTOR (*root)->contents + opposite_index;
+  rotation = XVECTOR (*opposite)->contents + rotation_index;
+  rotate (opposite, rotation, root);
+}
+
+static Lisp_Object
+make_lisp_vector (struct Lisp_Vector *vector)
+{
+  return XIL (TAG_PTR (Lisp_Vectorlike, vector));
+}
+
+static uint64_t
+treap_hash (Lisp_Object object)
+{
+  EMACS_INT i;
+  i = XLI (object);
+  return fnv_1a_hash (&i, &i + 1);
+}
+
+static Lisp_Object
+treap_put (Lisp_Object, uint64_t, Lisp_Object, Lisp_Object);
+
+static Lisp_Object
+treap_vector_put (Lisp_Object new_key,
+                  uint64_t new_hash,
+                  Lisp_Object new_value,
+                  struct Lisp_Vector *vector)
+{
+  const Lisp_Object *contents;
+  Lisp_Object key, value, new_treap, *new_contents;
+  int i, j;
+  if (vector->header.size != EMACS_BST_VECTOR_SIZE)
+    {
+      args_out_of_range_3 (make_lisp_vector (vector),
+                           make_number (0),
+                           make_number (EMACS_BST_VECTOR_SIZE));
+    }
+  contents = vector->contents;
+  key = contents[0];
+  value = contents[1];
+  new_treap = make_uninit_vector (EMACS_BST_VECTOR_SIZE);
+  new_contents = XVECTOR (new_treap)->contents;
+  new_contents[0] = key;
+  if (EQ (key, new_key))
+    {
+      new_contents[1] = new_value;
+      memcpy (new_contents + 2, contents + 2, 2 * sizeof *new_contents);
+      return new_treap;
+    }
+  new_contents[1] = value;
+  i = 2 + EMACS_LT (key, new_key);
+  j = EMACS_BST_VECTOR_SIZE + 1 - i;
+  new_contents[i] = treap_put (new_key, new_hash, new_value, contents[i]);
+  new_contents[j] = contents[j];
+  if (treap_hash (key) < new_hash)
+    {
+      tree_rotate (&new_treap, j, i);
+    }
+  return new_treap;
+}
+
+static Lisp_Object
+treap_put (Lisp_Object key, uint64_t hash, Lisp_Object value, Lisp_Object
treap)
+{
+  if (VECTORP (treap))
+    {
+      return treap_vector_put (key, hash, value, XVECTOR (treap));
+    }
+  return treap_singleton (key, value, treap);
+}
+
+DEFUN ("treap-put", Ftreap_put, Streap_put,
+       3, 3, 0,
+       doc: /* Associate KEY with VALUE in a treap derived from TREAP.
+If KEY is already present in TREAP, return a treap with VALUE replacing the
+existing value.  TREAP will not be modified, though sharing of structure
between
+the result treap and TREAP may occur.  */)
+  (Lisp_Object key, Lisp_Object value, Lisp_Object treap)
+{
+  if (VECTORP (treap))
+    {
+      return treap_vector_put (key, treap_hash (key), value, XVECTOR
(treap));
+    }
+  return treap_singleton (key, value, treap);
+}

+#undef EMACS_FNV_OFFSET_BASIS
+#undef EMACS_FNV_PRIME
+#undef EMACS_BST_VECTOR_SIZE
+#undef EMACS_LT
+#undef EMACS_CAT
+#undef EMACS_CAT_I

 /************************************************************************
  MD5, SHA-1, and SHA-2
@@ -5232,4 +5407,6 @@ this variable.  */);
   defsubr (&Ssecure_hash);
   defsubr (&Sbuffer_hash);
   defsubr (&Slocale_info);
+  defsubr (&Sbst_assq);
+  defsubr (&Streap_put);
 }
diff --git test/src/fns-tests.el test/src/fns-tests.el
index c533bad..1ef393e 100644
--- test/src/fns-tests.el
+++ test/src/fns-tests.el
@@ -245,3 +245,11 @@
   (let ((data '((foo) (bar))))
     (should (equal (mapcan #'identity data) '(foo bar)))
     (should (equal data                     '((foo bar) (bar))))))
+
+(ert-deftest fns-tests-treap-put ()
+  (let ((n 64)
+        (treap nil))
+    (dotimes (i n)
+      (dotimes (j i) (should (equal (bst-assq j treap) j)))
+      (dotimes (j (- n i)) (should (equal (bst-assq (+ i j) treap) nil)))
+      (setq treap (treap-put i i treap)))))


On Sun, Dec 4, 2016 at 12:13 PM, Andy Sonnenburg <andy22286@gmail.com>
wrote:

> It is written in C.  The only real reason C was used was performance
> concerns, real or imagined.  I can post a diff of the changes - it isn't
> that many lines.
>
> On Dec 4, 2016 12:04 PM, "Stefan Monnier" <monnier@iro.umontreal.ca>
> wrote:
>
>> > That's too bad (I mean, its good for performance, but unfortunate one of
>> > the use cases doesn't exist).  However, the treap functions may still
>> be of
>> > general use.  Let me know if there is any interest.  They are documented
>> > and tested.  They fill a gap between alists (persistent, linear lookup)
>> and
>> > hash tables (ephemeral, constant lookup) by being persistent while
>> > providing average case logarithmic lookup.
>>
>> Is it written in C or Elisp?  If it's Elisp, then we definitely would
>> welcome it into GNU ELPA (there is already an avl-tree implementation in
>> Emacs itself at lisp/emacs-lisp/avl-tree.el, but the more the merrier).
>> If it's written C, I'll let others decide whether we want to include it.
>>
>>
>>         Stefan
>>
>

[-- Attachment #1.2: Type: text/html, Size: 11130 bytes --]

[-- Attachment #2: treap.patch --]
[-- Type: text/x-patch, Size: 6402 bytes --]

diff --git src/fns.c src/fns.c
index dfc7842..5f3cacc 100644
--- src/fns.c
+++ src/fns.c
@@ -21,6 +21,7 @@ along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
 #include <config.h>
 
 #include <stdlib.h>
+#include <string.h>
 #include <unistd.h>
 #include <filevercmp.h>
 #include <intprops.h>
@@ -4751,7 +4752,181 @@ returns nil, then (funcall TEST x1 x2) also returns nil.  */)
   return Fput (name, Qhash_table_test, list2 (test, hash));
 }
 
+#define EMACS_CAT_I(a, b) a ## b
+#define EMACS_CAT(a, b) EMACS_CAT_I(a, b)
+#define EMACS_LT(x, y) (XLI (x) < XLI (y))
+#define EMACS_BST_VECTOR_SIZE 4
+#define EMACS_FNV_PRIME 1099511628211
+#define EMACS_FNV_OFFSET_BASIS 14695981039346656037
+
+DEFUN ("bst-assq", Fbst_assq, Sbst_assq,
+       2, 2, 0,
+       doc: /* Return the value whose key is `eq' to KEY in BST.
+BST is either a vector whose first element is a key, second element is a value,
+and third and fourth elements are left and right children; or a non-vector.  The
+left child's key must be less than BST's key.  The right child's key must be
+greater than BST's key.  Non-vector descendants of BST are ignored.  */)
+  (Lisp_Object new_key, Lisp_Object bst)
+{
+  const struct Lisp_Vector *vector;
+  const Lisp_Object *contents;
+  Lisp_Object key;
+  for (;;)
+    {
+      if (!VECTORP (bst))
+        {
+          return Qnil;
+        }
+      vector = XVECTOR (bst);
+      if (vector->header.size != EMACS_BST_VECTOR_SIZE)
+        {
+          args_out_of_range_3  (bst,
+                                make_number (0),
+                                make_number (EMACS_BST_VECTOR_SIZE));
+        }
+      contents = vector->contents;
+      key = contents[0];
+      if (EQ (key, new_key))
+        {
+          return contents[1];
+        }
+      bst = contents[2 + EMACS_LT (key, new_key)];
+    }
+}
+
+static uint64_t
+fnv_1a_hash (const void *first, const void *last)
+{
+  const unsigned char *i;
+  uint64_t hash;
+  hash = (uint64_t) EMACS_CAT(EMACS_FNV_OFFSET_BASIS, ull);
+  for (i = (const unsigned char *) first; i != last; ++i)
+    {
+      hash ^= *i;
+      hash *= (uint64_t) EMACS_CAT(EMACS_FNV_PRIME, ull);
+    }
+  return hash;
+}
+
+static Lisp_Object
+treap_singleton (Lisp_Object key, Lisp_Object value, Lisp_Object nil)
+{
+  Lisp_Object result, *contents;
+  result = make_uninit_vector (EMACS_BST_VECTOR_SIZE);
+  contents = XVECTOR (result)->contents;
+  contents[0] = key;
+  contents[1] = value;
+  contents[2] = nil;
+  contents[3] = nil;
+  return result;
+}
+
+static void
+rotate (Lisp_Object *a, Lisp_Object *b, Lisp_Object *c)
+{
+  Lisp_Object d;
+  d = *a;
+  *a = *b;
+  *b = *c;
+  *c = d;
+}
+
+static void
+tree_rotate (Lisp_Object *root, int rotation_index, int opposite_index)
+{
+  Lisp_Object *opposite, *rotation;
+  opposite = XVECTOR (*root)->contents + opposite_index;
+  rotation = XVECTOR (*opposite)->contents + rotation_index;
+  rotate (opposite, rotation, root);
+}
+
+static Lisp_Object
+make_lisp_vector (struct Lisp_Vector *vector)
+{
+  return XIL (TAG_PTR (Lisp_Vectorlike, vector));
+}
+
+static uint64_t
+treap_hash (Lisp_Object object)
+{
+  EMACS_INT i;
+  i = XLI (object);
+  return fnv_1a_hash (&i, &i + 1);
+}
+
+static Lisp_Object
+treap_put (Lisp_Object, uint64_t, Lisp_Object, Lisp_Object);
+
+static Lisp_Object
+treap_vector_put (Lisp_Object new_key,
+                  uint64_t new_hash,
+                  Lisp_Object new_value,
+                  struct Lisp_Vector *vector)
+{
+  const Lisp_Object *contents;
+  Lisp_Object key, value, new_treap, *new_contents;
+  int i, j;
+  if (vector->header.size != EMACS_BST_VECTOR_SIZE)
+    {
+      args_out_of_range_3 (make_lisp_vector (vector),
+                           make_number (0),
+                           make_number (EMACS_BST_VECTOR_SIZE));
+    }
+  contents = vector->contents;
+  key = contents[0];
+  value = contents[1];
+  new_treap = make_uninit_vector (EMACS_BST_VECTOR_SIZE);
+  new_contents = XVECTOR (new_treap)->contents;
+  new_contents[0] = key;
+  if (EQ (key, new_key))
+    {
+      new_contents[1] = new_value;
+      memcpy (new_contents + 2, contents + 2, 2 * sizeof *new_contents);
+      return new_treap;
+    }
+  new_contents[1] = value;
+  i = 2 + EMACS_LT (key, new_key);
+  j = EMACS_BST_VECTOR_SIZE + 1 - i;
+  new_contents[i] = treap_put (new_key, new_hash, new_value, contents[i]);
+  new_contents[j] = contents[j];
+  if (treap_hash (key) < new_hash)
+    {
+      tree_rotate (&new_treap, j, i);
+    }
+  return new_treap;
+}
+
+static Lisp_Object
+treap_put (Lisp_Object key, uint64_t hash, Lisp_Object value, Lisp_Object treap)
+{
+  if (VECTORP (treap))
+    {
+      return treap_vector_put (key, hash, value, XVECTOR (treap));
+    }
+  return treap_singleton (key, value, treap);
+}
+
+DEFUN ("treap-put", Ftreap_put, Streap_put,
+       3, 3, 0,
+       doc: /* Associate KEY with VALUE in a treap derived from TREAP.
+If KEY is already present in TREAP, return a treap with VALUE replacing the
+existing value.  TREAP will not be modified, though sharing of structure between
+the result treap and TREAP may occur.  */)
+  (Lisp_Object key, Lisp_Object value, Lisp_Object treap)
+{
+  if (VECTORP (treap))
+    {
+      return treap_vector_put (key, treap_hash (key), value, XVECTOR (treap));
+    }
+  return treap_singleton (key, value, treap);
+}
 
+#undef EMACS_FNV_OFFSET_BASIS
+#undef EMACS_FNV_PRIME
+#undef EMACS_BST_VECTOR_SIZE
+#undef EMACS_LT
+#undef EMACS_CAT
+#undef EMACS_CAT_I
 \f
 /************************************************************************
 			MD5, SHA-1, and SHA-2
@@ -5232,4 +5407,6 @@ this variable.  */);
   defsubr (&Ssecure_hash);
   defsubr (&Sbuffer_hash);
   defsubr (&Slocale_info);
+  defsubr (&Sbst_assq);
+  defsubr (&Streap_put);
 }
diff --git test/src/fns-tests.el test/src/fns-tests.el
index c533bad..1ef393e 100644
--- test/src/fns-tests.el
+++ test/src/fns-tests.el
@@ -245,3 +245,11 @@
   (let ((data '((foo) (bar))))
     (should (equal (mapcan #'identity data) '(foo bar)))
     (should (equal data                     '((foo bar) (bar))))))
+
+(ert-deftest fns-tests-treap-put ()
+  (let ((n 64)
+        (treap nil))
+    (dotimes (i n)
+      (dotimes (j i) (should (equal (bst-assq j treap) j)))
+      (dotimes (j (- n i)) (should (equal (bst-assq (+ i j) treap) nil)))
+      (setq treap (treap-put i i treap)))))

  reply	other threads:[~2016-12-04 17:39 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-12-04  1:42 Binary Search Tree and Treap Functions bst-assq and treap-put Andy Sonnenburg
2016-12-04  4:33 ` Stefan Monnier
     [not found]   ` <CAHtDYY-xis2R4Nbvq_8Ht0nKsm6KjGqW=NSC7O3+5FvNF9w+Dg@mail.gmail.com>
     [not found]     ` <CAHtDYY9__BVOAs+vX=Tj8Bf31X6-Duv3f9MS8vcGwsnO2x74+w@mail.gmail.com>
2016-12-04 12:14       ` Andy Sonnenburg
2016-12-04 17:04         ` Stefan Monnier
2016-12-04 17:13           ` Andy Sonnenburg
2016-12-04 17:39             ` Andy Sonnenburg [this message]
2016-12-04 17:41             ` Eli Zaretskii
2016-12-12  6:15           ` John Wiegley
2016-12-12 12:56             ` Stefan Monnier
2016-12-12 16:46               ` John Wiegley
2016-12-12 16:58                 ` Stefan Monnier
2016-12-12 17:06                   ` John Wiegley
2016-12-12 17:29                     ` Stefan Monnier
2017-10-22 16:35   ` Noam Postavsky
2017-10-22 16:44     ` Andreas Schwab
2017-10-22 16:44     ` Stefan Monnier
2017-10-22 17:12       ` Noam Postavsky
2017-10-23  0:37         ` Stefan Monnier

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://www.gnu.org/software/emacs/

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=CAHtDYY-iQPWFi0fd5Ld71tUHdydBZy0_Q4LgvHZs8o-9HVOj8g@mail.gmail.com \
    --to=andy22286@gmail.com \
    --cc=emacs-devel@gnu.org \
    --cc=monnier@iro.umontreal.ca \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://git.savannah.gnu.org/cgit/emacs.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).