From: Andy Sonnenburg <andy22286@gmail.com>
To: Stefan Monnier <monnier@iro.umontreal.ca>
Cc: emacs-devel@gnu.org
Subject: Re: Binary Search Tree and Treap Functions bst-assq and treap-put
Date: Sun, 4 Dec 2016 12:39:53 -0500 [thread overview]
Message-ID: <CAHtDYY-iQPWFi0fd5Ld71tUHdydBZy0_Q4LgvHZs8o-9HVOj8g@mail.gmail.com> (raw)
In-Reply-To: <CAHtDYY9-Jrh3TEQd+AEvpAkcc6-n=TARfx6-qkRusRrSuosOSA@mail.gmail.com>
[-- Attachment #1.1: Type: text/plain, Size: 7602 bytes --]
I've attached the patch file and reproduced the diff below (I'm not sure
which form is preferred).
diff --git src/fns.c src/fns.c
index dfc7842..5f3cacc 100644
--- src/fns.c
+++ src/fns.c
@@ -21,6 +21,7 @@ along with GNU Emacs. If not, see <
http://www.gnu.org/licenses/>. */
#include <config.h>
#include <stdlib.h>
+#include <string.h>
#include <unistd.h>
#include <filevercmp.h>
#include <intprops.h>
@@ -4751,7 +4752,181 @@ returns nil, then (funcall TEST x1 x2) also returns
nil. */)
return Fput (name, Qhash_table_test, list2 (test, hash));
}
+#define EMACS_CAT_I(a, b) a ## b
+#define EMACS_CAT(a, b) EMACS_CAT_I(a, b)
+#define EMACS_LT(x, y) (XLI (x) < XLI (y))
+#define EMACS_BST_VECTOR_SIZE 4
+#define EMACS_FNV_PRIME 1099511628211
+#define EMACS_FNV_OFFSET_BASIS 14695981039346656037
+
+DEFUN ("bst-assq", Fbst_assq, Sbst_assq,
+ 2, 2, 0,
+ doc: /* Return the value whose key is `eq' to KEY in BST.
+BST is either a vector whose first element is a key, second element is a
value,
+and third and fourth elements are left and right children; or a
non-vector. The
+left child's key must be less than BST's key. The right child's key must
be
+greater than BST's key. Non-vector descendants of BST are ignored. */)
+ (Lisp_Object new_key, Lisp_Object bst)
+{
+ const struct Lisp_Vector *vector;
+ const Lisp_Object *contents;
+ Lisp_Object key;
+ for (;;)
+ {
+ if (!VECTORP (bst))
+ {
+ return Qnil;
+ }
+ vector = XVECTOR (bst);
+ if (vector->header.size != EMACS_BST_VECTOR_SIZE)
+ {
+ args_out_of_range_3 (bst,
+ make_number (0),
+ make_number (EMACS_BST_VECTOR_SIZE));
+ }
+ contents = vector->contents;
+ key = contents[0];
+ if (EQ (key, new_key))
+ {
+ return contents[1];
+ }
+ bst = contents[2 + EMACS_LT (key, new_key)];
+ }
+}
+
+static uint64_t
+fnv_1a_hash (const void *first, const void *last)
+{
+ const unsigned char *i;
+ uint64_t hash;
+ hash = (uint64_t) EMACS_CAT(EMACS_FNV_OFFSET_BASIS, ull);
+ for (i = (const unsigned char *) first; i != last; ++i)
+ {
+ hash ^= *i;
+ hash *= (uint64_t) EMACS_CAT(EMACS_FNV_PRIME, ull);
+ }
+ return hash;
+}
+
+static Lisp_Object
+treap_singleton (Lisp_Object key, Lisp_Object value, Lisp_Object nil)
+{
+ Lisp_Object result, *contents;
+ result = make_uninit_vector (EMACS_BST_VECTOR_SIZE);
+ contents = XVECTOR (result)->contents;
+ contents[0] = key;
+ contents[1] = value;
+ contents[2] = nil;
+ contents[3] = nil;
+ return result;
+}
+
+static void
+rotate (Lisp_Object *a, Lisp_Object *b, Lisp_Object *c)
+{
+ Lisp_Object d;
+ d = *a;
+ *a = *b;
+ *b = *c;
+ *c = d;
+}
+
+static void
+tree_rotate (Lisp_Object *root, int rotation_index, int opposite_index)
+{
+ Lisp_Object *opposite, *rotation;
+ opposite = XVECTOR (*root)->contents + opposite_index;
+ rotation = XVECTOR (*opposite)->contents + rotation_index;
+ rotate (opposite, rotation, root);
+}
+
+static Lisp_Object
+make_lisp_vector (struct Lisp_Vector *vector)
+{
+ return XIL (TAG_PTR (Lisp_Vectorlike, vector));
+}
+
+static uint64_t
+treap_hash (Lisp_Object object)
+{
+ EMACS_INT i;
+ i = XLI (object);
+ return fnv_1a_hash (&i, &i + 1);
+}
+
+static Lisp_Object
+treap_put (Lisp_Object, uint64_t, Lisp_Object, Lisp_Object);
+
+static Lisp_Object
+treap_vector_put (Lisp_Object new_key,
+ uint64_t new_hash,
+ Lisp_Object new_value,
+ struct Lisp_Vector *vector)
+{
+ const Lisp_Object *contents;
+ Lisp_Object key, value, new_treap, *new_contents;
+ int i, j;
+ if (vector->header.size != EMACS_BST_VECTOR_SIZE)
+ {
+ args_out_of_range_3 (make_lisp_vector (vector),
+ make_number (0),
+ make_number (EMACS_BST_VECTOR_SIZE));
+ }
+ contents = vector->contents;
+ key = contents[0];
+ value = contents[1];
+ new_treap = make_uninit_vector (EMACS_BST_VECTOR_SIZE);
+ new_contents = XVECTOR (new_treap)->contents;
+ new_contents[0] = key;
+ if (EQ (key, new_key))
+ {
+ new_contents[1] = new_value;
+ memcpy (new_contents + 2, contents + 2, 2 * sizeof *new_contents);
+ return new_treap;
+ }
+ new_contents[1] = value;
+ i = 2 + EMACS_LT (key, new_key);
+ j = EMACS_BST_VECTOR_SIZE + 1 - i;
+ new_contents[i] = treap_put (new_key, new_hash, new_value, contents[i]);
+ new_contents[j] = contents[j];
+ if (treap_hash (key) < new_hash)
+ {
+ tree_rotate (&new_treap, j, i);
+ }
+ return new_treap;
+}
+
+static Lisp_Object
+treap_put (Lisp_Object key, uint64_t hash, Lisp_Object value, Lisp_Object
treap)
+{
+ if (VECTORP (treap))
+ {
+ return treap_vector_put (key, hash, value, XVECTOR (treap));
+ }
+ return treap_singleton (key, value, treap);
+}
+
+DEFUN ("treap-put", Ftreap_put, Streap_put,
+ 3, 3, 0,
+ doc: /* Associate KEY with VALUE in a treap derived from TREAP.
+If KEY is already present in TREAP, return a treap with VALUE replacing the
+existing value. TREAP will not be modified, though sharing of structure
between
+the result treap and TREAP may occur. */)
+ (Lisp_Object key, Lisp_Object value, Lisp_Object treap)
+{
+ if (VECTORP (treap))
+ {
+ return treap_vector_put (key, treap_hash (key), value, XVECTOR
(treap));
+ }
+ return treap_singleton (key, value, treap);
+}
+#undef EMACS_FNV_OFFSET_BASIS
+#undef EMACS_FNV_PRIME
+#undef EMACS_BST_VECTOR_SIZE
+#undef EMACS_LT
+#undef EMACS_CAT
+#undef EMACS_CAT_I
/************************************************************************
MD5, SHA-1, and SHA-2
@@ -5232,4 +5407,6 @@ this variable. */);
defsubr (&Ssecure_hash);
defsubr (&Sbuffer_hash);
defsubr (&Slocale_info);
+ defsubr (&Sbst_assq);
+ defsubr (&Streap_put);
}
diff --git test/src/fns-tests.el test/src/fns-tests.el
index c533bad..1ef393e 100644
--- test/src/fns-tests.el
+++ test/src/fns-tests.el
@@ -245,3 +245,11 @@
(let ((data '((foo) (bar))))
(should (equal (mapcan #'identity data) '(foo bar)))
(should (equal data '((foo bar) (bar))))))
+
+(ert-deftest fns-tests-treap-put ()
+ (let ((n 64)
+ (treap nil))
+ (dotimes (i n)
+ (dotimes (j i) (should (equal (bst-assq j treap) j)))
+ (dotimes (j (- n i)) (should (equal (bst-assq (+ i j) treap) nil)))
+ (setq treap (treap-put i i treap)))))
On Sun, Dec 4, 2016 at 12:13 PM, Andy Sonnenburg <andy22286@gmail.com>
wrote:
> It is written in C. The only real reason C was used was performance
> concerns, real or imagined. I can post a diff of the changes - it isn't
> that many lines.
>
> On Dec 4, 2016 12:04 PM, "Stefan Monnier" <monnier@iro.umontreal.ca>
> wrote:
>
>> > That's too bad (I mean, its good for performance, but unfortunate one of
>> > the use cases doesn't exist). However, the treap functions may still
>> be of
>> > general use. Let me know if there is any interest. They are documented
>> > and tested. They fill a gap between alists (persistent, linear lookup)
>> and
>> > hash tables (ephemeral, constant lookup) by being persistent while
>> > providing average case logarithmic lookup.
>>
>> Is it written in C or Elisp? If it's Elisp, then we definitely would
>> welcome it into GNU ELPA (there is already an avl-tree implementation in
>> Emacs itself at lisp/emacs-lisp/avl-tree.el, but the more the merrier).
>> If it's written C, I'll let others decide whether we want to include it.
>>
>>
>> Stefan
>>
>
[-- Attachment #1.2: Type: text/html, Size: 11130 bytes --]
[-- Attachment #2: treap.patch --]
[-- Type: text/x-patch, Size: 6402 bytes --]
diff --git src/fns.c src/fns.c
index dfc7842..5f3cacc 100644
--- src/fns.c
+++ src/fns.c
@@ -21,6 +21,7 @@ along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
#include <config.h>
#include <stdlib.h>
+#include <string.h>
#include <unistd.h>
#include <filevercmp.h>
#include <intprops.h>
@@ -4751,7 +4752,181 @@ returns nil, then (funcall TEST x1 x2) also returns nil. */)
return Fput (name, Qhash_table_test, list2 (test, hash));
}
+#define EMACS_CAT_I(a, b) a ## b
+#define EMACS_CAT(a, b) EMACS_CAT_I(a, b)
+#define EMACS_LT(x, y) (XLI (x) < XLI (y))
+#define EMACS_BST_VECTOR_SIZE 4
+#define EMACS_FNV_PRIME 1099511628211
+#define EMACS_FNV_OFFSET_BASIS 14695981039346656037
+
+DEFUN ("bst-assq", Fbst_assq, Sbst_assq,
+ 2, 2, 0,
+ doc: /* Return the value whose key is `eq' to KEY in BST.
+BST is either a vector whose first element is a key, second element is a value,
+and third and fourth elements are left and right children; or a non-vector. The
+left child's key must be less than BST's key. The right child's key must be
+greater than BST's key. Non-vector descendants of BST are ignored. */)
+ (Lisp_Object new_key, Lisp_Object bst)
+{
+ const struct Lisp_Vector *vector;
+ const Lisp_Object *contents;
+ Lisp_Object key;
+ for (;;)
+ {
+ if (!VECTORP (bst))
+ {
+ return Qnil;
+ }
+ vector = XVECTOR (bst);
+ if (vector->header.size != EMACS_BST_VECTOR_SIZE)
+ {
+ args_out_of_range_3 (bst,
+ make_number (0),
+ make_number (EMACS_BST_VECTOR_SIZE));
+ }
+ contents = vector->contents;
+ key = contents[0];
+ if (EQ (key, new_key))
+ {
+ return contents[1];
+ }
+ bst = contents[2 + EMACS_LT (key, new_key)];
+ }
+}
+
+static uint64_t
+fnv_1a_hash (const void *first, const void *last)
+{
+ const unsigned char *i;
+ uint64_t hash;
+ hash = (uint64_t) EMACS_CAT(EMACS_FNV_OFFSET_BASIS, ull);
+ for (i = (const unsigned char *) first; i != last; ++i)
+ {
+ hash ^= *i;
+ hash *= (uint64_t) EMACS_CAT(EMACS_FNV_PRIME, ull);
+ }
+ return hash;
+}
+
+static Lisp_Object
+treap_singleton (Lisp_Object key, Lisp_Object value, Lisp_Object nil)
+{
+ Lisp_Object result, *contents;
+ result = make_uninit_vector (EMACS_BST_VECTOR_SIZE);
+ contents = XVECTOR (result)->contents;
+ contents[0] = key;
+ contents[1] = value;
+ contents[2] = nil;
+ contents[3] = nil;
+ return result;
+}
+
+static void
+rotate (Lisp_Object *a, Lisp_Object *b, Lisp_Object *c)
+{
+ Lisp_Object d;
+ d = *a;
+ *a = *b;
+ *b = *c;
+ *c = d;
+}
+
+static void
+tree_rotate (Lisp_Object *root, int rotation_index, int opposite_index)
+{
+ Lisp_Object *opposite, *rotation;
+ opposite = XVECTOR (*root)->contents + opposite_index;
+ rotation = XVECTOR (*opposite)->contents + rotation_index;
+ rotate (opposite, rotation, root);
+}
+
+static Lisp_Object
+make_lisp_vector (struct Lisp_Vector *vector)
+{
+ return XIL (TAG_PTR (Lisp_Vectorlike, vector));
+}
+
+static uint64_t
+treap_hash (Lisp_Object object)
+{
+ EMACS_INT i;
+ i = XLI (object);
+ return fnv_1a_hash (&i, &i + 1);
+}
+
+static Lisp_Object
+treap_put (Lisp_Object, uint64_t, Lisp_Object, Lisp_Object);
+
+static Lisp_Object
+treap_vector_put (Lisp_Object new_key,
+ uint64_t new_hash,
+ Lisp_Object new_value,
+ struct Lisp_Vector *vector)
+{
+ const Lisp_Object *contents;
+ Lisp_Object key, value, new_treap, *new_contents;
+ int i, j;
+ if (vector->header.size != EMACS_BST_VECTOR_SIZE)
+ {
+ args_out_of_range_3 (make_lisp_vector (vector),
+ make_number (0),
+ make_number (EMACS_BST_VECTOR_SIZE));
+ }
+ contents = vector->contents;
+ key = contents[0];
+ value = contents[1];
+ new_treap = make_uninit_vector (EMACS_BST_VECTOR_SIZE);
+ new_contents = XVECTOR (new_treap)->contents;
+ new_contents[0] = key;
+ if (EQ (key, new_key))
+ {
+ new_contents[1] = new_value;
+ memcpy (new_contents + 2, contents + 2, 2 * sizeof *new_contents);
+ return new_treap;
+ }
+ new_contents[1] = value;
+ i = 2 + EMACS_LT (key, new_key);
+ j = EMACS_BST_VECTOR_SIZE + 1 - i;
+ new_contents[i] = treap_put (new_key, new_hash, new_value, contents[i]);
+ new_contents[j] = contents[j];
+ if (treap_hash (key) < new_hash)
+ {
+ tree_rotate (&new_treap, j, i);
+ }
+ return new_treap;
+}
+
+static Lisp_Object
+treap_put (Lisp_Object key, uint64_t hash, Lisp_Object value, Lisp_Object treap)
+{
+ if (VECTORP (treap))
+ {
+ return treap_vector_put (key, hash, value, XVECTOR (treap));
+ }
+ return treap_singleton (key, value, treap);
+}
+
+DEFUN ("treap-put", Ftreap_put, Streap_put,
+ 3, 3, 0,
+ doc: /* Associate KEY with VALUE in a treap derived from TREAP.
+If KEY is already present in TREAP, return a treap with VALUE replacing the
+existing value. TREAP will not be modified, though sharing of structure between
+the result treap and TREAP may occur. */)
+ (Lisp_Object key, Lisp_Object value, Lisp_Object treap)
+{
+ if (VECTORP (treap))
+ {
+ return treap_vector_put (key, treap_hash (key), value, XVECTOR (treap));
+ }
+ return treap_singleton (key, value, treap);
+}
+#undef EMACS_FNV_OFFSET_BASIS
+#undef EMACS_FNV_PRIME
+#undef EMACS_BST_VECTOR_SIZE
+#undef EMACS_LT
+#undef EMACS_CAT
+#undef EMACS_CAT_I
\f
/************************************************************************
MD5, SHA-1, and SHA-2
@@ -5232,4 +5407,6 @@ this variable. */);
defsubr (&Ssecure_hash);
defsubr (&Sbuffer_hash);
defsubr (&Slocale_info);
+ defsubr (&Sbst_assq);
+ defsubr (&Streap_put);
}
diff --git test/src/fns-tests.el test/src/fns-tests.el
index c533bad..1ef393e 100644
--- test/src/fns-tests.el
+++ test/src/fns-tests.el
@@ -245,3 +245,11 @@
(let ((data '((foo) (bar))))
(should (equal (mapcan #'identity data) '(foo bar)))
(should (equal data '((foo bar) (bar))))))
+
+(ert-deftest fns-tests-treap-put ()
+ (let ((n 64)
+ (treap nil))
+ (dotimes (i n)
+ (dotimes (j i) (should (equal (bst-assq j treap) j)))
+ (dotimes (j (- n i)) (should (equal (bst-assq (+ i j) treap) nil)))
+ (setq treap (treap-put i i treap)))))
next prev parent reply other threads:[~2016-12-04 17:39 UTC|newest]
Thread overview: 18+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-12-04 1:42 Binary Search Tree and Treap Functions bst-assq and treap-put Andy Sonnenburg
2016-12-04 4:33 ` Stefan Monnier
[not found] ` <CAHtDYY-xis2R4Nbvq_8Ht0nKsm6KjGqW=NSC7O3+5FvNF9w+Dg@mail.gmail.com>
[not found] ` <CAHtDYY9__BVOAs+vX=Tj8Bf31X6-Duv3f9MS8vcGwsnO2x74+w@mail.gmail.com>
2016-12-04 12:14 ` Andy Sonnenburg
2016-12-04 17:04 ` Stefan Monnier
2016-12-04 17:13 ` Andy Sonnenburg
2016-12-04 17:39 ` Andy Sonnenburg [this message]
2016-12-04 17:41 ` Eli Zaretskii
2016-12-12 6:15 ` John Wiegley
2016-12-12 12:56 ` Stefan Monnier
2016-12-12 16:46 ` John Wiegley
2016-12-12 16:58 ` Stefan Monnier
2016-12-12 17:06 ` John Wiegley
2016-12-12 17:29 ` Stefan Monnier
2017-10-22 16:35 ` Noam Postavsky
2017-10-22 16:44 ` Andreas Schwab
2017-10-22 16:44 ` Stefan Monnier
2017-10-22 17:12 ` Noam Postavsky
2017-10-23 0:37 ` Stefan Monnier
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: https://www.gnu.org/software/emacs/
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=CAHtDYY-iQPWFi0fd5Ld71tUHdydBZy0_Q4LgvHZs8o-9HVOj8g@mail.gmail.com \
--to=andy22286@gmail.com \
--cc=emacs-devel@gnu.org \
--cc=monnier@iro.umontreal.ca \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://git.savannah.gnu.org/cgit/emacs.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).