all messages for Emacs-related lists mirrored at yhetil.org
 help / color / mirror / code / Atom feed
* bug#73883: [PATCH] Fix internal_equal so that it uses at most one hash table
@ 2024-10-19 10:53 Ethan Kong
  2024-10-20 11:47 ` bug#73883: [PATCH] Fix 'equal' freezing on large cyclic objects Ethan Kong
  2024-10-20 11:55 ` bug#73883: [PATCH] Fix internal_equal so that it uses at most one hash table Stefan Kangas
  0 siblings, 2 replies; 3+ messages in thread
From: Ethan Kong @ 2024-10-19 10:53 UTC (permalink / raw)
  To: 73883


[-- Attachment #1.1: Type: text/plain, Size: 2850 bytes --]

Tags: patch

Hi,

When I use 'equal' on large, cyclic objects, emacs freezes.

Reproduce:

    ;; emacs -Q
    (require 'org-element)

    ;; get two identical `org-element' trees of this file
    ;;
https://github.com/emacs-mirror/emacs/blob/master/doc/misc/modus-themes.org
    (let ((file (expand-file-name "doc/misc/modus-themes.org"
                                  source-directory)))
      (with-current-buffer (find-file-noselect file)
        (setq org-o1 (org-element-parse-buffer))
        (org-element-cache-reset)
        (setq org-o2 (org-element-parse-buffer))))

    ;; the test
    (equal org-o1 org-o2)

This equal call freezes emacs. And the memory usage of emacs grows
quickly, until I quit. Reproduced with emacs 29.4 and the master branch.


The cause:

Current code on the master branch:

    static bool
    internal_equal (Lisp_Object o1, Lisp_Object o2, enum equal_kind
equal_kind,
                    int depth, Lisp_Object ht)
    {
     tail_recurse:
      if (depth > 10)
        {
          // ...
          if (NILP (ht))
            ht = CALLN (Fmake_hash_table, QCtest, Qeq);
          // ...
        }
      // ...
            if (! internal_equal (XCAR (o1), XCAR (o2),
                                  equal_kind, depth + 1, ht))
              return false;
            // ...
    }

When internal_equal calls itself, it passes the hash table argument 'ht'
by value. As a result, each internal_equal(depth=11) call initializes
its own hash table, separately recording the objects it encounters in
further recursive calls. This leads to excessive 'hash_put' and
significant memory allocation.


Fix:

Make internal_equal pass the hash table by pointer (see the patch).

With this patch, the test above returns quickly:

    (benchmark-run 100 (equal org-o1 org-o2))

(2.562444 20 0.30933900000000014)

This will also improve the performance for smaller cyclic objects:

    ;; emacs -Q
    (defun make-cyclic (n m)
      (let* ((l1 (make-list n 1))
             (l2 (make-list m 2)))
        (setf (nth (1- m) l2) l1)
        (setf (nth (1- n) l1) l2)
        l1))

    (let ((a (make-cyclic 100 7))
          (b (make-cyclic 100 7)))
      (cl-assert (equal a b))
      (benchmark-run 10000 (equal a b)))

Current:
(0.530081 32 0.49294199999999977)

With the patch:
(0.036401 1 0.0173350000000001)

And it passes all the tests in 'make fns-tests'.


P.S. Could I get the copyright assignment paperwork please?

Best,
Ethan Kong


In GNU Emacs 29.4 (build 2, x86_64-w64-mingw32) of 2024-07-05 built on
 AVALON
Windowing system distributor 'Microsoft Corp.', version 10.0.22631
System Description: Microsoft Windows 10 Pro (v10.0.2009.22631.4317)

Configured using:
 'configure --with-modules --without-dbus --with-native-compilation=aot
 --without-compress-install --with-sqlite3 --with-tree-sitter
 CFLAGS=-O2'

[-- Attachment #1.2: Type: text/html, Size: 3571 bytes --]

[-- Attachment #2: 0001-Fix-internal_equal-so-that-it-uses-at-most-one-hash-.patch --]
[-- Type: application/octet-stream, Size: 2830 bytes --]

From 425270179bb22630b7ad5871fcfcee84737b55fb Mon Sep 17 00:00:00 2001
From: Ethan Kong <ek.ethan.kong@gmail.com>
Date: Sat, 19 Oct 2024 12:43:27 +0800
Subject: [PATCH] Fix internal_equal so that it uses at most one hash table

* src/fns.c (internal_equal): Delegate to internal_equal_1.
(internal_equal_1): New function. Pass the hash table argument by
pointer instead of by value.
---
 src/fns.c | 23 +++++++++++++++--------
 1 file changed, 15 insertions(+), 8 deletions(-)

diff --git a/src/fns.c b/src/fns.c
index 2de04d06519..ef6922c137b 100644
--- a/src/fns.c
+++ b/src/fns.c
@@ -2823,8 +2823,8 @@ equal_no_quit (Lisp_Object o1, Lisp_Object o2)
    if EQUAL_KIND == EQUAL_NO_QUIT.  */
 
 static bool
-internal_equal (Lisp_Object o1, Lisp_Object o2, enum equal_kind equal_kind,
-		int depth, Lisp_Object ht)
+internal_equal_1 (Lisp_Object o1, Lisp_Object o2, enum equal_kind equal_kind,
+		  int depth, Lisp_Object *ht)
 {
  tail_recurse:
   if (depth > 10)
@@ -2832,13 +2832,13 @@ internal_equal (Lisp_Object o1, Lisp_Object o2, enum equal_kind equal_kind,
       eassert (equal_kind != EQUAL_NO_QUIT);
       if (depth > 200)
 	error ("Stack overflow in equal");
-      if (NILP (ht))
-	ht = CALLN (Fmake_hash_table, QCtest, Qeq);
+      if (NILP (*ht))
+	*ht = CALLN (Fmake_hash_table, QCtest, Qeq);
       switch (XTYPE (o1))
 	{
 	case Lisp_Cons: case Lisp_Vectorlike:
 	  {
-	    struct Lisp_Hash_Table *h = XHASH_TABLE (ht);
+	    struct Lisp_Hash_Table *h = XHASH_TABLE (*ht);
 	    hash_hash_t hash = hash_from_key (h, o1);
 	    ptrdiff_t i = hash_lookup_with_hash (h, o1, hash);
 	    if (i >= 0)
@@ -2888,8 +2888,8 @@ internal_equal (Lisp_Object o1, Lisp_Object o2, enum equal_kind equal_kind,
 	  {
 	    if (! CONSP (o2))
 	      return false;
-	    if (! internal_equal (XCAR (o1), XCAR (o2),
-				  equal_kind, depth + 1, ht))
+	    if (! internal_equal_1 (XCAR (o1), XCAR (o2),
+				    equal_kind, depth + 1, ht))
 	      return false;
 	    o2 = XCDR (o2);
 	    if (EQ (XCDR (o1), o2))
@@ -2964,7 +2964,7 @@ internal_equal (Lisp_Object o1, Lisp_Object o2, enum equal_kind equal_kind,
 	    Lisp_Object v1, v2;
 	    v1 = AREF (o1, i);
 	    v2 = AREF (o2, i);
-	    if (!internal_equal (v1, v2, equal_kind, depth + 1, ht))
+	    if (!internal_equal_1 (v1, v2, equal_kind, depth + 1, ht))
 	      return false;
 	  }
 	return true;
@@ -2985,6 +2985,13 @@ internal_equal (Lisp_Object o1, Lisp_Object o2, enum equal_kind equal_kind,
   return false;
 }
 
+static bool
+internal_equal (Lisp_Object o1, Lisp_Object o2, enum equal_kind equal_kind,
+		int depth, Lisp_Object ht)
+{
+  return internal_equal_1 (o1, o2, equal_kind, depth, &ht);
+}
+
 /* Return -1/0/1 for the </=/> lexicographic relation between bool-vectors.  */
 static int
 bool_vector_cmp (Lisp_Object a, Lisp_Object b)
-- 
2.43.0.windows.1


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* bug#73883: [PATCH] Fix 'equal' freezing on large cyclic objects
  2024-10-19 10:53 bug#73883: [PATCH] Fix internal_equal so that it uses at most one hash table Ethan Kong
@ 2024-10-20 11:47 ` Ethan Kong
  2024-10-20 11:55 ` bug#73883: [PATCH] Fix internal_equal so that it uses at most one hash table Stefan Kangas
  1 sibling, 0 replies; 3+ messages in thread
From: Ethan Kong @ 2024-10-20 11:47 UTC (permalink / raw)
  To: 73883


[-- Attachment #1.1: Type: text/plain, Size: 3436 bytes --]

I realize the previous title might have been misleading, so I'm replying 
with a
more suitable one. The summary line of the patch is also updated.

On 2024/10/19 18:53, Ethan Kong wrote:
> Tags: patch
>
> Hi,
>
> When I use 'equal' on large, cyclic objects, emacs freezes.
>
> Reproduce:
>
>     ;; emacs -Q
>     (require 'org-element)
>
>     ;; get two identical `org-element' trees of this file
>     ;; 
> https://github.com/emacs-mirror/emacs/blob/master/doc/misc/modus-themes.org
>     (let ((file (expand-file-name "doc/misc/modus-themes.org 
> <http://modus-themes.org>"
>                                   source-directory)))
>       (with-current-buffer (find-file-noselect file)
>         (setq org-o1 (org-element-parse-buffer))
>         (org-element-cache-reset)
>         (setq org-o2 (org-element-parse-buffer))))
>
>     ;; the test
>     (equal org-o1 org-o2)
>
> This equal call freezes emacs. And the memory usage of emacs grows
> quickly, until I quit. Reproduced with emacs 29.4 and the master branch.
>
>
> The cause:
>
> Current code on the master branch:
>
>     static bool
>     internal_equal (Lisp_Object o1, Lisp_Object o2, enum equal_kind 
> equal_kind,
>                     int depth, Lisp_Object ht)
>     {
>      tail_recurse:
>       if (depth > 10)
>         {
>           // ...
>           if (NILP (ht))
>             ht = CALLN (Fmake_hash_table, QCtest, Qeq);
>           // ...
>         }
>       // ...
>             if (! internal_equal (XCAR (o1), XCAR (o2),
>                                   equal_kind, depth + 1, ht))
>               return false;
>             // ...
>     }
>
> When internal_equal calls itself, it passes the hash table argument 'ht'
> by value. As a result, each internal_equal(depth=11) call initializes
> its own hash table, separately recording the objects it encounters in
> further recursive calls. This leads to excessive 'hash_put' and
> significant memory allocation.
>
>
> Fix:
>
> Make internal_equal pass the hash table by pointer (see the patch).
>
> With this patch, the test above returns quickly:
>
>     (benchmark-run 100 (equal org-o1 org-o2))
>
> (2.562444 20 0.30933900000000014)
>
> This will also improve the performance for smaller cyclic objects:
>
>     ;; emacs -Q
>     (defun make-cyclic (n m)
>       (let* ((l1 (make-list n 1))
>              (l2 (make-list m 2)))
>         (setf (nth (1- m) l2) l1)
>         (setf (nth (1- n) l1) l2)
>         l1))
>
>     (let ((a (make-cyclic 100 7))
>           (b (make-cyclic 100 7)))
>       (cl-assert (equal a b))
>       (benchmark-run 10000 (equal a b)))
>
> Current:
> (0.530081 32 0.49294199999999977)
>
> With the patch:
> (0.036401 1 0.0173350000000001)
>
> And it passes all the tests in 'make fns-tests'.
>
>
> P.S. Could I get the copyright assignment paperwork please?
>
> Best,
> Ethan Kong
>
>
> In GNU Emacs 29.4 (build 2, x86_64-w64-mingw32) of 2024-07-05 built on
>  AVALON
> Windowing system distributor 'Microsoft Corp.', version 10.0.22631
> System Description: Microsoft Windows 10 Pro (v10.0.2009.22631.4317)
>
> Configured using:
>  'configure --with-modules --without-dbus --with-native-compilation=aot
>  --without-compress-install --with-sqlite3 --with-tree-sitter
>  CFLAGS=-O2'

[-- Attachment #1.2: Type: text/html, Size: 5274 bytes --]

[-- Attachment #2: 0001-Fix-equal-freezing-on-large-cyclic-objects.patch --]
[-- Type: text/plain, Size: 2817 bytes --]

From 9fbb76eaf0b53dc3c729d87597063e61237175d7 Mon Sep 17 00:00:00 2001
From: Ethan Kong <ek.ethan.kong@gmail.com>
Date: Sat, 19 Oct 2024 12:43:27 +0800
Subject: [PATCH] Fix 'equal' freezing on large cyclic objects

* src/fns.c (internal_equal): Delegate to internal_equal_1.
(internal_equal_1): New function. Pass the hash table argument by
pointer instead of by value.
---
 src/fns.c | 23 +++++++++++++++--------
 1 file changed, 15 insertions(+), 8 deletions(-)

diff --git a/src/fns.c b/src/fns.c
index 2de04d06519..ef6922c137b 100644
--- a/src/fns.c
+++ b/src/fns.c
@@ -2823,8 +2823,8 @@ equal_no_quit (Lisp_Object o1, Lisp_Object o2)
    if EQUAL_KIND == EQUAL_NO_QUIT.  */
 
 static bool
-internal_equal (Lisp_Object o1, Lisp_Object o2, enum equal_kind equal_kind,
-		int depth, Lisp_Object ht)
+internal_equal_1 (Lisp_Object o1, Lisp_Object o2, enum equal_kind equal_kind,
+		  int depth, Lisp_Object *ht)
 {
  tail_recurse:
   if (depth > 10)
@@ -2832,13 +2832,13 @@ internal_equal (Lisp_Object o1, Lisp_Object o2, enum equal_kind equal_kind,
       eassert (equal_kind != EQUAL_NO_QUIT);
       if (depth > 200)
 	error ("Stack overflow in equal");
-      if (NILP (ht))
-	ht = CALLN (Fmake_hash_table, QCtest, Qeq);
+      if (NILP (*ht))
+	*ht = CALLN (Fmake_hash_table, QCtest, Qeq);
       switch (XTYPE (o1))
 	{
 	case Lisp_Cons: case Lisp_Vectorlike:
 	  {
-	    struct Lisp_Hash_Table *h = XHASH_TABLE (ht);
+	    struct Lisp_Hash_Table *h = XHASH_TABLE (*ht);
 	    hash_hash_t hash = hash_from_key (h, o1);
 	    ptrdiff_t i = hash_lookup_with_hash (h, o1, hash);
 	    if (i >= 0)
@@ -2888,8 +2888,8 @@ internal_equal (Lisp_Object o1, Lisp_Object o2, enum equal_kind equal_kind,
 	  {
 	    if (! CONSP (o2))
 	      return false;
-	    if (! internal_equal (XCAR (o1), XCAR (o2),
-				  equal_kind, depth + 1, ht))
+	    if (! internal_equal_1 (XCAR (o1), XCAR (o2),
+				    equal_kind, depth + 1, ht))
 	      return false;
 	    o2 = XCDR (o2);
 	    if (EQ (XCDR (o1), o2))
@@ -2964,7 +2964,7 @@ internal_equal (Lisp_Object o1, Lisp_Object o2, enum equal_kind equal_kind,
 	    Lisp_Object v1, v2;
 	    v1 = AREF (o1, i);
 	    v2 = AREF (o2, i);
-	    if (!internal_equal (v1, v2, equal_kind, depth + 1, ht))
+	    if (!internal_equal_1 (v1, v2, equal_kind, depth + 1, ht))
 	      return false;
 	  }
 	return true;
@@ -2985,6 +2985,13 @@ internal_equal (Lisp_Object o1, Lisp_Object o2, enum equal_kind equal_kind,
   return false;
 }
 
+static bool
+internal_equal (Lisp_Object o1, Lisp_Object o2, enum equal_kind equal_kind,
+		int depth, Lisp_Object ht)
+{
+  return internal_equal_1 (o1, o2, equal_kind, depth, &ht);
+}
+
 /* Return -1/0/1 for the </=/> lexicographic relation between bool-vectors.  */
 static int
 bool_vector_cmp (Lisp_Object a, Lisp_Object b)
-- 
2.43.0.windows.1


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* bug#73883: [PATCH] Fix internal_equal so that it uses at most one hash table
  2024-10-19 10:53 bug#73883: [PATCH] Fix internal_equal so that it uses at most one hash table Ethan Kong
  2024-10-20 11:47 ` bug#73883: [PATCH] Fix 'equal' freezing on large cyclic objects Ethan Kong
@ 2024-10-20 11:55 ` Stefan Kangas
  1 sibling, 0 replies; 3+ messages in thread
From: Stefan Kangas @ 2024-10-20 11:55 UTC (permalink / raw)
  To: Ethan Kong, 73883

Ethan Kong <ek.ethan.kong@gmail.com> writes:

> P.S. Could I get the copyright assignment paperwork please?

Form sent off-list.





^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2024-10-20 11:55 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-10-19 10:53 bug#73883: [PATCH] Fix internal_equal so that it uses at most one hash table Ethan Kong
2024-10-20 11:47 ` bug#73883: [PATCH] Fix 'equal' freezing on large cyclic objects Ethan Kong
2024-10-20 11:55 ` bug#73883: [PATCH] Fix internal_equal so that it uses at most one hash table Stefan Kangas

Code repositories for project(s) associated with this external index

	https://git.savannah.gnu.org/cgit/emacs.git
	https://git.savannah.gnu.org/cgit/emacs/org-mode.git

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.