unofficial mirror of notmuch@notmuchmail.org
 help / color / mirror / code / Atom feed
* v1: fix for threading of replies to ghost messages
@ 2018-07-20 23:37 David Bremner
  2018-07-20 23:37 ` [PATCH 1/9] util: add DEBUG_PRINTF, rename error_util.h -> debug_print.h David Bremner
                   ` (9 more replies)
  0 siblings, 10 replies; 13+ messages in thread
From: David Bremner @ 2018-07-20 23:37 UTC (permalink / raw)
  To: notmuch

See the thread at id:87efgmmysi.fsf@len.workgroup for discussion.

This series finally bites the bullet and uses the References header
for threading.

Please test this series, there are lots of tricky cases with threading.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH 1/9] util: add DEBUG_PRINTF, rename error_util.h -> debug_print.h
  2018-07-20 23:37 v1: fix for threading of replies to ghost messages David Bremner
@ 2018-07-20 23:37 ` David Bremner
  2018-07-20 23:37 ` [PATCH 2/9] test: start threading test corpus David Bremner
                   ` (8 subsequent siblings)
  9 siblings, 0 replies; 13+ messages in thread
From: David Bremner @ 2018-07-20 23:37 UTC (permalink / raw)
  To: notmuch

It seemed silly to have two headers that were almost copies.
The new macro is intended for debugging, and probably should not be
left in released code.
---
 command-line-arguments.c             |  2 +-
 lib/notmuch-private.h                |  2 +-
 util/Makefile.local                  |  4 +--
 util/debug_print.c                   | 26 ++++++++++++++++++
 util/{error_util.h => debug_print.h} | 22 ++++++++++++---
 util/error_util.c                    | 40 ----------------------------
 util/hex-escape.c                    |  2 +-
 util/util.c                          |  2 +-
 util/xutil.c                         |  2 +-
 9 files changed, 51 insertions(+), 51 deletions(-)
 create mode 100644 util/debug_print.c
 rename util/{error_util.h => debug_print.h} (75%)
 delete mode 100644 util/error_util.c

diff --git a/command-line-arguments.c b/command-line-arguments.c
index d64aa85b..90d69453 100644
--- a/command-line-arguments.c
+++ b/command-line-arguments.c
@@ -1,7 +1,7 @@
 #include <assert.h>
 #include <string.h>
 #include <stdio.h>
-#include "error_util.h"
+#include "debug_print.h"
 #include "command-line-arguments.h"
 
 typedef enum {
diff --git a/lib/notmuch-private.h b/lib/notmuch-private.h
index 3764a6a9..499d73d4 100644
--- a/lib/notmuch-private.h
+++ b/lib/notmuch-private.h
@@ -50,7 +50,7 @@ NOTMUCH_BEGIN_DECLS
 #include "gmime-extra.h"
 
 #include "xutil.h"
-#include "error_util.h"
+#include "debug_print.h"
 #include "string-util.h"
 #include "crypto.h"
 
diff --git a/util/Makefile.local b/util/Makefile.local
index ba03230e..ccf1bd79 100644
--- a/util/Makefile.local
+++ b/util/Makefile.local
@@ -3,9 +3,9 @@
 dir := util
 extra_cflags += -I$(srcdir)/$(dir)
 
-libnotmuch_util_c_srcs := $(dir)/xutil.c $(dir)/error_util.c $(dir)/hex-escape.c \
+libnotmuch_util_c_srcs := $(dir)/xutil.c $(dir)/hex-escape.c \
 		  $(dir)/string-util.c $(dir)/talloc-extra.c $(dir)/zlib-extra.c \
-		$(dir)/util.c $(dir)/gmime-extra.c $(dir)/crypto.c
+		$(dir)/util.c $(dir)/gmime-extra.c $(dir)/crypto.c $(dir)/debug_print.c
 
 libnotmuch_util_modules := $(libnotmuch_util_c_srcs:.c=.o)
 
diff --git a/util/debug_print.c b/util/debug_print.c
new file mode 100644
index 00000000..9f00f69b
--- /dev/null
+++ b/util/debug_print.c
@@ -0,0 +1,26 @@
+#include "debug_print.h"
+
+void
+_debug_printf (const char *format, ...)
+{
+    va_list va_args;
+
+    va_start (va_args, format);
+    vfprintf (stderr, format, va_args);
+    va_end (va_args);
+    
+}
+
+void
+_internal_error (const char *format, ...)
+{
+    va_list va_args;
+
+    va_start (va_args, format);
+
+    fprintf (stderr, "Internal error: ");
+    vfprintf (stderr, format, va_args);
+
+    va_end (va_args);
+    exit (1);
+}
diff --git a/util/error_util.h b/util/debug_print.h
similarity index 75%
rename from util/error_util.h
rename to util/debug_print.h
index 4bb338a2..4a096945 100644
--- a/util/error_util.h
+++ b/util/debug_print.h
@@ -1,6 +1,7 @@
-/* error_util.h - Provide the INTERNAL_ERROR macro
+/* debug_print.h - Provide the INTERNAL_ERROR and DEBUG_PRINTF macros
  *
  * Copyright © 2009 Carl Worth
+ * Copyright © 2018 David Bremner
  *
  * This program is free software: you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -18,13 +19,26 @@
  * Author: Carl Worth <cworth@cworth.org>
  */
 
-#ifndef ERROR_UTIL_H
-#define ERROR_UTIL_H
+#ifndef DEBUG_PRINT_H
+#define DEBUG_PRINT_H
 
 #include <talloc.h>
-
 #include "function-attributes.h"
 
+void
+_debug_printf (const char *format, ...) PRINTF_ATTRIBUTE (1, 2);
+
+/*
+ * Provide a quick way to silence debugging output.
+ */
+
+#ifdef DEBUG_PRINT
+#define DEBUG_PRINTF(format, ...) _debug_printf(format " (%s).\n",	\
+						##__VA_ARGS__, __location__)
+#else
+#define DEBUG_PRINTF(format, ...) /* ignored */
+#endif
+
 /* There's no point in continuing when we've detected that we've done
  * something wrong internally (as opposed to the user passing in a
  * bogus value).
diff --git a/util/error_util.c b/util/error_util.c
deleted file mode 100644
index e64162c7..00000000
--- a/util/error_util.c
+++ /dev/null
@@ -1,40 +0,0 @@
-/* error_util.c - internal error utilities for notmuch.
- *
- * Copyright © 2009 Carl Worth
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see https://www.gnu.org/licenses/ .
- *
- * Author: Carl Worth <cworth@cworth.org>
- */
-
-#include <stdlib.h>
-#include <stdarg.h>
-#include <stdio.h>
-
-#include "error_util.h"
-
-void
-_internal_error (const char *format, ...)
-{
-    va_list va_args;
-
-    va_start (va_args, format);
-
-    fprintf (stderr, "Internal error: ");
-    vfprintf (stderr, format, va_args);
-
-    va_end (va_args);
-    exit (1);
-}
-
diff --git a/util/hex-escape.c b/util/hex-escape.c
index 8883ff90..ff20c702 100644
--- a/util/hex-escape.c
+++ b/util/hex-escape.c
@@ -22,7 +22,7 @@
 #include <string.h>
 #include <talloc.h>
 #include <ctype.h>
-#include "error_util.h"
+#include "debug_print.h"
 #include "hex-escape.h"
 
 static const char *output_charset =
diff --git a/util/util.c b/util/util.c
index 06659b35..75df0ead 100644
--- a/util/util.c
+++ b/util/util.c
@@ -1,5 +1,5 @@
 #include "util.h"
-#include "error_util.h"
+#include "debug_print.h"
 #include <string.h>
 #include <errno.h>
 
diff --git a/util/xutil.c b/util/xutil.c
index f211eaaa..11042c9e 100644
--- a/util/xutil.c
+++ b/util/xutil.c
@@ -22,7 +22,7 @@
 #include <string.h>
 
 #include "xutil.h"
-#include "error_util.h"
+#include "debug_print.h"
 
 void *
 xcalloc (size_t nmemb, size_t size)
-- 
2.18.0

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH 2/9] test: start threading test corpus
  2018-07-20 23:37 v1: fix for threading of replies to ghost messages David Bremner
  2018-07-20 23:37 ` [PATCH 1/9] util: add DEBUG_PRINTF, rename error_util.h -> debug_print.h David Bremner
@ 2018-07-20 23:37 ` David Bremner
  2018-07-20 23:37 ` [PATCH 3/9] test: add known broken tests for "ghost roots" David Bremner
                   ` (7 subsequent siblings)
  9 siblings, 0 replies; 13+ messages in thread
From: David Bremner @ 2018-07-20 23:37 UTC (permalink / raw)
  To: notmuch

The first set of messages in this corpus contains a reply to a
non-existent message, which causes it to be mistakenly classified as a
toplevel message in the thread.
---
 test/corpora/threading/ghost-root/child             | 9 +++++++++
 test/corpora/threading/ghost-root/fake-root         | 9 +++++++++
 test/corpora/threading/ghost-root/grand-child       | 9 +++++++++
 test/corpora/threading/ghost-root/grand-child2      | 9 +++++++++
 test/corpora/threading/ghost-root/great-grand-child | 9 +++++++++
 test/corpora/threading/ghost-root/real-root         | 7 +++++++
 6 files changed, 52 insertions(+)
 create mode 100644 test/corpora/threading/ghost-root/child
 create mode 100644 test/corpora/threading/ghost-root/fake-root
 create mode 100644 test/corpora/threading/ghost-root/grand-child
 create mode 100644 test/corpora/threading/ghost-root/grand-child2
 create mode 100644 test/corpora/threading/ghost-root/great-grand-child
 create mode 100644 test/corpora/threading/ghost-root/real-root

diff --git a/test/corpora/threading/ghost-root/child b/test/corpora/threading/ghost-root/child
new file mode 100644
index 00000000..4c36af95
--- /dev/null
+++ b/test/corpora/threading/ghost-root/child
@@ -0,0 +1,9 @@
+From: Alice <alice@example.org>
+To: Daniel <daniel@example.org>
+Subject: child message
+Message-ID: <001-child@example.org>
+In-Reply-To: <000-real-root@example.org>
+References:  <000-real-root@example.org>
+Date: Fri, 17 Jun 2016 22:14:41 -0400
+
+
diff --git a/test/corpora/threading/ghost-root/fake-root b/test/corpora/threading/ghost-root/fake-root
new file mode 100644
index 00000000..102bb228
--- /dev/null
+++ b/test/corpora/threading/ghost-root/fake-root
@@ -0,0 +1,9 @@
+From: Mallory <mallory@example.org>
+To: Daniel <daniel@example.org>
+Subject: fake root message
+Message-ID: <001-fake-message-root@example.org>
+In-Reply-to: <nonexistent-message@example.org>
+References: <000-real-root@example.org> <001-child@example.org> <nonexistent-message@example.org>
+Date: Thu, 16 Jun 2016 22:14:41 -0400
+
+This message has no reply-to
diff --git a/test/corpora/threading/ghost-root/grand-child b/test/corpora/threading/ghost-root/grand-child
new file mode 100644
index 00000000..5f77ac36
--- /dev/null
+++ b/test/corpora/threading/ghost-root/grand-child
@@ -0,0 +1,9 @@
+From: Alice <alice@example.org>
+To: Daniel <daniel@example.org>
+Subject: grand-child message
+Message-ID: <001-grand-child@example.org>
+In-Reply-To: <001-child@example.org>
+References:  <000-real-root@example.org> <001-child@example.org>
+Date: Fri, 17 Jun 2016 22:24:41 -0400
+
+
diff --git a/test/corpora/threading/ghost-root/grand-child2 b/test/corpora/threading/ghost-root/grand-child2
new file mode 100644
index 00000000..59682a95
--- /dev/null
+++ b/test/corpora/threading/ghost-root/grand-child2
@@ -0,0 +1,9 @@
+From: Daniel <daniel@example.org>
+To: Alice <alice@example.org>
+Subject: grand-child message 2
+Message-ID: <001-grand-child2@example.org>
+In-Reply-To: <001-child@example.org>
+References:  <000-real-root@example.org> <001-child@example.org>
+Date: Fri, 17 Jun 2016 22:34:41 -0400
+
+
diff --git a/test/corpora/threading/ghost-root/great-grand-child b/test/corpora/threading/ghost-root/great-grand-child
new file mode 100644
index 00000000..287a8954
--- /dev/null
+++ b/test/corpora/threading/ghost-root/great-grand-child
@@ -0,0 +1,9 @@
+From: Alice <alice@example.org>
+To: Daniel <daniel@example.org>
+Subject: great grand-child message
+Message-ID: <001-great-grand-child@example.org>
+In-Reply-To: <001-grand-child@example.org>
+References:  <000-real-root@example.org> <001-grand-child@example.org>
+Date: Fri, 17 Jun 2016 22:44:41 -0400
+
+
diff --git a/test/corpora/threading/ghost-root/real-root b/test/corpora/threading/ghost-root/real-root
new file mode 100644
index 00000000..97c9a6c4
--- /dev/null
+++ b/test/corpora/threading/ghost-root/real-root
@@ -0,0 +1,7 @@
+From: Alice <alice@example.org>
+To: Daniel <daniel@example.org>
+Subject: root message
+Message-ID: <000-real-root@example.org>
+Date: Thu, 16 Jun 2016 22:14:41 -0400
+
+This message has no reply-to
-- 
2.18.0

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH 3/9] test: add known broken tests for "ghost roots"
  2018-07-20 23:37 v1: fix for threading of replies to ghost messages David Bremner
  2018-07-20 23:37 ` [PATCH 1/9] util: add DEBUG_PRINTF, rename error_util.h -> debug_print.h David Bremner
  2018-07-20 23:37 ` [PATCH 2/9] test: start threading test corpus David Bremner
@ 2018-07-20 23:37 ` David Bremner
  2018-07-20 23:37 ` [PATCH 4/9] lib: read reference terms into message struct David Bremner
                   ` (6 subsequent siblings)
  9 siblings, 0 replies; 13+ messages in thread
From: David Bremner @ 2018-07-20 23:37 UTC (permalink / raw)
  To: notmuch

This documents the bug discussed at

     id:87efgmmysi.fsf@len.workgroup

The underlying issue is that the reply to a ghost (missing) message is
falsely classified as a root message in _resolve_thread_relationships.

The first test is simpler / more robust, but also easier to fool.
---
 test/T260-thread-order.sh                   | 27 +++++++++++++++++++++
 test/corpora/threading/ghost-root/fake-root |  2 +-
 2 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/test/T260-thread-order.sh b/test/T260-thread-order.sh
index fea61275..ce8636b9 100755
--- a/test/T260-thread-order.sh
+++ b/test/T260-thread-order.sh
@@ -75,4 +75,31 @@ $(for ((i = 0; i < $nthreads; i++)); do
 done
 test_expect_equal "$output" "$expected"
 
+add_email_corpus threading
+
+test_begin_subtest "reply to ghost"
+test_subtest_known_broken
+notmuch show --entire-thread=true id:000-real-root@example.org | grep ^Subject: | head -1  > OUTPUT
+cat <<EOF > EXPECTED
+Subject: root message
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "reply to ghost (tree view)"
+test_subtest_known_broken
+test_emacs '(notmuch-tree "tag:inbox")
+	    (notmuch-test-wait)
+	    (test-output)
+	    (delete-other-windows)'
+cat <<EOF > EXPECTED
+  2016-06-17  Alice                 ┬►root message                                        (inbox unread)
+  2016-06-18  Alice                 ╰┬►child message                                      (inbox unread)
+  2016-06-18  Alice                  ├┬►grand-child message                               (inbox unread)
+  2016-06-18  Alice                  │╰─►great grand-child message                        (inbox unread)
+  2016-06-18  Daniel                 ├─►grand-child message 2                             (inbox unread)
+  2016-06-17  Mallory                ╰─►fake root message                                 (inbox unread)
+End of search results.
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
 test_done
diff --git a/test/corpora/threading/ghost-root/fake-root b/test/corpora/threading/ghost-root/fake-root
index 102bb228..5be228fd 100644
--- a/test/corpora/threading/ghost-root/fake-root
+++ b/test/corpora/threading/ghost-root/fake-root
@@ -6,4 +6,4 @@ In-Reply-to: <nonexistent-message@example.org>
 References: <000-real-root@example.org> <001-child@example.org> <nonexistent-message@example.org>
 Date: Thu, 16 Jun 2016 22:14:41 -0400
 
-This message has no reply-to
+This message has a reply-to to a non-existent message
-- 
2.18.0

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH 4/9] lib: read reference terms into message struct.
  2018-07-20 23:37 v1: fix for threading of replies to ghost messages David Bremner
                   ` (2 preceding siblings ...)
  2018-07-20 23:37 ` [PATCH 3/9] test: add known broken tests for "ghost roots" David Bremner
@ 2018-07-20 23:37 ` David Bremner
  2018-07-20 23:37 ` [PATCH 5/9] lib/thread: refactor in-reply-to test David Bremner
                   ` (5 subsequent siblings)
  9 siblings, 0 replies; 13+ messages in thread
From: David Bremner @ 2018-07-20 23:37 UTC (permalink / raw)
  To: notmuch

The plan is to use these in resolving threads.
---
 lib/message.cc        | 17 +++++++++++++++++
 lib/notmuch-private.h |  3 +++
 2 files changed, 20 insertions(+)

diff --git a/lib/message.cc b/lib/message.cc
index 153e4bed..790d26f5 100644
--- a/lib/message.cc
+++ b/lib/message.cc
@@ -41,6 +41,7 @@ struct _notmuch_message {
     notmuch_message_file_t *message_file;
     notmuch_string_list_t *property_term_list;
     notmuch_string_map_t *property_map;
+    notmuch_string_list_t *reference_list;
     notmuch_message_list_t *replies;
     unsigned long flags;
     /* For flags that are initialized on-demand, lazy_flags indicates
@@ -129,6 +130,7 @@ _notmuch_message_create_for_document (const void *talloc_owner,
     message->author = NULL;
     message->property_term_list = NULL;
     message->property_map = NULL;
+    message->reference_list = NULL;
 
     message->replies = _notmuch_message_list_create (message);
     if (unlikely (message->replies == NULL)) {
@@ -349,6 +351,7 @@ _notmuch_message_ensure_metadata (notmuch_message_t *message, void *field)
 	*type_prefix = _find_prefix ("type"),
 	*filename_prefix = _find_prefix ("file-direntry"),
 	*property_prefix = _find_prefix ("property"),
+	*reference_prefix = _find_prefix ("reference"),
 	*replyto_prefix = _find_prefix ("replyto");
 
     /* We do this all in a single pass because Xapian decompresses the
@@ -413,6 +416,14 @@ _notmuch_message_ensure_metadata (notmuch_message_t *message, void *field)
 		    _notmuch_database_get_terms_with_prefix (message, i, end,
 							 property_prefix);
 
+	    /* get references */
+	    assert (strcmp (property_prefix, reference_prefix) < 0);
+	    if (!message->reference_list) {
+		message->reference_list =
+		    _notmuch_database_get_terms_with_prefix (message, i, end,
+							     reference_prefix);
+	    }
+
 	    /* Get reply to */
 	    assert (strcmp (property_prefix, replyto_prefix) < 0);
 	    if (!message->in_reply_to)
@@ -588,6 +599,12 @@ _notmuch_message_add_reply (notmuch_message_t *message,
     _notmuch_message_list_add_message (message->replies, reply);
 }
 
+const notmuch_string_list_t *
+_notmuch_message_get_references (notmuch_message_t *message)
+{
+    return message->reference_list;
+}
+
 notmuch_messages_t *
 notmuch_message_get_replies (notmuch_message_t *message)
 {
diff --git a/lib/notmuch-private.h b/lib/notmuch-private.h
index 499d73d4..e7bbc156 100644
--- a/lib/notmuch-private.h
+++ b/lib/notmuch-private.h
@@ -580,6 +580,9 @@ _notmuch_string_list_append (notmuch_string_list_t *list,
 void
 _notmuch_string_list_sort (notmuch_string_list_t *list);
 
+const notmuch_string_list_t *
+_notmuch_message_get_references(notmuch_message_t *message);
+
 /* string-map.c */
 typedef struct _notmuch_string_map  notmuch_string_map_t;
 typedef struct _notmuch_string_map_iterator notmuch_string_map_iterator_t;
-- 
2.18.0

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH 5/9] lib/thread: refactor in-reply-to test.
  2018-07-20 23:37 v1: fix for threading of replies to ghost messages David Bremner
                   ` (3 preceding siblings ...)
  2018-07-20 23:37 ` [PATCH 4/9] lib: read reference terms into message struct David Bremner
@ 2018-07-20 23:37 ` David Bremner
  2018-07-20 23:37 ` [PATCH 6/9] lib: initial fix for "ghost replyto" David Bremner
                   ` (4 subsequent siblings)
  9 siblings, 0 replies; 13+ messages in thread
From: David Bremner @ 2018-07-20 23:37 UTC (permalink / raw)
  To: notmuch

This is not a complete win in code-size, but it makes the code (which
is about to get more complicated) easier to follow
---
 lib/thread.cc | 40 +++++++++++++++++++++++-----------------
 1 file changed, 23 insertions(+), 17 deletions(-)

diff --git a/lib/thread.cc b/lib/thread.cc
index e961c76b..93508359 100644
--- a/lib/thread.cc
+++ b/lib/thread.cc
@@ -387,12 +387,30 @@ _thread_add_matched_message (notmuch_thread_t *thread,
     _thread_add_matched_author (thread, _notmuch_message_get_author (hashed_message));
 }
 
+static bool
+_parent_via_in_reply_to (notmuch_thread_t *thread, notmuch_message_t *message) {
+    notmuch_message_t *parent;
+    const char *in_reply_to;
+
+    in_reply_to = _notmuch_message_get_in_reply_to (message);
+    DEBUG_PRINTF("checking in_reply_to=%s\n", in_reply_to);
+
+    if (in_reply_to && strlen (in_reply_to) &&
+	g_hash_table_lookup_extended (thread->message_hash,
+				      in_reply_to, NULL,
+				      (void **) &parent)) {
+	_notmuch_message_add_reply (parent, message);
+	return true;
+    } else {
+	return false;
+    }
+}
+
 static void
 _resolve_thread_relationships (notmuch_thread_t *thread)
 {
     notmuch_message_node_t *node, *first_node;
-    notmuch_message_t *message, *parent;
-    const char *in_reply_to;
+    notmuch_message_t *message;
 
     first_node = thread->message_list->head;
     if (! first_node)
@@ -400,13 +418,7 @@ _resolve_thread_relationships (notmuch_thread_t *thread)
 
     for (node = first_node->next; node; node = node->next) {
 	message = node->message;
-	in_reply_to = _notmuch_message_get_in_reply_to (message);
-	if (in_reply_to && strlen (in_reply_to) &&
-	    g_hash_table_lookup_extended (thread->message_hash,
-					  in_reply_to, NULL,
-					  (void **) &parent))
-	    _notmuch_message_add_reply (parent, message);
-	else
+	if (! _parent_via_in_reply_to (thread, message))
 	    _notmuch_message_list_add_message (thread->toplevel_list, message);
     }
 
@@ -418,14 +430,8 @@ _resolve_thread_relationships (notmuch_thread_t *thread)
      */
     if (first_node) {
 	message = first_node->message;
-	in_reply_to = _notmuch_message_get_in_reply_to (message);
-	if (thread->toplevel_list->head &&
-	    in_reply_to && strlen (in_reply_to) &&
-	    g_hash_table_lookup_extended (thread->message_hash,
-					  in_reply_to, NULL,
-					  (void **) &parent))
-	    _notmuch_message_add_reply (parent, message);
-	else
+	if (! thread->toplevel_list->head ||
+	    ! _parent_via_in_reply_to (thread, message))
 	    _notmuch_message_list_add_message (thread->toplevel_list, message);
     }
 
-- 
2.18.0

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH 6/9] lib: initial fix for "ghost replyto"
  2018-07-20 23:37 v1: fix for threading of replies to ghost messages David Bremner
                   ` (4 preceding siblings ...)
  2018-07-20 23:37 ` [PATCH 5/9] lib/thread: refactor in-reply-to test David Bremner
@ 2018-07-20 23:37 ` David Bremner
  2018-07-20 23:37 ` [PATCH 7/9] lib: calculate message depth in thread David Bremner
                   ` (3 subsequent siblings)
  9 siblings, 0 replies; 13+ messages in thread
From: David Bremner @ 2018-07-20 23:37 UTC (permalink / raw)
  To: notmuch

This fixes the failing test of _resolve_thread_relationships
introduced above, but will probably place messages in-reply-to
ghosts (i.e. in-reply-to missing messages) too far up in the thread
in more complicated examples. In particular it does not follow the
suggestion in the XXX: comment to choose the deepest parent.
---
 lib/thread.cc             | 53 ++++++++++++++++++++++++++++++++++-----
 test/T260-thread-order.sh |  1 -
 2 files changed, 47 insertions(+), 7 deletions(-)

diff --git a/lib/thread.cc b/lib/thread.cc
index 93508359..417235ea 100644
--- a/lib/thread.cc
+++ b/lib/thread.cc
@@ -406,22 +406,52 @@ _parent_via_in_reply_to (notmuch_thread_t *thread, notmuch_message_t *message) {
     }
 }
 
+static void
+_parent_or_toplevel (notmuch_thread_t *thread, notmuch_message_t *message)
+{
+    bool found = false;
+    notmuch_message_t *parent = NULL;
+    const notmuch_string_list_t *references =
+	_notmuch_message_get_references (message);
+    for (notmuch_string_node_t *ref_node = references->head;
+	 ! found && ref_node; ref_node = ref_node->next) {
+	if ((found = g_hash_table_lookup_extended (thread->message_hash,
+						   ref_node->string, NULL,
+						   (void **) &parent))) {
+	    _notmuch_message_add_reply (parent, message);
+	}
+    }
+    if (! found)
+	_notmuch_message_list_add_message (thread->toplevel_list, message);
+}
+
 static void
 _resolve_thread_relationships (notmuch_thread_t *thread)
 {
     notmuch_message_node_t *node, *first_node;
     notmuch_message_t *message;
+    void *local;
+    notmuch_message_list_t *maybe_toplevel_list;
 
     first_node = thread->message_list->head;
     if (! first_node)
 	return;
 
+    local = talloc_new (thread);
+    maybe_toplevel_list = _notmuch_message_list_create (local);
+
     for (node = first_node->next; node; node = node->next) {
 	message = node->message;
 	if (! _parent_via_in_reply_to (thread, message))
-	    _notmuch_message_list_add_message (thread->toplevel_list, message);
+	    _notmuch_message_list_add_message (maybe_toplevel_list, message);
     }
 
+    for (notmuch_messages_t *roots = _notmuch_messages_create (maybe_toplevel_list);
+	 notmuch_messages_valid (roots);
+	 notmuch_messages_move_to_next (roots)) {
+	notmuch_message_t *message = notmuch_messages_get (roots);
+	_parent_or_toplevel (thread, message);
+    }
     /*
      * if we reach the end of the list without finding a top-level
      * message, that means the thread is a cycle (or set of cycles)
@@ -431,20 +461,31 @@ _resolve_thread_relationships (notmuch_thread_t *thread)
     if (first_node) {
 	message = first_node->message;
 	if (! thread->toplevel_list->head ||
-	    ! _parent_via_in_reply_to (thread, message))
-	    _notmuch_message_list_add_message (thread->toplevel_list, message);
+	    ! _parent_via_in_reply_to (thread, message)) {
+	    /*
+	     * If the oldest message happens to be in-reply-to a
+	     * missing message, we only check for references if there
+	     * is some other candidate for root message.
+	     */
+	    if (thread->toplevel_list->head)
+		_parent_or_toplevel (thread, message);
+	    else
+		_notmuch_message_list_add_message (thread->toplevel_list, message);
+	}
     }
 
     /* XXX: After scanning through the entire list looking for parents
      * via "In-Reply-To", we should do a second pass that looks at the
-     * list of messages IDs in the "References" header instead. (And
-     * for this the parent would be the "deepest" message of all the
-     * messages found in the "References" list.)
+     * list of messages IDs in the "References" header instead.
+     * Unlike the current quick fix, the parent should be the
+     * "deepest" message of all the messages found in the "References"
+     * list.
      *
      * Doing this will allow messages and sub-threads to be positioned
      * correctly in the thread even when an intermediate message is
      * missing from the thread.
      */
+    talloc_free (local);
 }
 
 /* Create a new notmuch_thread_t object by finding the thread
diff --git a/test/T260-thread-order.sh b/test/T260-thread-order.sh
index ce8636b9..9565f296 100755
--- a/test/T260-thread-order.sh
+++ b/test/T260-thread-order.sh
@@ -78,7 +78,6 @@ test_expect_equal "$output" "$expected"
 add_email_corpus threading
 
 test_begin_subtest "reply to ghost"
-test_subtest_known_broken
 notmuch show --entire-thread=true id:000-real-root@example.org | grep ^Subject: | head -1  > OUTPUT
 cat <<EOF > EXPECTED
 Subject: root message
-- 
2.18.0

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH 7/9] lib: calculate message depth in thread
  2018-07-20 23:37 v1: fix for threading of replies to ghost messages David Bremner
                   ` (5 preceding siblings ...)
  2018-07-20 23:37 ` [PATCH 6/9] lib: initial fix for "ghost replyto" David Bremner
@ 2018-07-20 23:37 ` David Bremner
  2018-07-20 23:37 ` [PATCH 8/9] lib/thread: rewrite _parent_or_toplevel to use depths David Bremner
                   ` (2 subsequent siblings)
  9 siblings, 0 replies; 13+ messages in thread
From: David Bremner @ 2018-07-20 23:37 UTC (permalink / raw)
  To: notmuch

This will be used in reparenting messages without useful in-reply-to,
but with useful references
---
 lib/message.cc        | 23 +++++++++++++++++++++++
 lib/notmuch-private.h |  5 +++++
 2 files changed, 28 insertions(+)

diff --git a/lib/message.cc b/lib/message.cc
index 790d26f5..1567f14b 100644
--- a/lib/message.cc
+++ b/lib/message.cc
@@ -32,6 +32,7 @@ struct _notmuch_message {
     int frozen;
     char *message_id;
     char *thread_id;
+    size_t thread_depth;
     char *in_reply_to;
     notmuch_string_list_t *tag_list;
     notmuch_string_list_t *filename_term_list;
@@ -118,6 +119,9 @@ _notmuch_message_create_for_document (const void *talloc_owner,
     /* the message is initially not synchronized with Xapian */
     message->last_view = 0;
 
+    /* Calculated after the thread structure is computed */
+    message->thread_depth = 0;
+
     /* Each of these will be lazily created as needed. */
     message->message_id = NULL;
     message->thread_id = NULL;
@@ -599,6 +603,25 @@ _notmuch_message_add_reply (notmuch_message_t *message,
     _notmuch_message_list_add_message (message->replies, reply);
 }
 
+size_t
+_notmuch_message_get_thread_depth (notmuch_message_t *message) {
+    return message->thread_depth;
+}
+
+void
+_notmuch_message_label_depths (notmuch_message_t *message,
+			       size_t depth)
+{
+    message->thread_depth = depth;
+
+    for (notmuch_messages_t *messages = _notmuch_messages_create (message->replies);
+	 notmuch_messages_valid (messages);
+	 notmuch_messages_move_to_next (messages)) {
+	notmuch_message_t *child = notmuch_messages_get (messages);
+	_notmuch_message_label_depths (child, depth+1);
+    }
+}
+
 const notmuch_string_list_t *
 _notmuch_message_get_references (notmuch_message_t *message)
 {
diff --git a/lib/notmuch-private.h b/lib/notmuch-private.h
index e7bbc156..385cf7a5 100644
--- a/lib/notmuch-private.h
+++ b/lib/notmuch-private.h
@@ -539,6 +539,11 @@ _notmuch_message_remove_unprefixed_terms (notmuch_message_t *message);
 const char *
 _notmuch_message_get_thread_id_only(notmuch_message_t *message);
 
+size_t _notmuch_message_get_thread_depth (notmuch_message_t *message);
+
+void
+_notmuch_message_label_depths (notmuch_message_t *message,
+			       size_t depth);
 /* sha1.c */
 
 char *
-- 
2.18.0

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH 8/9] lib/thread: rewrite _parent_or_toplevel to use depths
  2018-07-20 23:37 v1: fix for threading of replies to ghost messages David Bremner
                   ` (6 preceding siblings ...)
  2018-07-20 23:37 ` [PATCH 7/9] lib: calculate message depth in thread David Bremner
@ 2018-07-20 23:37 ` David Bremner
  2018-07-20 23:37 ` [PATCH 9/9] lib/thread: change _resolve_thread_relationships " David Bremner
  2018-07-21 17:35 ` v1: fix for threading of replies to ghost messages Gregor Zattler
  9 siblings, 0 replies; 13+ messages in thread
From: David Bremner @ 2018-07-20 23:37 UTC (permalink / raw)
  To: notmuch

This is part 1/2 of changing the reparenting of alleged toplevel
messages to use a "deep" reference rather than just the first one
found.
---
 lib/thread.cc | 34 +++++++++++++++++++++++++++-------
 1 file changed, 27 insertions(+), 7 deletions(-)

diff --git a/lib/thread.cc b/lib/thread.cc
index 417235ea..2eed724b 100644
--- a/lib/thread.cc
+++ b/lib/thread.cc
@@ -409,20 +409,40 @@ _parent_via_in_reply_to (notmuch_thread_t *thread, notmuch_message_t *message) {
 static void
 _parent_or_toplevel (notmuch_thread_t *thread, notmuch_message_t *message)
 {
-    bool found = false;
+    size_t max_depth = 0;
+    notmuch_message_t *new_parent;
     notmuch_message_t *parent = NULL;
     const notmuch_string_list_t *references =
 	_notmuch_message_get_references (message);
+
+    DEBUG_PRINTF("trying to reparent via references: %s\n",
+		     notmuch_message_get_message_id (message));
+
     for (notmuch_string_node_t *ref_node = references->head;
-	 ! found && ref_node; ref_node = ref_node->next) {
-	if ((found = g_hash_table_lookup_extended (thread->message_hash,
-						   ref_node->string, NULL,
-						   (void **) &parent))) {
-	    _notmuch_message_add_reply (parent, message);
+	 ref_node; ref_node = ref_node->next) {
+	DEBUG_PRINTF("checking reference=%s\n", ref_node->string);
+	if ((g_hash_table_lookup_extended (thread->message_hash,
+					   ref_node->string, NULL,
+					   (void **) &new_parent))) {
+	    size_t new_depth = _notmuch_message_get_thread_depth (new_parent);
+	    DEBUG_PRINTF("got depth %lu\n", new_depth);
+	    if (new_depth > max_depth || !parent) {
+		DEBUG_PRINTF("adding at depth %lu parent=%s\n", new_depth, ref_node->string);
+		max_depth = new_depth;
+		parent = new_parent;
+	    }
 	}
     }
-    if (! found)
+    if (parent) {
+	DEBUG_PRINTF("adding reply %s to parent=%s\n",
+		 notmuch_message_get_message_id (message),
+		 notmuch_message_get_message_id (parent));
+	_notmuch_message_add_reply (parent, message);
+    } else {
+	DEBUG_PRINTF("adding as toplevel %s\n",
+		 notmuch_message_get_message_id (message));
 	_notmuch_message_list_add_message (thread->toplevel_list, message);
+    }
 }
 
 static void
-- 
2.18.0

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH 9/9] lib/thread: change _resolve_thread_relationships to use depths
  2018-07-20 23:37 v1: fix for threading of replies to ghost messages David Bremner
                   ` (7 preceding siblings ...)
  2018-07-20 23:37 ` [PATCH 8/9] lib/thread: rewrite _parent_or_toplevel to use depths David Bremner
@ 2018-07-20 23:37 ` David Bremner
  2018-07-21 17:35 ` v1: fix for threading of replies to ghost messages Gregor Zattler
  9 siblings, 0 replies; 13+ messages in thread
From: David Bremner @ 2018-07-20 23:37 UTC (permalink / raw)
  To: notmuch

We (finally) implement the XXX comment. It requires a bit of care not
to reparent all of the possible toplevel messages. There is some
slightly naughty low level access to the list structures here that can
be cleaned up in a later commit.
---
 lib/thread.cc             | 53 +++++++++++++++++++--------------------
 test/T260-thread-order.sh |  1 -
 2 files changed, 26 insertions(+), 28 deletions(-)

diff --git a/lib/thread.cc b/lib/thread.cc
index 2eed724b..6a567caa 100644
--- a/lib/thread.cc
+++ b/lib/thread.cc
@@ -466,12 +466,6 @@ _resolve_thread_relationships (notmuch_thread_t *thread)
 	    _notmuch_message_list_add_message (maybe_toplevel_list, message);
     }
 
-    for (notmuch_messages_t *roots = _notmuch_messages_create (maybe_toplevel_list);
-	 notmuch_messages_valid (roots);
-	 notmuch_messages_move_to_next (roots)) {
-	notmuch_message_t *message = notmuch_messages_get (roots);
-	_parent_or_toplevel (thread, message);
-    }
     /*
      * if we reach the end of the list without finding a top-level
      * message, that means the thread is a cycle (or set of cycles)
@@ -480,31 +474,36 @@ _resolve_thread_relationships (notmuch_thread_t *thread)
      */
     if (first_node) {
 	message = first_node->message;
-	if (! thread->toplevel_list->head ||
+	DEBUG_PRINTF("checking first message  %s\n",
+		     notmuch_message_get_message_id (message));
+
+	if (! maybe_toplevel_list->head ||
 	    ! _parent_via_in_reply_to (thread, message)) {
-	    /*
-	     * If the oldest message happens to be in-reply-to a
-	     * missing message, we only check for references if there
-	     * is some other candidate for root message.
-	     */
-	    if (thread->toplevel_list->head)
-		_parent_or_toplevel (thread, message);
-	    else
-		_notmuch_message_list_add_message (thread->toplevel_list, message);
+	    DEBUG_PRINTF("adding first message as toplevel = %s\n",
+		     notmuch_message_get_message_id (message));
+	    _notmuch_message_list_add_message (maybe_toplevel_list, message);
 	}
     }
 
-    /* XXX: After scanning through the entire list looking for parents
-     * via "In-Reply-To", we should do a second pass that looks at the
-     * list of messages IDs in the "References" header instead.
-     * Unlike the current quick fix, the parent should be the
-     * "deepest" message of all the messages found in the "References"
-     * list.
-     *
-     * Doing this will allow messages and sub-threads to be positioned
-     * correctly in the thread even when an intermediate message is
-     * missing from the thread.
-     */
+    for (notmuch_messages_t *messages = _notmuch_messages_create (maybe_toplevel_list);
+	 notmuch_messages_valid (messages);
+	 notmuch_messages_move_to_next (messages))
+    {
+	notmuch_message_t *message = notmuch_messages_get (messages);
+	_notmuch_message_label_depths (message, 0);
+    }
+
+    for (notmuch_messages_t *roots = _notmuch_messages_create (maybe_toplevel_list);
+	 notmuch_messages_valid (roots);
+	 notmuch_messages_move_to_next (roots)) {
+	notmuch_message_t *message = notmuch_messages_get (roots);
+	/* XXX add _notmuch_messages_count */
+	if (roots->iterator->next || thread->toplevel_list->head)
+	    _parent_or_toplevel (thread, message);
+	else
+	    _notmuch_message_list_add_message (thread->toplevel_list, message);
+    }
+
     talloc_free (local);
 }
 
diff --git a/test/T260-thread-order.sh b/test/T260-thread-order.sh
index 9565f296..0dd985c2 100755
--- a/test/T260-thread-order.sh
+++ b/test/T260-thread-order.sh
@@ -85,7 +85,6 @@ EOF
 test_expect_equal_file EXPECTED OUTPUT
 
 test_begin_subtest "reply to ghost (tree view)"
-test_subtest_known_broken
 test_emacs '(notmuch-tree "tag:inbox")
 	    (notmuch-test-wait)
 	    (test-output)
-- 
2.18.0

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: v1: fix for threading of replies to ghost messages
  2018-07-20 23:37 v1: fix for threading of replies to ghost messages David Bremner
                   ` (8 preceding siblings ...)
  2018-07-20 23:37 ` [PATCH 9/9] lib/thread: change _resolve_thread_relationships " David Bremner
@ 2018-07-21 17:35 ` Gregor Zattler
  2018-07-23  8:41   ` David Bremner
  9 siblings, 1 reply; 13+ messages in thread
From: Gregor Zattler @ 2018-07-21 17:35 UTC (permalink / raw)
  To: notmuch

Hi David,
* David Bremner <david@tethera.net> [2018-07-21; 08:37]:
> See the thread at id:87efgmmysi.fsf@len.workgroup for discussion.
>
> This series finally bites the bullet and uses the References header
> for threading.
>
> Please test this series, there are lots of tricky cases with threading.

I applied the patches and looked with notmuch-emacs for threads
which before would probably show wrong ordering of messages.  The
half a dozen threads were all shown in the correct order with
respect to parents and children, but I found from those not
disclosable RT ticket mails (I mentioned in above referenced
thread):

- Threads where the "Resolved" message was shown first but it's
  References: header contains only a fake Message-Id: (which is
  strange: why does notmuch show this resolved-mail in this
  thread?).  


- one thread where two children of one message were shown with
  correct (meaning: same) indentation but the older one before
  (over) the newer one.


I'll ask if I may disclose this threads if this it would be
helpful for diagnosis.


Thanks for your attention and work regarding my problem report
with notmuch.

Ciao; Gregor 

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: v1: fix for threading of replies to ghost messages
  2018-07-21 17:35 ` v1: fix for threading of replies to ghost messages Gregor Zattler
@ 2018-07-23  8:41   ` David Bremner
  2018-07-23 12:09     ` Gregor Zattler
  0 siblings, 1 reply; 13+ messages in thread
From: David Bremner @ 2018-07-23  8:41 UTC (permalink / raw)
  To: Gregor Zattler, notmuch

Gregor Zattler <telegraph@gmx.net> writes:

>
> I applied the patches and looked with notmuch-emacs for threads
> which before would probably show wrong ordering of messages.  The
> half a dozen threads were all shown in the correct order with
> respect to parents and children, but I found from those not
> disclosable RT ticket mails (I mentioned in above referenced
> thread):

Can you also try the series at

    id:20180723082259.32440-1-david@tethera.net?

I'm not sure if it's related, but it does fix replies for a different
bug tracker. You will have to reindex the messages in question
(e.g. using notmuch reindex).

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: v1: fix for threading of replies to ghost messages
  2018-07-23  8:41   ` David Bremner
@ 2018-07-23 12:09     ` Gregor Zattler
  0 siblings, 0 replies; 13+ messages in thread
From: Gregor Zattler @ 2018-07-23 12:09 UTC (permalink / raw)
  To: notmuch

Hi David,
* David Bremner <david@tethera.net> [2018-07-23; 16:41]:
> Gregor Zattler <telegraph@gmx.net> writes:
>> I applied the patches and looked with notmuch-emacs for threads
>> which before would probably show wrong ordering of messages.  The
>> half a dozen threads were all shown in the correct order with
>> respect to parents and children, but I found from those not
>> disclosable RT ticket mails (I mentioned in above referenced
>> thread):
>
> Can you also try the series at
>
>     id:20180723082259.32440-1-david@tethera.net?
>
> I'm not sure if it's related, but it does fix replies for a different
> bug tracker. You will have to reindex the messages in question
> (e.g. using notmuch reindex).

I applied this patch set on top of the other one, copied
the relevant threads to a test-maildir, changed path in
.notmuch-config, thus building a new index: Same order of
messages in notmuch-emacs-show as with the first patch set only.

I asked for permission to disclose two small threads, have to
wait for answer, will report back


Thanks again; Gregor
-- 
 -... --- .-. . -.. ..--.. ...-.-

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2018-07-23 12:10 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-07-20 23:37 v1: fix for threading of replies to ghost messages David Bremner
2018-07-20 23:37 ` [PATCH 1/9] util: add DEBUG_PRINTF, rename error_util.h -> debug_print.h David Bremner
2018-07-20 23:37 ` [PATCH 2/9] test: start threading test corpus David Bremner
2018-07-20 23:37 ` [PATCH 3/9] test: add known broken tests for "ghost roots" David Bremner
2018-07-20 23:37 ` [PATCH 4/9] lib: read reference terms into message struct David Bremner
2018-07-20 23:37 ` [PATCH 5/9] lib/thread: refactor in-reply-to test David Bremner
2018-07-20 23:37 ` [PATCH 6/9] lib: initial fix for "ghost replyto" David Bremner
2018-07-20 23:37 ` [PATCH 7/9] lib: calculate message depth in thread David Bremner
2018-07-20 23:37 ` [PATCH 8/9] lib/thread: rewrite _parent_or_toplevel to use depths David Bremner
2018-07-20 23:37 ` [PATCH 9/9] lib/thread: change _resolve_thread_relationships " David Bremner
2018-07-21 17:35 ` v1: fix for threading of replies to ghost messages Gregor Zattler
2018-07-23  8:41   ` David Bremner
2018-07-23 12:09     ` Gregor Zattler

Code repositories for project(s) associated with this public inbox

	https://yhetil.org/notmuch.git/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).