* [PATCH 01/15] util: add DEBUG_PRINTF, rename error_util.h -> debug_print.h
2018-07-30 22:45 Threading patches v2 David Bremner
@ 2018-07-30 22:45 ` David Bremner
2018-07-30 22:45 ` [PATCH 02/15] test: start threading test corpus David Bremner
` (14 subsequent siblings)
15 siblings, 0 replies; 22+ messages in thread
From: David Bremner @ 2018-07-30 22:45 UTC (permalink / raw)
To: notmuch
It seemed silly to have two headers that were almost copies.
The new macro is intended for debugging, and probably should not be
left in released code.
---
command-line-arguments.c | 2 +-
lib/notmuch-private.h | 2 +-
util/Makefile.local | 4 +--
util/debug_print.c | 26 ++++++++++++++++++
util/{error_util.h => debug_print.h} | 22 ++++++++++++---
util/error_util.c | 40 ----------------------------
util/hex-escape.c | 2 +-
util/util.c | 2 +-
util/xutil.c | 2 +-
9 files changed, 51 insertions(+), 51 deletions(-)
create mode 100644 util/debug_print.c
rename util/{error_util.h => debug_print.h} (75%)
delete mode 100644 util/error_util.c
diff --git a/command-line-arguments.c b/command-line-arguments.c
index d64aa85b..90d69453 100644
--- a/command-line-arguments.c
+++ b/command-line-arguments.c
@@ -1,7 +1,7 @@
#include <assert.h>
#include <string.h>
#include <stdio.h>
-#include "error_util.h"
+#include "debug_print.h"
#include "command-line-arguments.h"
typedef enum {
diff --git a/lib/notmuch-private.h b/lib/notmuch-private.h
index 3764a6a9..499d73d4 100644
--- a/lib/notmuch-private.h
+++ b/lib/notmuch-private.h
@@ -50,7 +50,7 @@ NOTMUCH_BEGIN_DECLS
#include "gmime-extra.h"
#include "xutil.h"
-#include "error_util.h"
+#include "debug_print.h"
#include "string-util.h"
#include "crypto.h"
diff --git a/util/Makefile.local b/util/Makefile.local
index ba03230e..ccf1bd79 100644
--- a/util/Makefile.local
+++ b/util/Makefile.local
@@ -3,9 +3,9 @@
dir := util
extra_cflags += -I$(srcdir)/$(dir)
-libnotmuch_util_c_srcs := $(dir)/xutil.c $(dir)/error_util.c $(dir)/hex-escape.c \
+libnotmuch_util_c_srcs := $(dir)/xutil.c $(dir)/hex-escape.c \
$(dir)/string-util.c $(dir)/talloc-extra.c $(dir)/zlib-extra.c \
- $(dir)/util.c $(dir)/gmime-extra.c $(dir)/crypto.c
+ $(dir)/util.c $(dir)/gmime-extra.c $(dir)/crypto.c $(dir)/debug_print.c
libnotmuch_util_modules := $(libnotmuch_util_c_srcs:.c=.o)
diff --git a/util/debug_print.c b/util/debug_print.c
new file mode 100644
index 00000000..9f00f69b
--- /dev/null
+++ b/util/debug_print.c
@@ -0,0 +1,26 @@
+#include "debug_print.h"
+
+void
+_debug_printf (const char *format, ...)
+{
+ va_list va_args;
+
+ va_start (va_args, format);
+ vfprintf (stderr, format, va_args);
+ va_end (va_args);
+
+}
+
+void
+_internal_error (const char *format, ...)
+{
+ va_list va_args;
+
+ va_start (va_args, format);
+
+ fprintf (stderr, "Internal error: ");
+ vfprintf (stderr, format, va_args);
+
+ va_end (va_args);
+ exit (1);
+}
diff --git a/util/error_util.h b/util/debug_print.h
similarity index 75%
rename from util/error_util.h
rename to util/debug_print.h
index 4bb338a2..4a096945 100644
--- a/util/error_util.h
+++ b/util/debug_print.h
@@ -1,6 +1,7 @@
-/* error_util.h - Provide the INTERNAL_ERROR macro
+/* debug_print.h - Provide the INTERNAL_ERROR and DEBUG_PRINTF macros
*
* Copyright © 2009 Carl Worth
+ * Copyright © 2018 David Bremner
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -18,13 +19,26 @@
* Author: Carl Worth <cworth@cworth.org>
*/
-#ifndef ERROR_UTIL_H
-#define ERROR_UTIL_H
+#ifndef DEBUG_PRINT_H
+#define DEBUG_PRINT_H
#include <talloc.h>
-
#include "function-attributes.h"
+void
+_debug_printf (const char *format, ...) PRINTF_ATTRIBUTE (1, 2);
+
+/*
+ * Provide a quick way to silence debugging output.
+ */
+
+#ifdef DEBUG_PRINT
+#define DEBUG_PRINTF(format, ...) _debug_printf(format " (%s).\n", \
+ ##__VA_ARGS__, __location__)
+#else
+#define DEBUG_PRINTF(format, ...) /* ignored */
+#endif
+
/* There's no point in continuing when we've detected that we've done
* something wrong internally (as opposed to the user passing in a
* bogus value).
diff --git a/util/error_util.c b/util/error_util.c
deleted file mode 100644
index e64162c7..00000000
--- a/util/error_util.c
+++ /dev/null
@@ -1,40 +0,0 @@
-/* error_util.c - internal error utilities for notmuch.
- *
- * Copyright © 2009 Carl Worth
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see https://www.gnu.org/licenses/ .
- *
- * Author: Carl Worth <cworth@cworth.org>
- */
-
-#include <stdlib.h>
-#include <stdarg.h>
-#include <stdio.h>
-
-#include "error_util.h"
-
-void
-_internal_error (const char *format, ...)
-{
- va_list va_args;
-
- va_start (va_args, format);
-
- fprintf (stderr, "Internal error: ");
- vfprintf (stderr, format, va_args);
-
- va_end (va_args);
- exit (1);
-}
-
diff --git a/util/hex-escape.c b/util/hex-escape.c
index 8883ff90..ff20c702 100644
--- a/util/hex-escape.c
+++ b/util/hex-escape.c
@@ -22,7 +22,7 @@
#include <string.h>
#include <talloc.h>
#include <ctype.h>
-#include "error_util.h"
+#include "debug_print.h"
#include "hex-escape.h"
static const char *output_charset =
diff --git a/util/util.c b/util/util.c
index 06659b35..75df0ead 100644
--- a/util/util.c
+++ b/util/util.c
@@ -1,5 +1,5 @@
#include "util.h"
-#include "error_util.h"
+#include "debug_print.h"
#include <string.h>
#include <errno.h>
diff --git a/util/xutil.c b/util/xutil.c
index f211eaaa..11042c9e 100644
--- a/util/xutil.c
+++ b/util/xutil.c
@@ -22,7 +22,7 @@
#include <string.h>
#include "xutil.h"
-#include "error_util.h"
+#include "debug_print.h"
void *
xcalloc (size_t nmemb, size_t size)
--
2.18.0
^ permalink raw reply related [flat|nested] 22+ messages in thread
* [PATCH 02/15] test: start threading test corpus
2018-07-30 22:45 Threading patches v2 David Bremner
2018-07-30 22:45 ` [PATCH 01/15] util: add DEBUG_PRINTF, rename error_util.h -> debug_print.h David Bremner
@ 2018-07-30 22:45 ` David Bremner
2018-07-30 22:45 ` [PATCH 03/15] test: add known broken tests for "ghost roots" David Bremner
` (13 subsequent siblings)
15 siblings, 0 replies; 22+ messages in thread
From: David Bremner @ 2018-07-30 22:45 UTC (permalink / raw)
To: notmuch
The first set of messages in this corpus contains a reply to a
non-existent message, which causes it to be mistakenly classified as a
toplevel message in the thread.
---
test/corpora/threading/ghost-root/child | 9 +++++++++
test/corpora/threading/ghost-root/fake-root | 9 +++++++++
test/corpora/threading/ghost-root/grand-child | 9 +++++++++
test/corpora/threading/ghost-root/grand-child2 | 9 +++++++++
test/corpora/threading/ghost-root/great-grand-child | 9 +++++++++
test/corpora/threading/ghost-root/real-root | 7 +++++++
6 files changed, 52 insertions(+)
create mode 100644 test/corpora/threading/ghost-root/child
create mode 100644 test/corpora/threading/ghost-root/fake-root
create mode 100644 test/corpora/threading/ghost-root/grand-child
create mode 100644 test/corpora/threading/ghost-root/grand-child2
create mode 100644 test/corpora/threading/ghost-root/great-grand-child
create mode 100644 test/corpora/threading/ghost-root/real-root
diff --git a/test/corpora/threading/ghost-root/child b/test/corpora/threading/ghost-root/child
new file mode 100644
index 00000000..4c36af95
--- /dev/null
+++ b/test/corpora/threading/ghost-root/child
@@ -0,0 +1,9 @@
+From: Alice <alice@example.org>
+To: Daniel <daniel@example.org>
+Subject: child message
+Message-ID: <001-child@example.org>
+In-Reply-To: <000-real-root@example.org>
+References: <000-real-root@example.org>
+Date: Fri, 17 Jun 2016 22:14:41 -0400
+
+
diff --git a/test/corpora/threading/ghost-root/fake-root b/test/corpora/threading/ghost-root/fake-root
new file mode 100644
index 00000000..a698185d
--- /dev/null
+++ b/test/corpora/threading/ghost-root/fake-root
@@ -0,0 +1,9 @@
+From: Mallory <mallory@example.org>
+To: Daniel <daniel@example.org>
+Subject: fake root message
+Message-ID: <001-fake-message-root@example.org>
+In-Reply-to: <nonexistent-message@example.org>
+References: <000-real-root@example.org> <001-child@example.org> <nonexistent-message@example.org>
+Date: Thu, 16 Jun 2016 22:14:41 -0400
+
+This message has an in-reply-to pointing to a non-existent message
diff --git a/test/corpora/threading/ghost-root/grand-child b/test/corpora/threading/ghost-root/grand-child
new file mode 100644
index 00000000..5f77ac36
--- /dev/null
+++ b/test/corpora/threading/ghost-root/grand-child
@@ -0,0 +1,9 @@
+From: Alice <alice@example.org>
+To: Daniel <daniel@example.org>
+Subject: grand-child message
+Message-ID: <001-grand-child@example.org>
+In-Reply-To: <001-child@example.org>
+References: <000-real-root@example.org> <001-child@example.org>
+Date: Fri, 17 Jun 2016 22:24:41 -0400
+
+
diff --git a/test/corpora/threading/ghost-root/grand-child2 b/test/corpora/threading/ghost-root/grand-child2
new file mode 100644
index 00000000..59682a95
--- /dev/null
+++ b/test/corpora/threading/ghost-root/grand-child2
@@ -0,0 +1,9 @@
+From: Daniel <daniel@example.org>
+To: Alice <alice@example.org>
+Subject: grand-child message 2
+Message-ID: <001-grand-child2@example.org>
+In-Reply-To: <001-child@example.org>
+References: <000-real-root@example.org> <001-child@example.org>
+Date: Fri, 17 Jun 2016 22:34:41 -0400
+
+
diff --git a/test/corpora/threading/ghost-root/great-grand-child b/test/corpora/threading/ghost-root/great-grand-child
new file mode 100644
index 00000000..287a8954
--- /dev/null
+++ b/test/corpora/threading/ghost-root/great-grand-child
@@ -0,0 +1,9 @@
+From: Alice <alice@example.org>
+To: Daniel <daniel@example.org>
+Subject: great grand-child message
+Message-ID: <001-great-grand-child@example.org>
+In-Reply-To: <001-grand-child@example.org>
+References: <000-real-root@example.org> <001-grand-child@example.org>
+Date: Fri, 17 Jun 2016 22:44:41 -0400
+
+
diff --git a/test/corpora/threading/ghost-root/real-root b/test/corpora/threading/ghost-root/real-root
new file mode 100644
index 00000000..f1b16a0c
--- /dev/null
+++ b/test/corpora/threading/ghost-root/real-root
@@ -0,0 +1,7 @@
+From: Alice <alice@example.org>
+To: Daniel <daniel@example.org>
+Subject: root message
+Message-ID: <000-real-root@example.org>
+Date: Thu, 16 Jun 2016 22:14:41 -0400
+
+This message has no in-reply-to
--
2.18.0
^ permalink raw reply related [flat|nested] 22+ messages in thread
* [PATCH 03/15] test: add known broken tests for "ghost roots"
2018-07-30 22:45 Threading patches v2 David Bremner
2018-07-30 22:45 ` [PATCH 01/15] util: add DEBUG_PRINTF, rename error_util.h -> debug_print.h David Bremner
2018-07-30 22:45 ` [PATCH 02/15] test: start threading test corpus David Bremner
@ 2018-07-30 22:45 ` David Bremner
2018-07-30 22:45 ` [PATCH 04/15] lib/thread: sort child messages by date David Bremner
` (12 subsequent siblings)
15 siblings, 0 replies; 22+ messages in thread
From: David Bremner @ 2018-07-30 22:45 UTC (permalink / raw)
To: notmuch
This documents the bug discussed at
id:87efgmmysi.fsf@len.workgroup
The underlying issue is that the reply to a ghost (missing) message is
falsely classified as a root message in _resolve_thread_relationships.
The first test is simpler / more robust, but also easier to fool.
---
test/T510-thread-replies.sh | 26 ++++++++++++++++++++++++++
1 file changed, 26 insertions(+)
diff --git a/test/T510-thread-replies.sh b/test/T510-thread-replies.sh
index 6837ff17..b7322198 100755
--- a/test/T510-thread-replies.sh
+++ b/test/T510-thread-replies.sh
@@ -164,5 +164,31 @@ expected='[[[{"id": "XXXXX", "match": true, "excluded": false,
expected=`echo "$expected" | notmuch_json_show_sanitize`
test_expect_equal_json "$output" "$expected"
+add_email_corpus threading
+
+test_begin_subtest "reply to ghost"
+test_subtest_known_broken
+notmuch show --entire-thread=true id:000-real-root@example.org | grep ^Subject: | head -1 > OUTPUT
+cat <<EOF > EXPECTED
+Subject: root message
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "reply to ghost (tree view)"
+test_subtest_known_broken
+test_emacs '(notmuch-tree "id:000-real-root@example.org")
+ (notmuch-test-wait)
+ (test-output)
+ (delete-other-windows)'
+cat <<EOF > EXPECTED
+ 2016-06-17 Alice ┬►root message (inbox unread)
+ 2016-06-18 Alice ╰┬►child message (inbox unread)
+ 2016-06-17 Mallory ├─►fake root message (inbox unread)
+ 2016-06-18 Alice ├┬►grand-child message (inbox unread)
+ 2016-06-18 Alice │╰─►great grand-child message (inbox unread)
+ 2016-06-18 Daniel ╰─►grand-child message 2 (inbox unread)
+End of search results.
+EOF
+test_expect_equal_file EXPECTED OUTPUT
test_done
--
2.18.0
^ permalink raw reply related [flat|nested] 22+ messages in thread
* [PATCH 04/15] lib/thread: sort child messages by date
2018-07-30 22:45 Threading patches v2 David Bremner
` (2 preceding siblings ...)
2018-07-30 22:45 ` [PATCH 03/15] test: add known broken tests for "ghost roots" David Bremner
@ 2018-07-30 22:45 ` David Bremner
2018-07-30 22:45 ` [PATCH 05/15] lib: read reference terms into message struct David Bremner
` (11 subsequent siblings)
15 siblings, 0 replies; 22+ messages in thread
From: David Bremner @ 2018-07-30 22:45 UTC (permalink / raw)
To: notmuch
This will not change anything currently, as the child messages are
already added in date order. In the future we will add some messages
in a second pass out of order and the sorting will be useful.
---
lib/message.cc | 41 +++++++++++++++++++++++++++++++++++++++++
lib/notmuch-private.h | 3 +++
lib/thread.cc | 9 +++++++++
3 files changed, 53 insertions(+)
diff --git a/lib/message.cc b/lib/message.cc
index 153e4bed..107dcf35 100644
--- a/lib/message.cc
+++ b/lib/message.cc
@@ -588,6 +588,47 @@ _notmuch_message_add_reply (notmuch_message_t *message,
_notmuch_message_list_add_message (message->replies, reply);
}
+static int
+_cmpmsg (const void *pa, const void *pb)
+{
+ notmuch_message_t **a = (notmuch_message_t **) pa;
+ notmuch_message_t **b = (notmuch_message_t **) pb;
+ time_t time_a = notmuch_message_get_date (*a);
+ time_t time_b = notmuch_message_get_date (*b);
+ /* Three-way compare; casting difftime()'s double to int can overflow. */
+ return (time_a > time_b) - (time_a < time_b);
+}
+
+void
+_notmuch_message_sort_subtree (notmuch_message_t *root)
+{
+ size_t child_count = 0;
+ size_t child_capacity = 16;
+ notmuch_message_t **children = talloc_zero_array (root, notmuch_message_t *, child_capacity);
+
+ for (notmuch_messages_t *messages = _notmuch_messages_create (root->replies);
+ notmuch_messages_valid (messages);
+ notmuch_messages_move_to_next (messages)) {
+ notmuch_message_t *child = notmuch_messages_get (messages);
+ if (child_count >= child_capacity) {
+ child_capacity *= 2;
+ children = talloc_realloc (root, children, notmuch_message_t *, child_capacity);
+ }
+ children[child_count++] = child;
+ _notmuch_message_sort_subtree (child);
+ }
+
+ notmuch_message_list_t *new_replies = _notmuch_message_list_create (root);
+
+ qsort (children, child_count, sizeof (notmuch_message_t *), _cmpmsg);
+ for (size_t i=0; i<child_count; i++){
+ _notmuch_message_list_add_message (new_replies, children[i]);
+ }
+ talloc_free (root->replies);
+ root->replies = new_replies;
+ talloc_free (children);
+}
+
notmuch_messages_t *
notmuch_message_get_replies (notmuch_message_t *message)
{
diff --git a/lib/notmuch-private.h b/lib/notmuch-private.h
index 499d73d4..10bb7040 100644
--- a/lib/notmuch-private.h
+++ b/lib/notmuch-private.h
@@ -539,6 +539,9 @@ _notmuch_message_remove_unprefixed_terms (notmuch_message_t *message);
const char *
_notmuch_message_get_thread_id_only(notmuch_message_t *message);
+void
+_notmuch_message_sort_subtree (notmuch_message_t *message);
+
/* sha1.c */
char *
diff --git a/lib/thread.cc b/lib/thread.cc
index e961c76b..db592a3a 100644
--- a/lib/thread.cc
+++ b/lib/thread.cc
@@ -429,6 +429,15 @@ _resolve_thread_relationships (notmuch_thread_t *thread)
_notmuch_message_list_add_message (thread->toplevel_list, message);
}
+ for (notmuch_messages_t *messages = _notmuch_messages_create (thread->toplevel_list);
+ notmuch_messages_valid (messages);
+ notmuch_messages_move_to_next (messages))
+ {
+ notmuch_message_t *message = notmuch_messages_get (messages);
+ _notmuch_message_sort_subtree (message);
+ }
+
+
/* XXX: After scanning through the entire list looking for parents
* via "In-Reply-To", we should do a second pass that looks at the
* list of messages IDs in the "References" header instead. (And
--
2.18.0
^ permalink raw reply related [flat|nested] 22+ messages in thread
* [PATCH 05/15] lib: read reference terms into message struct.
2018-07-30 22:45 Threading patches v2 David Bremner
` (3 preceding siblings ...)
2018-07-30 22:45 ` [PATCH 04/15] lib/thread: sort child messages by date David Bremner
@ 2018-07-30 22:45 ` David Bremner
2018-07-30 22:45 ` [PATCH 06/15] lib/thread: refactor in-reply-to test David Bremner
` (10 subsequent siblings)
15 siblings, 0 replies; 22+ messages in thread
From: David Bremner @ 2018-07-30 22:45 UTC (permalink / raw)
To: notmuch
The plan is to use these in resolving threads.
---
lib/message.cc | 17 +++++++++++++++++
lib/notmuch-private.h | 3 +++
2 files changed, 20 insertions(+)
diff --git a/lib/message.cc b/lib/message.cc
index 107dcf35..db5ce0a2 100644
--- a/lib/message.cc
+++ b/lib/message.cc
@@ -41,6 +41,7 @@ struct _notmuch_message {
notmuch_message_file_t *message_file;
notmuch_string_list_t *property_term_list;
notmuch_string_map_t *property_map;
+ notmuch_string_list_t *reference_list;
notmuch_message_list_t *replies;
unsigned long flags;
/* For flags that are initialized on-demand, lazy_flags indicates
@@ -129,6 +130,7 @@ _notmuch_message_create_for_document (const void *talloc_owner,
message->author = NULL;
message->property_term_list = NULL;
message->property_map = NULL;
+ message->reference_list = NULL;
message->replies = _notmuch_message_list_create (message);
if (unlikely (message->replies == NULL)) {
@@ -349,6 +351,7 @@ _notmuch_message_ensure_metadata (notmuch_message_t *message, void *field)
*type_prefix = _find_prefix ("type"),
*filename_prefix = _find_prefix ("file-direntry"),
*property_prefix = _find_prefix ("property"),
+ *reference_prefix = _find_prefix ("reference"),
*replyto_prefix = _find_prefix ("replyto");
/* We do this all in a single pass because Xapian decompresses the
@@ -413,6 +416,14 @@ _notmuch_message_ensure_metadata (notmuch_message_t *message, void *field)
_notmuch_database_get_terms_with_prefix (message, i, end,
property_prefix);
+ /* get references */
+ assert (strcmp (property_prefix, reference_prefix) < 0);
+ if (!message->reference_list) {
+ message->reference_list =
+ _notmuch_database_get_terms_with_prefix (message, i, end,
+ reference_prefix);
+ }
+
/* Get reply to */
assert (strcmp (property_prefix, replyto_prefix) < 0);
if (!message->in_reply_to)
@@ -588,6 +599,12 @@ _notmuch_message_add_reply (notmuch_message_t *message,
_notmuch_message_list_add_message (message->replies, reply);
}
+const notmuch_string_list_t *
+_notmuch_message_get_references (notmuch_message_t *message)
+{
+ return message->reference_list;
+}
+
static int
_cmpmsg (const void *pa, const void *pb)
{
diff --git a/lib/notmuch-private.h b/lib/notmuch-private.h
index 10bb7040..7603e1f4 100644
--- a/lib/notmuch-private.h
+++ b/lib/notmuch-private.h
@@ -583,6 +583,9 @@ _notmuch_string_list_append (notmuch_string_list_t *list,
void
_notmuch_string_list_sort (notmuch_string_list_t *list);
+const notmuch_string_list_t *
+_notmuch_message_get_references(notmuch_message_t *message);
+
/* string-map.c */
typedef struct _notmuch_string_map notmuch_string_map_t;
typedef struct _notmuch_string_map_iterator notmuch_string_map_iterator_t;
--
2.18.0
^ permalink raw reply related [flat|nested] 22+ messages in thread
* [PATCH 06/15] lib/thread: refactor in-reply-to test.
2018-07-30 22:45 Threading patches v2 David Bremner
` (4 preceding siblings ...)
2018-07-30 22:45 ` [PATCH 05/15] lib: read reference terms into message struct David Bremner
@ 2018-07-30 22:45 ` David Bremner
2018-07-30 22:45 ` [PATCH 07/15] lib: calculate message depth in thread David Bremner
` (9 subsequent siblings)
15 siblings, 0 replies; 22+ messages in thread
From: David Bremner @ 2018-07-30 22:45 UTC (permalink / raw)
To: notmuch
This is not a complete win in code-size, but it makes the code (which
is about to get more complicated) easier to follow. In particular the
second pass (which looks a bit wasteful here) will be needed when we
reparent by references.
---
lib/thread.cc | 107 ++++++++++++++++++++++++++----------
test/T510-thread-replies.sh | 1 -
2 files changed, 77 insertions(+), 31 deletions(-)
diff --git a/lib/thread.cc b/lib/thread.cc
index db592a3a..9f923843 100644
--- a/lib/thread.cc
+++ b/lib/thread.cc
@@ -387,29 +387,72 @@ _thread_add_matched_message (notmuch_thread_t *thread,
_thread_add_matched_author (thread, _notmuch_message_get_author (hashed_message));
}
+static bool
+_parent_via_in_reply_to (notmuch_thread_t *thread, notmuch_message_t *message) {
+ notmuch_message_t *parent;
+ const char *in_reply_to;
+
+ in_reply_to = _notmuch_message_get_in_reply_to (message);
+ DEBUG_PRINTF("checking message = %s in_reply_to=%s\n",
+ notmuch_message_get_message_id (message), in_reply_to);
+
+ if (in_reply_to && strlen (in_reply_to) &&
+ g_hash_table_lookup_extended (thread->message_hash,
+ in_reply_to, NULL,
+ (void **) &parent)) {
+ _notmuch_message_add_reply (parent, message);
+ return true;
+ } else {
+ return false;
+ }
+}
+
+static void
+_parent_or_toplevel (notmuch_thread_t *thread, notmuch_message_t *message)
+{
+ bool found = false;
+ notmuch_message_t *parent = NULL;
+ const notmuch_string_list_t *references =
+ _notmuch_message_get_references (message);
+ for (notmuch_string_node_t *ref_node = references->head;
+ ! found && ref_node; ref_node = ref_node->next) {
+ if ((found = g_hash_table_lookup_extended (thread->message_hash,
+ ref_node->string, NULL,
+ (void **) &parent))) {
+ _notmuch_message_add_reply (parent, message);
+ }
+ }
+ if (! found)
+ _notmuch_message_list_add_message (thread->toplevel_list, message);
+}
+
static void
_resolve_thread_relationships (notmuch_thread_t *thread)
{
notmuch_message_node_t *node, *first_node;
- notmuch_message_t *message, *parent;
- const char *in_reply_to;
+ notmuch_message_t *message;
+ void *local;
+ notmuch_message_list_t *maybe_toplevel_list;
first_node = thread->message_list->head;
if (! first_node)
return;
+ local = talloc_new (thread);
+ maybe_toplevel_list = _notmuch_message_list_create (local);
+
for (node = first_node->next; node; node = node->next) {
message = node->message;
- in_reply_to = _notmuch_message_get_in_reply_to (message);
- if (in_reply_to && strlen (in_reply_to) &&
- g_hash_table_lookup_extended (thread->message_hash,
- in_reply_to, NULL,
- (void **) &parent))
- _notmuch_message_add_reply (parent, message);
- else
- _notmuch_message_list_add_message (thread->toplevel_list, message);
+ if (! _parent_via_in_reply_to (thread, message))
+ _notmuch_message_list_add_message (maybe_toplevel_list, message);
}
+ for (notmuch_messages_t *roots = _notmuch_messages_create (maybe_toplevel_list);
+ notmuch_messages_valid (roots);
+ notmuch_messages_move_to_next (roots)) {
+ notmuch_message_t *message = notmuch_messages_get (roots);
+ _parent_or_toplevel (thread, message);
+ }
/*
* if we reach the end of the list without finding a top-level
* message, that means the thread is a cycle (or set of cycles)
@@ -418,17 +461,31 @@ _resolve_thread_relationships (notmuch_thread_t *thread)
*/
if (first_node) {
message = first_node->message;
- in_reply_to = _notmuch_message_get_in_reply_to (message);
- if (thread->toplevel_list->head &&
- in_reply_to && strlen (in_reply_to) &&
- g_hash_table_lookup_extended (thread->message_hash,
- in_reply_to, NULL,
- (void **) &parent))
- _notmuch_message_add_reply (parent, message);
- else
- _notmuch_message_list_add_message (thread->toplevel_list, message);
+ if (! thread->toplevel_list->head ||
+ ! _parent_via_in_reply_to (thread, message)) {
+ /*
+ * If the oldest message happens to be in-reply-to a
+ * missing message, we only check for references if there
+ * is some other candidate for root message.
+ */
+ if (thread->toplevel_list->head)
+ _parent_or_toplevel (thread, message);
+ else
+ _notmuch_message_list_add_message (thread->toplevel_list, message);
+ }
}
+ /* XXX: After scanning through the entire list looking for parents
+ * via "In-Reply-To", we should do a second pass that looks at the
+ * list of messages IDs in the "References" header instead.
+ * Unlike the current quick fix, the parent should be the
+ * "deepest" message of all the messages found in the "References"
+ * list.
+ *
+ * Doing this will allow messages and sub-threads to be positioned
+ * correctly in the thread even when an intermediate message is
+ * missing from the thread.
+ */
for (notmuch_messages_t *messages = _notmuch_messages_create (thread->toplevel_list);
notmuch_messages_valid (messages);
notmuch_messages_move_to_next (messages))
@@ -437,17 +494,7 @@ _resolve_thread_relationships (notmuch_thread_t *thread)
_notmuch_message_sort_subtree (message);
}
-
- /* XXX: After scanning through the entire list looking for parents
- * via "In-Reply-To", we should do a second pass that looks at the
- * list of messages IDs in the "References" header instead. (And
- * for this the parent would be the "deepest" message of all the
- * messages found in the "References" list.)
- *
- * Doing this will allow messages and sub-threads to be positioned
- * correctly in the thread even when an intermediate message is
- * missing from the thread.
- */
+ talloc_free (local);
}
/* Create a new notmuch_thread_t object by finding the thread
diff --git a/test/T510-thread-replies.sh b/test/T510-thread-replies.sh
index b7322198..3ee2ee78 100755
--- a/test/T510-thread-replies.sh
+++ b/test/T510-thread-replies.sh
@@ -167,7 +167,6 @@ test_expect_equal_json "$output" "$expected"
add_email_corpus threading
test_begin_subtest "reply to ghost"
-test_subtest_known_broken
notmuch show --entire-thread=true id:000-real-root@example.org | grep ^Subject: | head -1 > OUTPUT
cat <<EOF > EXPECTED
Subject: root message
--
2.18.0
^ permalink raw reply related [flat|nested] 22+ messages in thread
* [PATCH 07/15] lib: calculate message depth in thread
2018-07-30 22:45 Threading patches v2 David Bremner
` (5 preceding siblings ...)
2018-07-30 22:45 ` [PATCH 06/15] lib/thread: refactor in-reply-to test David Bremner
@ 2018-07-30 22:45 ` David Bremner
2018-07-30 22:45 ` [PATCH 08/15] lib/thread: rewrite _parent_or_toplevel to use depths David Bremner
` (8 subsequent siblings)
15 siblings, 0 replies; 22+ messages in thread
From: David Bremner @ 2018-07-30 22:45 UTC (permalink / raw)
To: notmuch
This will be used in reparenting messages without useful in-reply-to,
but with useful references
---
lib/message.cc | 23 +++++++++++++++++++++++
lib/notmuch-private.h | 5 +++++
2 files changed, 28 insertions(+)
diff --git a/lib/message.cc b/lib/message.cc
index db5ce0a2..3c547298 100644
--- a/lib/message.cc
+++ b/lib/message.cc
@@ -32,6 +32,7 @@ struct _notmuch_message {
int frozen;
char *message_id;
char *thread_id;
+ size_t thread_depth;
char *in_reply_to;
notmuch_string_list_t *tag_list;
notmuch_string_list_t *filename_term_list;
@@ -118,6 +119,9 @@ _notmuch_message_create_for_document (const void *talloc_owner,
/* the message is initially not synchronized with Xapian */
message->last_view = 0;
+ /* Calculated after the thread structure is computed */
+ message->thread_depth = 0;
+
/* Each of these will be lazily created as needed. */
message->message_id = NULL;
message->thread_id = NULL;
@@ -599,6 +603,25 @@ _notmuch_message_add_reply (notmuch_message_t *message,
_notmuch_message_list_add_message (message->replies, reply);
}
+size_t
+_notmuch_message_get_thread_depth (notmuch_message_t *message) {
+ return message->thread_depth;
+}
+
+void
+_notmuch_message_label_depths (notmuch_message_t *message,
+ size_t depth)
+{
+ message->thread_depth = depth;
+
+ for (notmuch_messages_t *messages = _notmuch_messages_create (message->replies);
+ notmuch_messages_valid (messages);
+ notmuch_messages_move_to_next (messages)) {
+ notmuch_message_t *child = notmuch_messages_get (messages);
+ _notmuch_message_label_depths (child, depth+1);
+ }
+}
+
const notmuch_string_list_t *
_notmuch_message_get_references (notmuch_message_t *message)
{
diff --git a/lib/notmuch-private.h b/lib/notmuch-private.h
index 7603e1f4..083fc013 100644
--- a/lib/notmuch-private.h
+++ b/lib/notmuch-private.h
@@ -539,6 +539,11 @@ _notmuch_message_remove_unprefixed_terms (notmuch_message_t *message);
const char *
_notmuch_message_get_thread_id_only(notmuch_message_t *message);
+size_t _notmuch_message_get_thread_depth (notmuch_message_t *message);
+
+void
+_notmuch_message_label_depths (notmuch_message_t *message,
+ size_t depth);
void
_notmuch_message_sort_subtree (notmuch_message_t *message);
--
2.18.0
^ permalink raw reply related [flat|nested] 22+ messages in thread
* [PATCH 08/15] lib/thread: rewrite _parent_or_toplevel to use depths
2018-07-30 22:45 Threading patches v2 David Bremner
` (6 preceding siblings ...)
2018-07-30 22:45 ` [PATCH 07/15] lib: calculate message depth in thread David Bremner
@ 2018-07-30 22:45 ` David Bremner
2018-07-30 22:45 ` [PATCH 09/15] lib/thread: change _resolve_thread_relationships " David Bremner
` (7 subsequent siblings)
15 siblings, 0 replies; 22+ messages in thread
From: David Bremner @ 2018-07-30 22:45 UTC (permalink / raw)
To: notmuch
This is part 1/2 of changing the reparenting of alleged toplevel
messages to use a "deep" reference rather than just the first one
found.
---
lib/thread.cc | 34 +++++++++++++++++++++++++++-------
1 file changed, 27 insertions(+), 7 deletions(-)
diff --git a/lib/thread.cc b/lib/thread.cc
index 9f923843..3fc35f22 100644
--- a/lib/thread.cc
+++ b/lib/thread.cc
@@ -410,20 +410,40 @@ _parent_via_in_reply_to (notmuch_thread_t *thread, notmuch_message_t *message) {
static void
_parent_or_toplevel (notmuch_thread_t *thread, notmuch_message_t *message)
{
- bool found = false;
+ size_t max_depth = 0;
+ notmuch_message_t *new_parent;
notmuch_message_t *parent = NULL;
const notmuch_string_list_t *references =
_notmuch_message_get_references (message);
+
+ DEBUG_PRINTF("trying to reparent via references: %s\n",
+ notmuch_message_get_message_id (message));
+
for (notmuch_string_node_t *ref_node = references->head;
- ! found && ref_node; ref_node = ref_node->next) {
- if ((found = g_hash_table_lookup_extended (thread->message_hash,
- ref_node->string, NULL,
- (void **) &parent))) {
- _notmuch_message_add_reply (parent, message);
+ ref_node; ref_node = ref_node->next) {
+ DEBUG_PRINTF("checking reference=%s\n", ref_node->string);
+ if ((g_hash_table_lookup_extended (thread->message_hash,
+ ref_node->string, NULL,
+ (void **) &new_parent))) {
+ size_t new_depth = _notmuch_message_get_thread_depth (new_parent);
+ DEBUG_PRINTF("got depth %zu\n", new_depth);
+ if (new_depth > max_depth || !parent) {
+ DEBUG_PRINTF("adding at depth %zu parent=%s\n", new_depth, ref_node->string);
+ max_depth = new_depth;
+ parent = new_parent;
+ }
}
}
- if (! found)
+ if (parent) {
+ DEBUG_PRINTF("adding reply %s to parent=%s\n",
+ notmuch_message_get_message_id (message),
+ notmuch_message_get_message_id (parent));
+ _notmuch_message_add_reply (parent, message);
+ } else {
+ DEBUG_PRINTF("adding as toplevel %s\n",
+ notmuch_message_get_message_id (message));
_notmuch_message_list_add_message (thread->toplevel_list, message);
+ }
}
static void
--
2.18.0
^ permalink raw reply related [flat|nested] 22+ messages in thread
* [PATCH 09/15] lib/thread: change _resolve_thread_relationships to use depths
2018-07-30 22:45 Threading patches v2 David Bremner
` (7 preceding siblings ...)
2018-07-30 22:45 ` [PATCH 08/15] lib/thread: rewrite _parent_or_toplevel to use depths David Bremner
@ 2018-07-30 22:45 ` David Bremner
2018-07-30 22:45 ` [PATCH 10/15] test: add known broken test for good In-Reply-To / bad References David Bremner
` (6 subsequent siblings)
15 siblings, 0 replies; 22+ messages in thread
From: David Bremner @ 2018-07-30 22:45 UTC (permalink / raw)
To: notmuch
We (finally) implement the XXX comment. It requires a bit of care not
to reparent all of the possible toplevel messages. There is some
slightly naughty low level access to the list structures here that can
be cleaned up in a later commit.
---
lib/thread.cc | 53 ++++++++++++++++++-------------------
test/T510-thread-replies.sh | 1 -
2 files changed, 26 insertions(+), 28 deletions(-)
diff --git a/lib/thread.cc b/lib/thread.cc
index 3fc35f22..a5047103 100644
--- a/lib/thread.cc
+++ b/lib/thread.cc
@@ -467,12 +467,6 @@ _resolve_thread_relationships (notmuch_thread_t *thread)
_notmuch_message_list_add_message (maybe_toplevel_list, message);
}
- for (notmuch_messages_t *roots = _notmuch_messages_create (maybe_toplevel_list);
- notmuch_messages_valid (roots);
- notmuch_messages_move_to_next (roots)) {
- notmuch_message_t *message = notmuch_messages_get (roots);
- _parent_or_toplevel (thread, message);
- }
/*
* if we reach the end of the list without finding a top-level
* message, that means the thread is a cycle (or set of cycles)
@@ -481,31 +475,36 @@ _resolve_thread_relationships (notmuch_thread_t *thread)
*/
if (first_node) {
message = first_node->message;
- if (! thread->toplevel_list->head ||
+ DEBUG_PRINTF("checking first message %s\n",
+ notmuch_message_get_message_id (message));
+
+ if (! maybe_toplevel_list->head ||
! _parent_via_in_reply_to (thread, message)) {
- /*
- * If the oldest message happens to be in-reply-to a
- * missing message, we only check for references if there
- * is some other candidate for root message.
- */
- if (thread->toplevel_list->head)
- _parent_or_toplevel (thread, message);
- else
- _notmuch_message_list_add_message (thread->toplevel_list, message);
+ DEBUG_PRINTF("adding first message as toplevel = %s\n",
+ notmuch_message_get_message_id (message));
+ _notmuch_message_list_add_message (maybe_toplevel_list, message);
}
}
- /* XXX: After scanning through the entire list looking for parents
- * via "In-Reply-To", we should do a second pass that looks at the
- * list of messages IDs in the "References" header instead.
- * Unlike the current quick fix, the parent should be the
- * "deepest" message of all the messages found in the "References"
- * list.
- *
- * Doing this will allow messages and sub-threads to be positioned
- * correctly in the thread even when an intermediate message is
- * missing from the thread.
- */
+ for (notmuch_messages_t *messages = _notmuch_messages_create (maybe_toplevel_list);
+ notmuch_messages_valid (messages);
+ notmuch_messages_move_to_next (messages))
+ {
+ notmuch_message_t *message = notmuch_messages_get (messages);
+ _notmuch_message_label_depths (message, 0);
+ }
+
+ for (notmuch_messages_t *roots = _notmuch_messages_create (maybe_toplevel_list);
+ notmuch_messages_valid (roots);
+ notmuch_messages_move_to_next (roots)) {
+ notmuch_message_t *message = notmuch_messages_get (roots);
+ /* XXX add _notmuch_messages_count */
+ if (roots->iterator->next || thread->toplevel_list->head)
+ _parent_or_toplevel (thread, message);
+ else
+ _notmuch_message_list_add_message (thread->toplevel_list, message);
+ }
+
for (notmuch_messages_t *messages = _notmuch_messages_create (thread->toplevel_list);
notmuch_messages_valid (messages);
notmuch_messages_move_to_next (messages))
diff --git a/test/T510-thread-replies.sh b/test/T510-thread-replies.sh
index 3ee2ee78..84a3e6e8 100755
--- a/test/T510-thread-replies.sh
+++ b/test/T510-thread-replies.sh
@@ -174,7 +174,6 @@ EOF
test_expect_equal_file EXPECTED OUTPUT
test_begin_subtest "reply to ghost (tree view)"
-test_subtest_known_broken
test_emacs '(notmuch-tree "id:000-real-root@example.org")
(notmuch-test-wait)
(test-output)
--
2.18.0
^ permalink raw reply related [flat|nested] 22+ messages in thread
* [PATCH 10/15] test: add known broken test for good In-Reply-To / bad References
2018-07-30 22:45 Threading patches v2 David Bremner
` (8 preceding siblings ...)
2018-07-30 22:45 ` [PATCH 09/15] lib/thread: change _resolve_thread_relationships " David Bremner
@ 2018-07-30 22:45 ` David Bremner
2018-07-30 22:45 ` [PATCH 11/15] test/thread-replies: mangle In-Reply-To's David Bremner
` (5 subsequent siblings)
15 siblings, 0 replies; 22+ messages in thread
From: David Bremner @ 2018-07-30 22:45 UTC (permalink / raw)
To: notmuch
The current scheme of choosing the replyto (i.e. the default parent
for threading purposes) does not work well for mailers that put
the oldest Reference last.
---
test/T510-thread-replies.sh | 15 +++++++++++++++
test/corpora/threading/parent-priority/cur/child | 11 +++++++++++
.../threading/parent-priority/cur/grand-child | 10 ++++++++++
test/corpora/threading/parent-priority/cur/root | 7 +++++++
4 files changed, 43 insertions(+)
create mode 100644 test/corpora/threading/parent-priority/cur/child
create mode 100644 test/corpora/threading/parent-priority/cur/grand-child
create mode 100644 test/corpora/threading/parent-priority/cur/root
diff --git a/test/T510-thread-replies.sh b/test/T510-thread-replies.sh
index 84a3e6e8..4d0e0665 100755
--- a/test/T510-thread-replies.sh
+++ b/test/T510-thread-replies.sh
@@ -189,4 +189,19 @@ End of search results.
EOF
test_expect_equal_file EXPECTED OUTPUT
+test_begin_subtest "trusting reply-to (tree view)"
+test_subtest_known_broken
+test_emacs '(notmuch-tree "id:B00-root@example.org")
+ (notmuch-test-wait)
+ (test-output)
+ (delete-other-windows)'
+cat <<EOF > EXPECTED
+ 2016-06-17 Alice ┬►root message (inbox unread)
+ 2016-06-18 Alice ╰┬►child message (inbox unread)
+ 2016-06-18 Alice ╰─►grand-child message (inbox unread)
+End of search results.
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+
test_done
diff --git a/test/corpora/threading/parent-priority/cur/child b/test/corpora/threading/parent-priority/cur/child
new file mode 100644
index 00000000..23ee6495
--- /dev/null
+++ b/test/corpora/threading/parent-priority/cur/child
@@ -0,0 +1,11 @@
+From: Alice <alice@example.org>
+To: Daniel <daniel@example.org>
+Subject: child message
+Message-ID: <B01-child@example.org>
+In-Reply-To: <B00-root@example.org>
+References: <B00--root@example.org>
+Date: Fri, 17 Jun 2016 22:14:41 -0400
+
+This is a normal-ish reply, and has both a references header and an
+in-reply-to header.
+
diff --git a/test/corpora/threading/parent-priority/cur/grand-child b/test/corpora/threading/parent-priority/cur/grand-child
new file mode 100644
index 00000000..028371d4
--- /dev/null
+++ b/test/corpora/threading/parent-priority/cur/grand-child
@@ -0,0 +1,10 @@
+From: Alice <alice@example.org>
+To: Daniel <daniel@example.org>
+Subject: grand-child message
+Message-ID: <B01-grand-child@example.org>
+In-Reply-To: <B01-child@example.org>
+References: <B01-child@example.org> <B00-root@example.org>
+Date: Fri, 17 Jun 2016 22:24:41 -0400
+
+This has the references headers in the wrong order, with oldest last.
+Debbugs does this.
diff --git a/test/corpora/threading/parent-priority/cur/root b/test/corpora/threading/parent-priority/cur/root
new file mode 100644
index 00000000..3990843d
--- /dev/null
+++ b/test/corpora/threading/parent-priority/cur/root
@@ -0,0 +1,7 @@
+From: Alice <alice@example.org>
+To: Daniel <daniel@example.org>
+Subject: root message
+Message-ID: <B00-root@example.org>
+Date: Thu, 16 Jun 2016 22:14:41 -0400
+
+This message has no reply-to
--
2.18.0
^ permalink raw reply related [flat|nested] 22+ messages in thread
* [PATCH 11/15] test/thread-replies: mangle In-Reply-To's
2018-07-30 22:45 Threading patches v2 David Bremner
` (9 preceding siblings ...)
2018-07-30 22:45 ` [PATCH 10/15] test: add known broken test for good In-Reply-To / bad References David Bremner
@ 2018-07-30 22:45 ` David Bremner
2018-07-30 22:45 ` [PATCH 12/15] util/string-util: export skip_space David Bremner
` (4 subsequent siblings)
15 siblings, 0 replies; 22+ messages in thread
From: David Bremner @ 2018-07-30 22:45 UTC (permalink / raw)
To: notmuch
In a future commit, we will start trusting In-Reply-To's when they
look sane (i.e. a single message-id). Modify these tests so they will
keep passing (i.e. keep choosing References) when that happens.
---
test/T510-thread-replies.sh | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/test/T510-thread-replies.sh b/test/T510-thread-replies.sh
index 4d0e0665..d94c9a0c 100755
--- a/test/T510-thread-replies.sh
+++ b/test/T510-thread-replies.sh
@@ -45,10 +45,10 @@ expected='[[[{"id": "foo@one.com",
expected=`echo "$expected" | notmuch_json_show_sanitize`
test_expect_equal_json "$output" "$expected"
-test_begin_subtest "Prefer References to In-Reply-To"
+test_begin_subtest "Prefer References to dodgy In-Reply-To"
add_message '[id]="foo@two.com"' \
'[subject]=two'
-add_message '[in-reply-to]="<bar@baz.com>"' \
+add_message '[in-reply-to]="Your message of December 31 1999 <bar@baz.com>"' \
'[references]="<foo@two.com>"' \
'[subject]="Re: two"'
output=$(notmuch show --format=json 'subject:two' | notmuch_json_show_sanitize)
@@ -101,12 +101,12 @@ expected='[[[{"id": "foo@three.com", "match": true, "excluded": false,
expected=`echo "$expected" | notmuch_json_show_sanitize`
test_expect_equal_json "$output" "$expected"
-test_begin_subtest "Use last Reference"
+test_begin_subtest "Use last Reference when In-Reply-To is dodgy"
add_message '[id]="foo@four.com"' \
'[subject]="four"'
add_message '[id]="bar@four.com"' \
'[subject]="not-four"'
-add_message '[in-reply-to]="<baz@four.com>"' \
+add_message '[in-reply-to]="<baz@four.com> (RFC822 4lyfe)"' \
'[references]="<baz@four.com> <foo@four.com>"' \
'[subject]="neither"'
output=$(notmuch show --format=json 'subject:four' | notmuch_json_show_sanitize)
--
2.18.0
^ permalink raw reply related [flat|nested] 22+ messages in thread
* [PATCH 12/15] util/string-util: export skip_space
2018-07-30 22:45 Threading patches v2 David Bremner
` (10 preceding siblings ...)
2018-07-30 22:45 ` [PATCH 11/15] test/thread-replies: mangle In-Reply-To's David Bremner
@ 2018-07-30 22:45 ` David Bremner
2018-07-30 22:45 ` [PATCH 13/15] lib: add _notmuch_message_id_parse_strict David Bremner
` (3 subsequent siblings)
15 siblings, 0 replies; 22+ messages in thread
From: David Bremner @ 2018-07-30 22:45 UTC (permalink / raw)
To: notmuch
It's only a few lines, but we already define the function, so make it
usable elsewhere.
---
util/string-util.c | 2 +-
util/string-util.h | 2 ++
2 files changed, 3 insertions(+), 1 deletion(-)
diff --git a/util/string-util.c b/util/string-util.c
index b0108811..fc2058e0 100644
--- a/util/string-util.c
+++ b/util/string-util.c
@@ -141,7 +141,7 @@ make_boolean_term (void *ctx, const char *prefix, const char *term,
return 0;
}
-static const char*
+const char*
skip_space (const char *str)
{
while (*str && isspace ((unsigned char) *str))
diff --git a/util/string-util.h b/util/string-util.h
index 97770614..4c110a20 100644
--- a/util/string-util.h
+++ b/util/string-util.h
@@ -77,6 +77,8 @@ unsigned int strcase_hash (const void *ptr);
void strip_trailing (char *str, char ch);
+const char* skip_space (const char *str);
+
#ifdef __cplusplus
}
#endif
--
2.18.0
^ permalink raw reply related [flat|nested] 22+ messages in thread
* [PATCH 13/15] lib: add _notmuch_message_id_parse_strict
2018-07-30 22:45 Threading patches v2 David Bremner
` (11 preceding siblings ...)
2018-07-30 22:45 ` [PATCH 12/15] util/string-util: export skip_space David Bremner
@ 2018-07-30 22:45 ` David Bremner
2018-08-01 0:46 ` Amin Bandali
2018-08-01 4:58 ` [Patch v1.1] " David Bremner
2018-07-30 22:45 ` [PATCH 14/15] lib: change parent strategy to use In-Reply-To if it looks sane David Bremner
` (2 subsequent siblings)
15 siblings, 2 replies; 22+ messages in thread
From: David Bremner @ 2018-07-30 22:45 UTC (permalink / raw)
To: notmuch
The idea is that if a message-id parses with this function, the MUA
generating it was probably sane, and in particular it's probably safe
to use the result as a parent from In-Reply-To.
---
lib/message-id.c | 32 ++++++++++++++++++
lib/notmuch-private.h | 14 ++++++++
test/Makefile.local | 6 +++-
test/T710-message-id.sh | 73 +++++++++++++++++++++++++++++++++++++++++
test/message-id-parse.c | 24 ++++++++++++++
5 files changed, 148 insertions(+), 1 deletion(-)
create mode 100755 test/T710-message-id.sh
create mode 100644 test/message-id-parse.c
diff --git a/lib/message-id.c b/lib/message-id.c
index d7541d50..a1dce9c8 100644
--- a/lib/message-id.c
+++ b/lib/message-id.c
@@ -1,4 +1,5 @@
#include "notmuch-private.h"
+#include "string-util.h"
/* Advance 'str' past any whitespace or RFC 822 comments. A comment is
* a (potentially nested) parenthesized sequence with '\' used to
@@ -94,3 +95,34 @@ _notmuch_message_id_parse (void *ctx, const char *message_id, const char **next)
return result;
}
+
+/* Strictly parse a lone "<id>" (outer whitespace allowed); returns the id
+ * talloc'ed under 'ctx', or NULL if anything else is present. */
+char *
+_notmuch_message_id_parse_strict (void *ctx, const char *message_id)
+{
+ const char *s, *end;
+ /* A missing or empty header value cannot contain a message-id. */
+ if (message_id == NULL || *message_id == '\0')
+ return NULL;
+ /* Leading whitespace is tolerated, but '<' must come right after it. */
+ s = skip_space (message_id);
+ if (*s == '<')
+ s++;
+ else
+ return NULL;
+ /* Scan to the closing '>'; cast so isspace never sees a negative char. */
+ for (end = s; *end && *end != '>'; end++)
+ if (isspace ((unsigned char) *end))
+ return NULL;
+ /* Require the closing '>' and nothing but whitespace after it. */
+ if (*end != '>')
+ return NULL;
+ else {
+ const char *last = skip_space (end+1);
+ if (*last != '\0')
+ return NULL;
+ }
+ /* Copy only the text between the delimiters into 'ctx'. */
+ return talloc_strndup (ctx, s, end - s);
+}
diff --git a/lib/notmuch-private.h b/lib/notmuch-private.h
index 083fc013..67fd4990 100644
--- a/lib/notmuch-private.h
+++ b/lib/notmuch-private.h
@@ -526,6 +526,20 @@ _notmuch_query_count_documents (notmuch_query_t *query,
char *
_notmuch_message_id_parse (void *ctx, const char *message_id, const char **next);
+/* Parse a message-id, discarding leading and trailing whitespace, and
+ * '<' and '>' delimiters.
+ *
+ * Apply a probably-stricter-than RFC definition of what is allowed in
+ * a message-id. In particular, forbid whitespace.
+ *
+ * Returns a newly talloc'ed string belonging to 'ctx'.
+ *
+ * Returns NULL if there is any error parsing the message-id.
+ */
+
+char *
+_notmuch_message_id_parse_strict (void *ctx, const char *message_id);
+
/* message.cc */
diff --git a/test/Makefile.local b/test/Makefile.local
index 1a0ab813..1cf09778 100644
--- a/test/Makefile.local
+++ b/test/Makefile.local
@@ -15,6 +15,9 @@ smtp_dummy_modules = $(smtp_dummy_srcs:.c=.o)
$(dir)/arg-test: $(dir)/arg-test.o command-line-arguments.o util/libnotmuch_util.a
$(call quiet,CC) $^ -o $@ $(LDFLAGS)
+$(dir)/message-id-parse: $(dir)/message-id-parse.o lib/libnotmuch.a util/libnotmuch_util.a
+ $(call quiet,CC) $^ -o $@ $(LDFLAGS) $(TALLOC_LDFLAGS)
+
$(dir)/hex-xcode: $(dir)/hex-xcode.o command-line-arguments.o util/libnotmuch_util.a
$(call quiet,CC) $^ -o $@ $(LDFLAGS) $(TALLOC_LDFLAGS)
@@ -50,7 +53,8 @@ test_main_srcs=$(dir)/arg-test.c \
$(dir)/smtp-dummy.c \
$(dir)/symbol-test.cc \
$(dir)/make-db-version.cc \
- $(dir)/ghost-report.cc
+ $(dir)/ghost-report.cc \
+ $(dir)/message-id-parse.c
test_srcs=$(test_main_srcs) $(dir)/database-test.c
diff --git a/test/T710-message-id.sh b/test/T710-message-id.sh
new file mode 100755
index 00000000..163c1300
--- /dev/null
+++ b/test/T710-message-id.sh
@@ -0,0 +1,73 @@
+#!/usr/bin/env bash
+test_description="message id parsing"
+
+. $(dirname "$0")/test-lib.sh || exit 1
+
+test_begin_subtest "good message ids"
+${TEST_DIRECTORY}/message-id-parse <<EOF >OUTPUT
+<018b1a8f2d1df62e804ce88b65401304832dfbbf.1346614915.git.jani@nikula.org>
+<1530507300.raoomurnbf.astroid@strange.none>
+<1258787708-21121-2-git-send-email-keithp@keithp.com>
+EOF
+cat <<EOF >EXPECTED
+GOOD: 018b1a8f2d1df62e804ce88b65401304832dfbbf.1346614915.git.jani@nikula.org
+GOOD: 1530507300.raoomurnbf.astroid@strange.none
+GOOD: 1258787708-21121-2-git-send-email-keithp@keithp.com
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "leading and trailing space is OK"
+${TEST_DIRECTORY}/message-id-parse <<EOF >OUTPUT
+ <018b1a8f2d1df62e804ce88b65401304832dfbbf.1346614915.git.jani@nikula.org>
+<1530507300.raoomurnbf.astroid@strange.none>
+ <1258787708-21121-2-git-send-email-keithp@keithp.com>
+EOF
+cat <<EOF >EXPECTED
+GOOD: 018b1a8f2d1df62e804ce88b65401304832dfbbf.1346614915.git.jani@nikula.org
+GOOD: 1530507300.raoomurnbf.astroid@strange.none
+GOOD: 1258787708-21121-2-git-send-email-keithp@keithp.com
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "<> delimeters are required"
+${TEST_DIRECTORY}/message-id-parse <<EOF >OUTPUT
+018b1a8f2d1df62e804ce88b65401304832dfbbf.1346614915.git.jani@nikula.org>
+<1530507300.raoomurnbf.astroid@strange.none
+1258787708-21121-2-git-send-email-keithp@keithp.com
+EOF
+cat <<EOF >EXPECTED
+BAD: 018b1a8f2d1df62e804ce88b65401304832dfbbf.1346614915.git.jani@nikula.org>
+BAD: <1530507300.raoomurnbf.astroid@strange.none
+BAD: 1258787708-21121-2-git-send-email-keithp@keithp.com
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "embedded whitespace is forbidden"
+${TEST_DIRECTORY}/message-id-parse <<EOF >OUTPUT
+<018b1a8f2d1df62e804ce88b65401304832dfbbf.1346614915 .git.jani@nikula.org>
+<1530507300.raoomurnbf.astroid @strange.none>
+<1258787708-21121-
2-git-send-email-keithp@keithp.com>
+EOF
+cat <<EOF >EXPECTED
+BAD: <018b1a8f2d1df62e804ce88b65401304832dfbbf.1346614915 .git.jani@nikula.org>
+BAD: <1530507300.raoomurnbf.astroid @strange.none>
+BAD: <1258787708-21121-
2-git-send-email-keithp@keithp.com>
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+
+test_begin_subtest "folded real life bad In-Reply-To values"
+${TEST_DIRECTORY}/message-id-parse <<EOF >OUTPUT
+<22597.31869.380767.339702@chiark.greenend.org.uk> (Ian Jackson's message of "Mon, 5 Dec 2016 14:41:01 +0000")
+<20170625141242.loaalhis2eodo66n@gaara.hadrons.org> <149719990964.27883.13021127452105787770.reportbug@seneca.home.org>
+Your message of Tue, 09 Dec 2014 13:21:11 +0100. <1900758.CgLNVPbY9N@liber>
+EOF
+cat <<EOF >EXPECTED
+BAD: <22597.31869.380767.339702@chiark.greenend.org.uk> (Ian Jackson's message of "Mon, 5 Dec 2016 14:41:01 +0000")
+BAD: <20170625141242.loaalhis2eodo66n@gaara.hadrons.org> <149719990964.27883.13021127452105787770.reportbug@seneca.home.org>
+BAD: Your message of Tue, 09 Dec 2014 13:21:11 +0100. <1900758.CgLNVPbY9N@liber>
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+
+test_done
diff --git a/test/message-id-parse.c b/test/message-id-parse.c
new file mode 100644
index 00000000..25decc9b
--- /dev/null
+++ b/test/message-id-parse.c
@@ -0,0 +1,24 @@
+#include <stdio.h>
+#include <talloc.h>
+#include "notmuch-private.h"
+
+/* Read one candidate message-id per line from stdin and print GOOD/BAD. */
+int main(unused (int argc), unused (char **argv)){
+ char *line = NULL;
+ size_t len = 0;
+ ssize_t nread;
+ void *local = talloc_new (NULL);
+
+ while ((nread = getline(&line, &len, stdin)) != -1) {
+ /* Index via nread: strlen() is 0 for a line starting with NUL, giving line[-1]. */
+ if (nread > 0 && line[nread - 1] == '\n')
+ line[nread - 1] = '\0';
+
+ char *mid = _notmuch_message_id_parse_strict (local, line);
+ if (mid)
+ printf("GOOD: %s\n", mid);
+ else
+ printf("BAD: %s\n", line);
+ }
+ talloc_free (local);
+}
--
2.18.0
^ permalink raw reply related [flat|nested] 22+ messages in thread
* Re: [PATCH 13/15] lib: add _notmuch_message_id_parse_strict
2018-07-30 22:45 ` [PATCH 13/15] lib: add _notmuch_message_id_parse_strict David Bremner
@ 2018-08-01 0:46 ` Amin Bandali
2018-08-01 4:58 ` [Patch v1.1] " David Bremner
1 sibling, 0 replies; 22+ messages in thread
From: Amin Bandali @ 2018-08-01 0:46 UTC (permalink / raw)
To: David Bremner, notmuch
This patch in the series appears to be corrupt:
--8<---------------cut here---------------start------------->8---
Applying: lib: add _notmuch_message_id_parse_strict
/home/amin/.emacs.d/.git/modules/notmuch/rebase-apply/patch:132: trailing whitespace.
<1530507300.raoomurnbf.astroid@strange.none>
error: corrupt patch at line 160
Patch failed at 0013 lib: add _notmuch_message_id_parse_strict
Use 'git am --show-current-patch' to see the failed patch
When you have resolved this problem, run "git am --continue".
If you prefer to skip this patch, run "git am --skip" instead.
To restore the original branch and stop patching, run "git am --abort".
--8<---------------cut here---------------end--------------->8---
Seems to be due to a linebreak in the middle of a message id in a
test file, whereby the line doesn't begin with a +.
-amin
^ permalink raw reply [flat|nested] 22+ messages in thread
* [Patch v1.1] lib: add _notmuch_message_id_parse_strict
2018-07-30 22:45 ` [PATCH 13/15] lib: add _notmuch_message_id_parse_strict David Bremner
2018-08-01 0:46 ` Amin Bandali
@ 2018-08-01 4:58 ` David Bremner
1 sibling, 0 replies; 22+ messages in thread
From: David Bremner @ 2018-08-01 4:58 UTC (permalink / raw)
To: notmuch
The idea is that if a message-id parses with this function, the MUA
generating it was probably sane, and in particular it's probably safe
to use the result as a parent from In-Reply-To.
---
replace ^M with ^L in tests, hopefully less vulnerable to well-meaning
MTAs
lib/message-id.c | 32 ++++++++++++++++++
lib/notmuch-private.h | 14 ++++++++
test/Makefile.local | 6 +++-
test/T710-message-id.sh | 73 +++++++++++++++++++++++++++++++++++++++++
test/message-id-parse.c | 24 ++++++++++++++
5 files changed, 148 insertions(+), 1 deletion(-)
create mode 100755 test/T710-message-id.sh
create mode 100644 test/message-id-parse.c
diff --git a/lib/message-id.c b/lib/message-id.c
index d7541d50..a1dce9c8 100644
--- a/lib/message-id.c
+++ b/lib/message-id.c
@@ -1,4 +1,5 @@
#include "notmuch-private.h"
+#include "string-util.h"
/* Advance 'str' past any whitespace or RFC 822 comments. A comment is
* a (potentially nested) parenthesized sequence with '\' used to
@@ -94,3 +95,34 @@ _notmuch_message_id_parse (void *ctx, const char *message_id, const char **next)
return result;
}
+
+/* Strictly parse a lone "<id>" (outer whitespace allowed); returns the id
+ * talloc'ed under 'ctx', or NULL if anything else is present. */
+char *
+_notmuch_message_id_parse_strict (void *ctx, const char *message_id)
+{
+ const char *s, *end;
+ /* A missing or empty header value cannot contain a message-id. */
+ if (message_id == NULL || *message_id == '\0')
+ return NULL;
+ /* Leading whitespace is tolerated, but '<' must come right after it. */
+ s = skip_space (message_id);
+ if (*s == '<')
+ s++;
+ else
+ return NULL;
+ /* Scan to the closing '>'; cast so isspace never sees a negative char. */
+ for (end = s; *end && *end != '>'; end++)
+ if (isspace ((unsigned char) *end))
+ return NULL;
+ /* Require the closing '>' and nothing but whitespace after it. */
+ if (*end != '>')
+ return NULL;
+ else {
+ const char *last = skip_space (end+1);
+ if (*last != '\0')
+ return NULL;
+ }
+ /* Copy only the text between the delimiters into 'ctx'. */
+ return talloc_strndup (ctx, s, end - s);
+}
diff --git a/lib/notmuch-private.h b/lib/notmuch-private.h
index 083fc013..67fd4990 100644
--- a/lib/notmuch-private.h
+++ b/lib/notmuch-private.h
@@ -526,6 +526,20 @@ _notmuch_query_count_documents (notmuch_query_t *query,
char *
_notmuch_message_id_parse (void *ctx, const char *message_id, const char **next);
+/* Parse a message-id, discarding leading and trailing whitespace, and
+ * '<' and '>' delimiters.
+ *
+ * Apply a probably-stricter-than RFC definition of what is allowed in
+ * a message-id. In particular, forbid whitespace.
+ *
+ * Returns a newly talloc'ed string belonging to 'ctx'.
+ *
+ * Returns NULL if there is any error parsing the message-id.
+ */
+
+char *
+_notmuch_message_id_parse_strict (void *ctx, const char *message_id);
+
/* message.cc */
diff --git a/test/Makefile.local b/test/Makefile.local
index 1a0ab813..1cf09778 100644
--- a/test/Makefile.local
+++ b/test/Makefile.local
@@ -15,6 +15,9 @@ smtp_dummy_modules = $(smtp_dummy_srcs:.c=.o)
$(dir)/arg-test: $(dir)/arg-test.o command-line-arguments.o util/libnotmuch_util.a
$(call quiet,CC) $^ -o $@ $(LDFLAGS)
+$(dir)/message-id-parse: $(dir)/message-id-parse.o lib/libnotmuch.a util/libnotmuch_util.a
+ $(call quiet,CC) $^ -o $@ $(LDFLAGS) $(TALLOC_LDFLAGS)
+
$(dir)/hex-xcode: $(dir)/hex-xcode.o command-line-arguments.o util/libnotmuch_util.a
$(call quiet,CC) $^ -o $@ $(LDFLAGS) $(TALLOC_LDFLAGS)
@@ -50,7 +53,8 @@ test_main_srcs=$(dir)/arg-test.c \
$(dir)/smtp-dummy.c \
$(dir)/symbol-test.cc \
$(dir)/make-db-version.cc \
- $(dir)/ghost-report.cc
+ $(dir)/ghost-report.cc \
+ $(dir)/message-id-parse.c
test_srcs=$(test_main_srcs) $(dir)/database-test.c
diff --git a/test/T710-message-id.sh b/test/T710-message-id.sh
new file mode 100755
index 00000000..e73d6ba9
--- /dev/null
+++ b/test/T710-message-id.sh
@@ -0,0 +1,73 @@
+#!/usr/bin/env bash
+test_description="message id parsing"
+
+. $(dirname "$0")/test-lib.sh || exit 1
+
+test_begin_subtest "good message ids"
+${TEST_DIRECTORY}/message-id-parse <<EOF >OUTPUT
+<018b1a8f2d1df62e804ce88b65401304832dfbbf.1346614915.git.jani@nikula.org>
+<1530507300.raoomurnbf.astroid@strange.none>
+<1258787708-21121-2-git-send-email-keithp@keithp.com>
+EOF
+cat <<EOF >EXPECTED
+GOOD: 018b1a8f2d1df62e804ce88b65401304832dfbbf.1346614915.git.jani@nikula.org
+GOOD: 1530507300.raoomurnbf.astroid@strange.none
+GOOD: 1258787708-21121-2-git-send-email-keithp@keithp.com
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "leading and trailing space is OK"
+${TEST_DIRECTORY}/message-id-parse <<EOF >OUTPUT
+ <018b1a8f2d1df62e804ce88b65401304832dfbbf.1346614915.git.jani@nikula.org>
+<1530507300.raoomurnbf.astroid@strange.none>
+ <1258787708-21121-2-git-send-email-keithp@keithp.com>
+EOF
+cat <<EOF >EXPECTED
+GOOD: 018b1a8f2d1df62e804ce88b65401304832dfbbf.1346614915.git.jani@nikula.org
+GOOD: 1530507300.raoomurnbf.astroid@strange.none
+GOOD: 1258787708-21121-2-git-send-email-keithp@keithp.com
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "<> delimeters are required"
+${TEST_DIRECTORY}/message-id-parse <<EOF >OUTPUT
+018b1a8f2d1df62e804ce88b65401304832dfbbf.1346614915.git.jani@nikula.org>
+<1530507300.raoomurnbf.astroid@strange.none
+1258787708-21121-2-git-send-email-keithp@keithp.com
+EOF
+cat <<EOF >EXPECTED
+BAD: 018b1a8f2d1df62e804ce88b65401304832dfbbf.1346614915.git.jani@nikula.org>
+BAD: <1530507300.raoomurnbf.astroid@strange.none
+BAD: 1258787708-21121-2-git-send-email-keithp@keithp.com
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "embedded whitespace is forbidden"
+${TEST_DIRECTORY}/message-id-parse <<EOF >OUTPUT
+<018b1a8f2d1df62e804ce88b65401304832dfbbf.1346614915 .git.jani@nikula.org>
+<1530507300.raoomurnbf.astroid @strange.none>
+<1258787708-21121-\f2-git-send-email-keithp@keithp.com>
+EOF
+cat <<EOF >EXPECTED
+BAD: <018b1a8f2d1df62e804ce88b65401304832dfbbf.1346614915 .git.jani@nikula.org>
+BAD: <1530507300.raoomurnbf.astroid @strange.none>
+BAD: <1258787708-21121-\f2-git-send-email-keithp@keithp.com>
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+
+test_begin_subtest "folded real life bad In-Reply-To values"
+${TEST_DIRECTORY}/message-id-parse <<EOF >OUTPUT
+<22597.31869.380767.339702@chiark.greenend.org.uk> (Ian Jackson's message of "Mon, 5 Dec 2016 14:41:01 +0000")
+<20170625141242.loaalhis2eodo66n@gaara.hadrons.org> <149719990964.27883.13021127452105787770.reportbug@seneca.home.org>
+Your message of Tue, 09 Dec 2014 13:21:11 +0100. <1900758.CgLNVPbY9N@liber>
+EOF
+cat <<EOF >EXPECTED
+BAD: <22597.31869.380767.339702@chiark.greenend.org.uk> (Ian Jackson's message of "Mon, 5 Dec 2016 14:41:01 +0000")
+BAD: <20170625141242.loaalhis2eodo66n@gaara.hadrons.org> <149719990964.27883.13021127452105787770.reportbug@seneca.home.org>
+BAD: Your message of Tue, 09 Dec 2014 13:21:11 +0100. <1900758.CgLNVPbY9N@liber>
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+
+test_done
diff --git a/test/message-id-parse.c b/test/message-id-parse.c
new file mode 100644
index 00000000..25decc9b
--- /dev/null
+++ b/test/message-id-parse.c
@@ -0,0 +1,24 @@
+#include <stdio.h>
+#include <talloc.h>
+#include "notmuch-private.h"
+
+/* Read one candidate message-id per line from stdin and print GOOD/BAD. */
+int main(unused (int argc), unused (char **argv)){
+ char *line = NULL;
+ size_t len = 0;
+ ssize_t nread;
+ void *local = talloc_new (NULL);
+
+ while ((nread = getline(&line, &len, stdin)) != -1) {
+ /* Index via nread: strlen() is 0 for a line starting with NUL, giving line[-1]. */
+ if (nread > 0 && line[nread - 1] == '\n')
+ line[nread - 1] = '\0';
+
+ char *mid = _notmuch_message_id_parse_strict (local, line);
+ if (mid)
+ printf("GOOD: %s\n", mid);
+ else
+ printf("BAD: %s\n", line);
+ }
+ talloc_free (local);
+}
--
2.18.0
^ permalink raw reply related [flat|nested] 22+ messages in thread
* [PATCH 14/15] lib: change parent strategy to use In-Reply-To if it looks sane
2018-07-30 22:45 Threading patches v2 David Bremner
` (12 preceding siblings ...)
2018-07-30 22:45 ` [PATCH 13/15] lib: add _notmuch_message_id_parse_strict David Bremner
@ 2018-07-30 22:45 ` David Bremner
2018-07-30 22:45 ` [PATCH 15/15] test: add known broken test for multiple thread terms per message David Bremner
2018-08-01 14:53 ` Threading patches v2 Gregor Zattler
15 siblings, 0 replies; 22+ messages in thread
From: David Bremner @ 2018-07-30 22:45 UTC (permalink / raw)
To: notmuch
As reported by Sean Whitton, there are mailers (in particular the
Debian Bug Tracking System) that have sensible In-Reply-To headers,
but un-useful-for-notmuch References (in particular with the BTS, the
oldest reference is last). I looked at a sample of about 200K
messages, and only about 0.5% of these had something other than a single
message-id in In-Reply-To. On this basis, if we see a single
message-id in In-Reply-To, consider that as authoritative.
---
lib/add-message.cc | 20 +++++++++++++++-----
test/T510-thread-replies.sh | 1 -
2 files changed, 15 insertions(+), 6 deletions(-)
diff --git a/lib/add-message.cc b/lib/add-message.cc
index f5fac8be..da37032c 100644
--- a/lib/add-message.cc
+++ b/lib/add-message.cc
@@ -227,7 +227,7 @@ _notmuch_database_link_message_to_parents (notmuch_database_t *notmuch,
const char **thread_id)
{
GHashTable *parents = NULL;
- const char *refs, *in_reply_to, *in_reply_to_message_id;
+ const char *refs, *in_reply_to, *in_reply_to_message_id, *strict_message_id = NULL;
const char *last_ref_message_id, *this_message_id;
GList *l, *keys = NULL;
notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS;
@@ -242,14 +242,24 @@ _notmuch_database_link_message_to_parents (notmuch_database_t *notmuch,
parents, refs);
in_reply_to = _notmuch_message_file_get_header (message_file, "in-reply-to");
+ if (in_reply_to)
+ strict_message_id = _notmuch_message_id_parse_strict (message,
+ in_reply_to);
+
in_reply_to_message_id = parse_references (message,
this_message_id,
parents, in_reply_to);
- /* For the parent of this message, use the last message ID of the
- * References header, if available. If not, fall back to the
- * first message ID in the In-Reply-To header. */
- if (last_ref_message_id) {
+ /* For the parent of this message, use
+ * 1) the In-Reply-To header, if it looks sane, otherwise
+ * 2) the last message ID of the References header, if available.
+ * 3) Otherwise, fall back to the first message ID in
+ * the In-Reply-To header.
+ */
+
+ if (strict_message_id) {
+ _notmuch_message_add_term (message, "replyto", strict_message_id);
+ } else if (last_ref_message_id) {
_notmuch_message_add_term (message, "replyto",
last_ref_message_id);
} else if (in_reply_to_message_id) {
diff --git a/test/T510-thread-replies.sh b/test/T510-thread-replies.sh
index d94c9a0c..f5ee81fe 100755
--- a/test/T510-thread-replies.sh
+++ b/test/T510-thread-replies.sh
@@ -190,7 +190,6 @@ EOF
test_expect_equal_file EXPECTED OUTPUT
test_begin_subtest "trusting reply-to (tree view)"
-test_subtest_known_broken
test_emacs '(notmuch-tree "id:B00-root@example.org")
(notmuch-test-wait)
(test-output)
--
2.18.0
^ permalink raw reply related [flat|nested] 22+ messages in thread
* [PATCH 15/15] test: add known broken test for multiple thread terms per message
2018-07-30 22:45 Threading patches v2 David Bremner
` (13 preceding siblings ...)
2018-07-30 22:45 ` [PATCH 14/15] lib: change parent strategy to use In-Reply-To if it looks sane David Bremner
@ 2018-07-30 22:45 ` David Bremner
2018-08-01 14:53 ` Threading patches v2 Gregor Zattler
15 siblings, 0 replies; 22+ messages in thread
From: David Bremner @ 2018-07-30 22:45 UTC (permalink / raw)
To: notmuch
Having multiple thread terms on a message document seems to be the
underlying cause of some confusing results from notmuch search.
The presence of these multiple thread terms is presumably an indexing
bug, related to multiple files with the same message-id.
The files here are synthesized from a reproducer for the problems in
id:1523007700.l8xm6nm6af.naveen@linux.ibm.com. It isn't quite clear
this is the same issue (the symptoms using notmuch-search are a bit
different).
---
test/.gitignore | 1 +
test/Makefile.local | 7 ++++++-
test/T720-database-schema.sh | 16 ++++++++++++++++
test/corpora/threading/mutant-ref/file1 | 9 +++++++++
test/corpora/threading/mutant-ref/file2 | 9 +++++++++
test/corpora/threading/mutant-ref/file3 | 9 +++++++++
test/corpora/threading/mutant-ref/file4 | 7 +++++++
test/corpora/threading/mutant-ref/file5 | 7 +++++++
test/term-report.cc | 22 ++++++++++++++++++++++
9 files changed, 86 insertions(+), 1 deletion(-)
create mode 100755 test/T720-database-schema.sh
create mode 100644 test/corpora/threading/mutant-ref/file1
create mode 100644 test/corpora/threading/mutant-ref/file2
create mode 100644 test/corpora/threading/mutant-ref/file3
create mode 100644 test/corpora/threading/mutant-ref/file4
create mode 100644 test/corpora/threading/mutant-ref/file5
create mode 100644 test/term-report.cc
diff --git a/test/.gitignore b/test/.gitignore
index 73fe7e24..71bbd7ed 100644
--- a/test/.gitignore
+++ b/test/.gitignore
@@ -8,4 +8,5 @@
/make-db-version
/test-results
/ghost-report
+/term-report
/tmp.*
diff --git a/test/Makefile.local b/test/Makefile.local
index 1cf09778..c39feace 100644
--- a/test/Makefile.local
+++ b/test/Makefile.local
@@ -44,6 +44,9 @@ $(dir)/make-db-version: $(dir)/make-db-version.o
$(dir)/ghost-report: $(dir)/ghost-report.o
$(call quiet,CXX) $^ -o $@ $(LDFLAGS) $(XAPIAN_LDFLAGS)
+$(dir)/term-report: $(dir)/term-report.o
+ $(call quiet,CXX) $^ -o $@ $(LDFLAGS) $(XAPIAN_LDFLAGS)
+
.PHONY: test check
test_main_srcs=$(dir)/arg-test.c \
@@ -54,7 +57,9 @@ test_main_srcs=$(dir)/arg-test.c \
$(dir)/symbol-test.cc \
$(dir)/make-db-version.cc \
$(dir)/ghost-report.cc \
- $(dir)/message-id-parse.c
+ $(dir)/message-id-parse.c \
+ $(dir)/term-report.cc
+
test_srcs=$(test_main_srcs) $(dir)/database-test.c
diff --git a/test/T720-database-schema.sh b/test/T720-database-schema.sh
new file mode 100755
index 00000000..a6a99239
--- /dev/null
+++ b/test/T720-database-schema.sh
@@ -0,0 +1,16 @@
+#!/usr/bin/env bash
+test_description="database schema in lib/database.cc"
+
+. $(dirname "$0")/test-lib.sh || exit 1
+
+add_email_corpus threading
+# For each line of term-report output, count the terms starting with "G" (the thread terms).
+test_begin_subtest "every document has at most one thread term"
+test_subtest_known_broken
+${TEST_DIRECTORY}/term-report ${MAIL_DIR}/.notmuch/xapian | perl -ane 'pop(@F); printf "%d\n",scalar(grep { m/^G/ } @F);' | sort -u > OUTPUT
+cat <<EOF > EXPECTED
+0
+1
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+test_done
diff --git a/test/corpora/threading/mutant-ref/file1 b/test/corpora/threading/mutant-ref/file1
new file mode 100644
index 00000000..97f8db58
--- /dev/null
+++ b/test/corpora/threading/mutant-ref/file1
@@ -0,0 +1,9 @@
+From: Alice <alice@example.org>
+To: Daniel <daniel@example.org>
+Subject: leaf message
+In-Reply-To: <mutant-ref-parent1@example.org>
+References: <mutant-ref-parent1@example.org>
+Message-ID: <mutant-ref-leaf@example.org>
+Date: Thu, 16 Jun 2016 22:14:41 -0400
+
+body
diff --git a/test/corpora/threading/mutant-ref/file2 b/test/corpora/threading/mutant-ref/file2
new file mode 100644
index 00000000..2b2ccd1d
--- /dev/null
+++ b/test/corpora/threading/mutant-ref/file2
@@ -0,0 +1,9 @@
+From: Alice <alice@example.org>
+To: Daniel <daniel@example.org>
+Subject: leaf message
+In-Reply-To: <mutant-ref-parent2@example.org>
+References: <mutant-ref-parent2@example.org>
+Message-ID: <mutant-ref-leaf@example.org>
+Date: Thu, 16 Jun 2016 22:14:41 -0400
+
+body
diff --git a/test/corpora/threading/mutant-ref/file3 b/test/corpora/threading/mutant-ref/file3
new file mode 100644
index 00000000..a8e705bc
--- /dev/null
+++ b/test/corpora/threading/mutant-ref/file3
@@ -0,0 +1,9 @@
+From: Alice <alice@example.org>
+To: Daniel <daniel@example.org>
+Subject: leaf message
+In-Reply-To: <mutant-ref-parent3@example.org>
+References: <mutant-ref-parent3@example.org>
+Message-ID: <mutant-ref-leaf@example.org>
+Date: Thu, 16 Jun 2016 22:14:41 -0400
+
+body
diff --git a/test/corpora/threading/mutant-ref/file4 b/test/corpora/threading/mutant-ref/file4
new file mode 100644
index 00000000..3a0a5a13
--- /dev/null
+++ b/test/corpora/threading/mutant-ref/file4
@@ -0,0 +1,7 @@
+From: Daniel <daniel@example.org>
+To: Alice <alice@example.org>
+Subject: existing parent
+Message-ID: <mutant-ref-parent2@example.org>
+Date: Fri, 17 Jun 2016 22:14:41 -0400
+
+body
diff --git a/test/corpora/threading/mutant-ref/file5 b/test/corpora/threading/mutant-ref/file5
new file mode 100644
index 00000000..8f525d63
--- /dev/null
+++ b/test/corpora/threading/mutant-ref/file5
@@ -0,0 +1,7 @@
+From: Daniel <daniel@example.org>
+To: Alice <alice@example.org>
+Subject: existing parent
+Message-ID: <mutant-ref-parent3@example.org>
+Date: Fri, 17 Jun 2016 22:14:41 -0400
+
+body
diff --git a/test/term-report.cc b/test/term-report.cc
new file mode 100644
index 00000000..88cd1bf5
--- /dev/null
+++ b/test/term-report.cc
@@ -0,0 +1,22 @@
+#include <iostream>
+#include <cstdlib>
+#include <xapian.h>
+
+int main(int argc, char **argv) {
+    /* Print one line per document: the docid followed by every term in its term list. */
+    if (argc < 2) {
+        std::cerr << "usage: term-report xapian-dir" << std::endl;
+        exit(1);
+    }
+    /* get_lastdocid() is the highest docid in use, hence "<=". NOTE(review): assumes no docid gaps. */
+    Xapian::Database db(argv[1]);
+    for (Xapian::docid id(1); id <= db.get_lastdocid(); id++) {
+        std::cout << id;
+        for (Xapian::TermIterator iter = db.termlist_begin(id);
+             iter != db.termlist_end(id);
+             iter++) {
+            std::cout << " " << *iter;
+        }
+        std::cout << std::endl;
+    }
+}
--
2.18.0
^ permalink raw reply related [flat|nested] 22+ messages in thread
* Re: Threading patches v2
2018-07-30 22:45 Threading patches v2 David Bremner
` (14 preceding siblings ...)
2018-07-30 22:45 ` [PATCH 15/15] test: add known broken test for multiple thread terms per message David Bremner
@ 2018-08-01 14:53 ` Gregor Zattler
2018-08-27 1:53 ` [PATCH] WIP: sort top level messages in thread David Bremner
15 siblings, 1 reply; 22+ messages in thread
From: Gregor Zattler @ 2018-08-01 14:53 UTC (permalink / raw)
To: notmuch
Hi David,
* David Bremner <david@tethera.net> [2018-07-31; 06:45]:
> This series obsoletes the patch (series)
>
> id:20180727083720.19909-1-david@tethera.net
> id:20180723082259.32440-2-david@tethera.net
> id:20180720233746.2844-1-david@tethera.net
>
> It also includes a proposed fix for the problem reported by Gregor in
>
> id:87y3ducffj.fsf@len.workgroup
I applied the patch set and it solves the problem for the first
thread shown in id:87fu05p2iu.fsf@len.workgroup
For me this fixes the more important issue of the two.
It does not fix the ordering in the second example:
[support.xxxxxxxxxxx-xxxxxxxxx-xxxxxxxxx.de #33575] AutoReply: FYI: xxxx xxxxxxx xxxxxxxxxxxx xxx
1 via RT <rt-xxx@xxxxxxx-xxxxxxxxxxxxxxxxx-xxxxxxxxx-xxxxxxxxx.de> (Tue, 26 Jun 2018 14:44:24 +0200) (inbox)
--> Subject: [support.xxxxxxxxxxx-xxxxxxxxx-xxxxxxxxx.de #33575] Resolved: FYI: xxxx xxxxxxx xxxxxxxxxxxx xxx
2 Gregor Zattler <g.zattler@xxxxxxx-xxxxxxxxx.de> (Tue, 19 Jun 2018 18:26:26 +0200) (inbox)
Subject: FYI: xxxx xxxxxxx xxxxxxxxxxxx xxx
2a via RT <rt-xxx@xxxxxxx-xxxxxxxxxxxxxxxxx-xxxxxxxxx-xxxxxxxxx.de> (Tue, 19 Jun 2018 18:26:36 +0200) (inbox)
Subject: [support.xxxxxxxxxxx-xxxxxxxxx-xxxxxxxxx.de #33575] AutoReply: FYI: xxxx xxxxxxx xxxxxxxxxxxx xxx
I have to admit, this is not one thread if one accounts for
threads only with respect to References headers.
But even if it's two threads we have two level 1 emails (1, 2) and
I would expect the level 1 emails to be sorted according to
date, the whole tree of sibling emails to every level 1 email
behind and indented relative to its level 1 email:
2
2a
1
This is what mutt does.
Thanks for your efforts, it's much easier now to follow email
discussions.
Ciao; Gregor
--
-... --- .-. . -.. ..--.. ...-.-
^ permalink raw reply [flat|nested] 22+ messages in thread
* [PATCH] WIP: sort top level messages in thread
2018-08-01 14:53 ` Threading patches v2 Gregor Zattler
@ 2018-08-27 1:53 ` David Bremner
2018-08-27 13:44 ` Gregor Zattler
0 siblings, 1 reply; 22+ messages in thread
From: David Bremner @ 2018-08-27 1:53 UTC (permalink / raw)
To: Gregor Zattler, notmuch
this needs a test, and memory de-allocation of the replaced
lists. Currently it creates a fair amount of garbage.
---
Hi Gregor;
Sorry for the lack of reply, some vacation intervened. Can you test
this patch? I think it fixes the second thread also.
lib/message.cc | 43 +++++++++++++++++++++++--------------------
lib/notmuch-private.h | 5 +++--
lib/thread.cc | 9 ++-------
3 files changed, 28 insertions(+), 29 deletions(-)
diff --git a/lib/message.cc b/lib/message.cc
index 3c547298..f329be20 100644
--- a/lib/message.cc
+++ b/lib/message.cc
@@ -639,34 +639,37 @@ _cmpmsg (const void *pa, const void *pb)
return (int) difftime (time_a, time_b);
}
-void
-_notmuch_message_sort_subtree (notmuch_message_t *root)
-{
- size_t child_count = 0;
- size_t child_capacity = 16;
- notmuch_message_t **children = talloc_zero_array (root, notmuch_message_t *, child_capacity);
+notmuch_message_list_t *
+_notmuch_message_sort_subtrees (notmuch_message_list_t *list) {
+
+ size_t count = 0;
+ size_t capacity = 16;
- for (notmuch_messages_t *messages = _notmuch_messages_create (root->replies);
+ if (!list)
+ return list;
+
+ notmuch_message_t **message_array = talloc_zero_array (list, notmuch_message_t *, capacity);
+
+ for (notmuch_messages_t *messages = _notmuch_messages_create (list);
notmuch_messages_valid (messages);
notmuch_messages_move_to_next (messages)) {
- notmuch_message_t *child = notmuch_messages_get (messages);
- if (child_count >= child_capacity) {
- child_capacity *= 2;
- children = talloc_realloc (root, children, notmuch_message_t *, child_capacity);
+ notmuch_message_t *root = notmuch_messages_get (messages);
+ if (count >= capacity) {
+ capacity *= 2;
+ message_array = talloc_realloc (root, message_array, notmuch_message_t *, capacity);
}
- children[child_count++] = child;
- _notmuch_message_sort_subtree (child);
+ message_array[count++] = root;
+ root->replies = _notmuch_message_sort_subtrees (root->replies);
}
- notmuch_message_list_t *new_replies = _notmuch_message_list_create (root);
+ notmuch_message_list_t *new_list = _notmuch_message_list_create (list);
- qsort (children, child_count, sizeof (notmuch_message_t *), _cmpmsg);
- for (size_t i=0; i<child_count; i++){
- _notmuch_message_list_add_message (new_replies, children[i]);
+ qsort (message_array, count, sizeof (notmuch_message_t *), _cmpmsg);
+ for (size_t i=0; i<count; i++){
+ _notmuch_message_list_add_message (new_list, message_array[i]);
}
- talloc_free (root->replies);
- root->replies = new_replies;
- talloc_free (children);
+ talloc_free (message_array);
+ return new_list;
}
notmuch_messages_t *
diff --git a/lib/notmuch-private.h b/lib/notmuch-private.h
index 67fd4990..64f4e982 100644
--- a/lib/notmuch-private.h
+++ b/lib/notmuch-private.h
@@ -558,8 +558,9 @@ size_t _notmuch_message_get_thread_depth (notmuch_message_t *message);
void
_notmuch_message_label_depths (notmuch_message_t *message,
size_t depth);
-void
-_notmuch_message_sort_subtree (notmuch_message_t *message);
+
+notmuch_message_list_t *
+_notmuch_message_sort_subtrees (notmuch_message_list_t *list);
/* sha1.c */
diff --git a/lib/thread.cc b/lib/thread.cc
index a5047103..e9e15a5c 100644
--- a/lib/thread.cc
+++ b/lib/thread.cc
@@ -505,13 +505,8 @@ _resolve_thread_relationships (notmuch_thread_t *thread)
_notmuch_message_list_add_message (thread->toplevel_list, message);
}
- for (notmuch_messages_t *messages = _notmuch_messages_create (thread->toplevel_list);
- notmuch_messages_valid (messages);
- notmuch_messages_move_to_next (messages))
- {
- notmuch_message_t *message = notmuch_messages_get (messages);
- _notmuch_message_sort_subtree (message);
- }
+ /* XXX this creates garbage */
+ thread->toplevel_list = _notmuch_message_sort_subtrees (thread->toplevel_list);
talloc_free (local);
}
--
2.18.0
^ permalink raw reply related [flat|nested] 22+ messages in thread
* Re: [PATCH] WIP: sort top level messages in thread
2018-08-27 1:53 ` [PATCH] WIP: sort top level messages in thread David Bremner
@ 2018-08-27 13:44 ` Gregor Zattler
2018-08-28 21:33 ` Amin Bandali
0 siblings, 1 reply; 22+ messages in thread
From: Gregor Zattler @ 2018-08-27 13:44 UTC (permalink / raw)
To: notmuch
Hi David,
* David Bremner <david@tethera.net> [2018-08-26; 22:53]:
> this needs a test, and memory de-allocation of the replaced
> lists. Currently it creates a fair amount of garbage.
> ---
> Sorry for the lack of reply, some vacation intervened.
Thanks, hope it was relaxing.
> Can you test
> this patch? I think it fixes the second thread also.
The patch applied cleanly on top of the others (which I use
regularly with no problems) and fixed the threading order problem
in the two test cases I sent you. Other RT-emails are also
ordered nicely now.
Thanks for this patch, I'll also use it regularly.
Gregor
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH] WIP: sort top level messages in thread
2018-08-27 13:44 ` Gregor Zattler
@ 2018-08-28 21:33 ` Amin Bandali
0 siblings, 0 replies; 22+ messages in thread
From: Amin Bandali @ 2018-08-28 21:33 UTC (permalink / raw)
To: Gregor Zattler, notmuch
Hi Gregor, David,
> The patch applied cleanly on top of the others (which I use
> regularly with no problems) and fixed the threading order problem
> in the two test cases I sent you. Other RT-emails are also
> ordered nicely now.
>
> Thanks for this patch, I'll also use it regularly.
Seconded. Thanks for your work on the patch, David. It seems to
nicely order my RT tickets too, including the very first message
in each thread.
I'll continue using the patch and will report back if I run into
any issues.
-amin
^ permalink raw reply [flat|nested] 22+ messages in thread