unofficial mirror of notmuch@notmuchmail.org
 help / color / mirror / code / Atom feed
* [PATCH] test: add known broken test for duplicate thread-id terms
@ 2021-05-15 13:05 David Bremner
  2021-05-15 18:40 ` [PATCH] lib/n_d_index_file: re-use thread-id of existing message David Bremner
                   ` (3 more replies)
  0 siblings, 4 replies; 6+ messages in thread
From: David Bremner @ 2021-05-15 13:05 UTC (permalink / raw)
  To: notmuch; +Cc: David Bremner

According to my bijection, this bug has been present since commit
411675a6ce in 2017. It is apparently harmless for regular use, but
does make notmuch crash when compiled with -DDEBUG_DATABASE_SANITY.
---
 test/T670-duplicate-mid.sh | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/test/T670-duplicate-mid.sh b/test/T670-duplicate-mid.sh
index 4e5672ab..3fd59ca5 100755
--- a/test/T670-duplicate-mid.sh
+++ b/test/T670-duplicate-mid.sh
@@ -2,10 +2,27 @@
 test_description="duplicate message ids"
 . $(dirname "$0")/test-lib.sh || exit 1
 
+test_require_external_prereq xapian-delve
+
 add_message '[id]="duplicate"' '[subject]="message 1" [filename]=copy1'
 add_message '[id]="duplicate"' '[subject]="message 2" [filename]=copy2'
 
 add_message '[id]="duplicate"' '[subject]="message 0" [filename]=copy0'
+
+test_begin_subtest 'at most 1 thread-id per xapian document'
+test_subtest_known_broken
+db=${MAIL_DIR}/.notmuch/xapian
+cp /dev/null OUTPUT.raw
+for doc in $(xapian-delve -1 -t '' "$db" | grep '^[1-9]'); do
+    xapian-delve -1 -r "$doc"  "$db" | grep '^G' | wc -l >> OUTPUT.raw
+done
+sort -u < OUTPUT.raw > OUTPUT
+cat <<EOF > EXPECTED
+0
+1
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
 test_begin_subtest 'search: first indexed subject preserved'
 cat <<EOF > EXPECTED
 thread:XXX   2001-01-05 [1/1(3)] Notmuch Test Suite; message 1 (inbox unread)
-- 
2.30.2

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH] lib/n_d_index_file: re-use thread-id of existing message
  2021-05-15 13:05 [PATCH] test: add known broken test for duplicate thread-id terms David Bremner
@ 2021-05-15 18:40 ` David Bremner
  2021-05-17  7:12 ` [PATCH] test: add known broken test for duplicate thread-id terms Tomi Ollila
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 6+ messages in thread
From: David Bremner @ 2021-05-15 18:40 UTC (permalink / raw)
  To: David Bremner, notmuch

This prevents the message document getting multiple thread-id terms
when there are multiple files with the same message-id.

This change shifts some thread ids, requiring adjustments to other tests.
---
 lib/add-message.cc            |  9 ++++++---
 test/T357-index-decryption.sh | 26 ++++++++++++--------------
 test/T670-duplicate-mid.sh    |  1 -
 3 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/lib/add-message.cc b/lib/add-message.cc
index d4a00b17..b16748fd 100644
--- a/lib/add-message.cc
+++ b/lib/add-message.cc
@@ -404,14 +404,17 @@ static notmuch_status_t
 _notmuch_database_link_message (notmuch_database_t *notmuch,
 				notmuch_message_t *message,
 				notmuch_message_file_t *message_file,
-				bool is_ghost)
+				bool is_ghost,
+				bool is_new)
 {
     void *local = talloc_new (NULL);
     notmuch_status_t status;
     const char *thread_id = NULL;
 
     /* Check if the message already had a thread ID */
-    if (notmuch->features & NOTMUCH_FEATURE_GHOSTS) {
+    if (! is_new) {
+	thread_id = notmuch_message_get_thread_id (message);
+    } else if (notmuch->features & NOTMUCH_FEATURE_GHOSTS) {
 	if (is_ghost)
 	    thread_id = notmuch_message_get_thread_id (message);
     } else {
@@ -538,7 +541,7 @@ notmuch_database_index_file (notmuch_database_t *notmuch,
 	}
 
 	ret = _notmuch_database_link_message (notmuch, message,
-					      message_file, is_ghost);
+					      message_file, is_ghost, is_new);
 	if (ret)
 	    goto DONE;
 
diff --git a/test/T357-index-decryption.sh b/test/T357-index-decryption.sh
index b81bdfe1..3afefc2a 100755
--- a/test/T357-index-decryption.sh
+++ b/test/T357-index-decryption.sh
@@ -113,12 +113,10 @@ test_expect_equal \
     "$expected"
 
 # try inserting it with decryption, should appear as a single copy
-# (note: i think thread id skips 4 because of duplicate message-id
-# insertion, above)
 test_begin_subtest "message cleartext is present with insert --decrypt=true"
 notmuch insert --folder=sent --decrypt=true <<<"$contents"
-output=$(notmuch search wumpus)
-expected='thread:0000000000000005   2000-01-01 [1/1] Notmuch Test Suite; test encrypted message for cleartext index 002 (encrypted inbox unread)'
+output=$(notmuch search wumpus | notmuch_search_sanitize)
+expected='thread:XXX   2000-01-01 [1/1] Notmuch Test Suite; test encrypted message for cleartext index 002 (encrypted inbox unread)'
 test_expect_equal \
     "$output" \
     "$expected"
@@ -128,9 +126,9 @@ test_expect_equal \
 test_begin_subtest 'tagging all messages'
 test_expect_success 'notmuch tag +blarney "encrypted message"'
 test_begin_subtest "verify that tags have not changed"
-output=$(notmuch search tag:blarney)
-expected='thread:0000000000000001   2000-01-01 [1/1] Notmuch Test Suite; test encrypted message for cleartext index 001 (blarney encrypted inbox)
-thread:0000000000000005   2000-01-01 [1/1] Notmuch Test Suite; test encrypted message for cleartext index 002 (blarney encrypted inbox unread)'
+output=$(notmuch search tag:blarney | notmuch_search_sanitize)
+expected='thread:XXX   2000-01-01 [1/1] Notmuch Test Suite; test encrypted message for cleartext index 001 (blarney encrypted inbox)
+thread:XXX   2000-01-01 [1/1] Notmuch Test Suite; test encrypted message for cleartext index 002 (blarney encrypted inbox unread)'
 test_expect_equal \
     "$output" \
     "$expected"
@@ -139,14 +137,14 @@ test_expect_equal \
 test_begin_subtest 'reindex old messages'
 test_expect_success 'notmuch reindex --decrypt=true tag:encrypted and not property:index.decryption=success'
 test_begin_subtest "reindexed encrypted message, including cleartext"
-output=$(notmuch search wumpus)
+output=$(notmuch search wumpus | notmuch_search_sanitize)
 test_expect_equal \
     "$output" \
     "$expected"
 
 # and the same search, but by property ($expected is untouched):
 test_begin_subtest "emacs search by property for both messages"
-output=$(notmuch search property:index.decryption=success)
+output=$(notmuch search property:index.decryption=success | notmuch_search_sanitize)
 test_expect_equal \
     "$output" \
     "$expected"
@@ -155,7 +153,7 @@ test_expect_equal \
 test_begin_subtest 'reindex in auto mode'
 test_expect_success 'notmuch reindex tag:encrypted and property:index.decryption=success'
 test_begin_subtest "reindexed encrypted messages, should not have changed"
-output=$(notmuch search wumpus)
+output=$(notmuch search wumpus | notmuch_search_sanitize)
 test_expect_equal \
     "$output" \
     "$expected"
@@ -189,9 +187,9 @@ test_expect_equal \
 
 # ensure that the tags remain even when we are dropping the cleartext.
 test_begin_subtest "verify that tags remain without cleartext"
-output=$(notmuch search tag:blarney)
-expected='thread:0000000000000001   2000-01-01 [1/1] Notmuch Test Suite; test encrypted message for cleartext index 001 (blarney encrypted inbox)
-thread:0000000000000005   2000-01-01 [1/1] Notmuch Test Suite; test encrypted message for cleartext index 002 (blarney encrypted inbox unread)'
+output=$(notmuch search tag:blarney | notmuch_search_sanitize)
+expected='thread:XXX   2000-01-01 [1/1] Notmuch Test Suite; test encrypted message for cleartext index 001 (blarney encrypted inbox)
+thread:XXX   2000-01-01 [1/1] Notmuch Test Suite; test encrypted message for cleartext index 002 (blarney encrypted inbox unread)'
 test_expect_equal \
     "$output" \
     "$expected"
@@ -200,7 +198,7 @@ test_begin_subtest "index cleartext without keeping session keys"
 test_expect_success "notmuch reindex --decrypt=nostash tag:blarney"
 
 test_begin_subtest "Ensure that the indexed terms are present"
-output=$(notmuch search wumpus)
+output=$(notmuch search wumpus | notmuch_search_sanitize)
 test_expect_equal \
     "$output" \
     "$expected"
diff --git a/test/T670-duplicate-mid.sh b/test/T670-duplicate-mid.sh
index 3fd59ca5..5359e542 100755
--- a/test/T670-duplicate-mid.sh
+++ b/test/T670-duplicate-mid.sh
@@ -10,7 +10,6 @@ add_message '[id]="duplicate"' '[subject]="message 2" [filename]=copy2'
 add_message '[id]="duplicate"' '[subject]="message 0" [filename]=copy0'
 
 test_begin_subtest 'at most 1 thread-id per xapian document'
-test_subtest_known_broken
 db=${MAIL_DIR}/.notmuch/xapian
 cp /dev/null OUTPUT.raw
 for doc in $(xapian-delve -1 -t '' "$db" | grep '^[1-9]'); do
-- 
2.30.2

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH] test: add known broken test for duplicate thread-id terms
  2021-05-15 13:05 [PATCH] test: add known broken test for duplicate thread-id terms David Bremner
  2021-05-15 18:40 ` [PATCH] lib/n_d_index_file: re-use thread-id of existing message David Bremner
@ 2021-05-17  7:12 ` Tomi Ollila
  2021-05-17  9:33 ` Michael J Gruber
  2021-05-22 12:40 ` David Bremner
  3 siblings, 0 replies; 6+ messages in thread
From: Tomi Ollila @ 2021-05-17  7:12 UTC (permalink / raw)
  To: David Bremner, notmuch; +Cc: David Bremner

On Sat, May 15 2021, David Bremner wrote:

> According to my bijection, this bug has been present since commit
> 411675a6ce in 2017. It is apparently harmless for regular use, but
> does make notmuch crash when compiled with -DDEBUG_DATABASE_SANITY.
> ---
>  test/T670-duplicate-mid.sh | 17 +++++++++++++++++
>  1 file changed, 17 insertions(+)
>
> diff --git a/test/T670-duplicate-mid.sh b/test/T670-duplicate-mid.sh
> index 4e5672ab..3fd59ca5 100755
> --- a/test/T670-duplicate-mid.sh
> +++ b/test/T670-duplicate-mid.sh
> @@ -2,10 +2,27 @@
>  test_description="duplicate message ids"
>  . $(dirname "$0")/test-lib.sh || exit 1
>  
> +test_require_external_prereq xapian-delve
> +
>  add_message '[id]="duplicate"' '[subject]="message 1" [filename]=copy1'
>  add_message '[id]="duplicate"' '[subject]="message 2" [filename]=copy2'
>  
>  add_message '[id]="duplicate"' '[subject]="message 0" [filename]=copy0'
> +
> +test_begin_subtest 'at most 1 thread-id per xapian document'
> +test_subtest_known_broken
> +db=${MAIL_DIR}/.notmuch/xapian
> +cp /dev/null OUTPUT.raw
> +for doc in $(xapian-delve -1 -t '' "$db" | grep '^[1-9]'); do
> +    xapian-delve -1 -r "$doc"  "$db" | grep '^G' | wc -l >> OUTPUT.raw
> +done

for doc in $(xapian-delve -1 -t '' "$db" | grep '^[1-9]'); do
    xapian-delve -1 -r "$doc" "$db" | grep -c '^G'
done > OUTPUT.raw

will do (w/o cp /dev/null OUTPUT.raw) (also NOTE SPACING !!! >;D)


> +sort -u < OUTPUT.raw > OUTPUT
> +cat <<EOF > EXPECTED
> +0
> +1
> +EOF
> +test_expect_equal_file EXPECTED OUTPUT
> +
>  test_begin_subtest 'search: first indexed subject preserved'
>  cat <<EOF > EXPECTED
>  thread:XXX   2001-01-05 [1/1(3)] Notmuch Test Suite; message 1 (inbox unread)
> -- 
> 2.30.2

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] test: add known broken test for duplicate thread-id terms
  2021-05-15 13:05 [PATCH] test: add known broken test for duplicate thread-id terms David Bremner
  2021-05-15 18:40 ` [PATCH] lib/n_d_index_file: re-use thread-id of existing message David Bremner
  2021-05-17  7:12 ` [PATCH] test: add known broken test for duplicate thread-id terms Tomi Ollila
@ 2021-05-17  9:33 ` Michael J Gruber
  2021-05-17 10:02   ` David Bremner
  2021-05-22 12:40 ` David Bremner
  3 siblings, 1 reply; 6+ messages in thread
From: Michael J Gruber @ 2021-05-17  9:33 UTC (permalink / raw)
  To: David Bremner, notmuch; +Cc: David Bremner

David Bremner venit, vidit, dixit 2021-05-15 15:05:07:
> According to my bijection, this bug has been present since commit
> 411675a6ce in 2017. It is apparently harmless for regular use, but
> does make notmuch crash when compiled with -DDEBUG_DATABASE_SANITY.

If a message "belongs" to 2 threads, doesn't it mean that it "belongs"
to at least 1 that it doesn't really belong to?

The recent problem with "db update in pre-new hook" surfaced as extra
thread assignments for the wrong messages (making them show up the wrong
thread, too).

I meanwhile noticed that they are more common, but am wondering whether
they are really harmless?

Michael

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] test: add known broken test for duplicate thread-id terms
  2021-05-17  9:33 ` Michael J Gruber
@ 2021-05-17 10:02   ` David Bremner
  0 siblings, 0 replies; 6+ messages in thread
From: David Bremner @ 2021-05-17 10:02 UTC (permalink / raw)
  To: Michael J Gruber, notmuch

Michael J Gruber <git@grubix.eu> writes:

> David Bremner venit, vidit, dixit 2021-05-15 15:05:07:
>> According to my bijection, this bug has been present since commit
>> 411675a6ce in 2017. It is apparently harmless for regular use, but
>> does make notmuch crash when compiled with -DDEBUG_DATABASE_SANITY.
>
> If a message "belongs" to 2 threads, doesn't it mean that it "belongs"
> to at least 1 that it doesn't really belong to?

Sure, conceptually a message should belong to only one thread. But it
isn't clear what having an extra thread-id attached to a message really
means. As far as I can tell, _notmuch_message_get_term will just always
choose the lexicographically first (i.e. minimum) term.

> The recent problem with "db update in pre-new hook" surfaced as extra
> thread assignments for the wrong messages (making them show up the wrong
> thread, too).

To the best of my knowledge, this was caused by something else, namely
the in-memory view of last thread-id being out of sync with the database.

> I meanwhile noticed that they are more common, but am wondering whether
> they are really harmless?

It's possible that I missed some harmful effect. Anton reminded me on
IRC of some previous issues with threading that could conceivably be
related. I guess it would be more precise to say "Duplicate thread-id
terms have not been demonstrated to cause problems in regular use". I
mean, they're obviously a bug.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] test: add known broken test for duplicate thread-id terms
  2021-05-15 13:05 [PATCH] test: add known broken test for duplicate thread-id terms David Bremner
                   ` (2 preceding siblings ...)
  2021-05-17  9:33 ` Michael J Gruber
@ 2021-05-22 12:40 ` David Bremner
  3 siblings, 0 replies; 6+ messages in thread
From: David Bremner @ 2021-05-22 12:40 UTC (permalink / raw)
  To: notmuch

David Bremner <david@tethera.net> writes:

> According to my bijection, this bug has been present since commit
> 411675a6ce in 2017. It is apparently harmless for regular use, but
> does make notmuch crash when compiled with -DDEBUG_DATABASE_SANITY.

applied to master, with updates to commit message and shell script
fixes.

d

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2021-05-22 12:40 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-05-15 13:05 [PATCH] test: add known broken test for duplicate thread-id terms David Bremner
2021-05-15 18:40 ` [PATCH] lib/n_d_index_file: re-use thread-id of existing message David Bremner
2021-05-17  7:12 ` [PATCH] test: add known broken test for duplicate thread-id terms Tomi Ollila
2021-05-17  9:33 ` Michael J Gruber
2021-05-17 10:02   ` David Bremner
2021-05-22 12:40 ` David Bremner

Code repositories for project(s) associated with this public inbox

	https://yhetil.org/notmuch.git/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).