unofficial mirror of notmuch@notmuchmail.org
 help / color / mirror / code / Atom feed
* [PATCH 00/12] Add ghost messages and fix thread linking
@ 2014-10-06 23:17 Austin Clements
  2014-10-06 23:17 ` [PATCH v2 01/12] lib: Move message ID compression to _notmuch_message_create_for_message_id Austin Clements
                   ` (12 more replies)
  0 siblings, 13 replies; 21+ messages in thread
From: Austin Clements @ 2014-10-06 23:17 UTC (permalink / raw)
  To: notmuch

This is v2 of the
id:1412345958-8278-1-git-send-email-aclements@csail.mit.edu.  This
adds some comments and clarifies some code as suggested by David.
Patch 6 is new in v2 and adds some bit-twiddling macros for clarity
and robustness in later patches.

The diff from v1 is below.

diff --git a/lib/database.cc b/lib/database.cc
index 4655f59..6e51a72 100644
--- a/lib/database.cc
+++ b/lib/database.cc
@@ -1277,6 +1277,8 @@ notmuch_database_upgrade (notmuch_database_t *notmuch,
 	    ++total;
     }
     if (new_features & NOTMUCH_FEATURE_GHOSTS) {
+	/* The ghost message upgrade converts all thread_id_*
+	 * metadata values into ghost message documents. */
 	t_end = db->metadata_keys_end ("thread_id_");
 	for (t = db->metadata_keys_begin ("thread_id_"); t != t_end; ++t)
 	    ++total;
diff --git a/lib/message.cc b/lib/message.cc
index ad832cf..a7a13cc 100644
--- a/lib/message.cc
+++ b/lib/message.cc
@@ -344,15 +344,17 @@ _notmuch_message_ensure_metadata (notmuch_message_t *message)
 
     /* Get document type */
     assert (strcmp (id_prefix, type_prefix) < 0);
-    if (! (message->lazy_flags & (1 << NOTMUCH_MESSAGE_FLAG_GHOST))) {
+    if (! NOTMUCH_TEST_BIT (message->lazy_flags, NOTMUCH_MESSAGE_FLAG_GHOST)) {
 	i.skip_to (type_prefix);
+	/* "T" is the prefix "type" fields.  See
+	 * BOOLEAN_PREFIX_INTERNAL. */
 	if (*i == "Tmail")
-	    message->flags &= ~(1 << NOTMUCH_MESSAGE_FLAG_GHOST);
+	    NOTMUCH_CLEAR_BIT (&message->flags, NOTMUCH_MESSAGE_FLAG_GHOST);
 	else if (*i == "Tghost")
-	    message->flags |= (1 << NOTMUCH_MESSAGE_FLAG_GHOST);
+	    NOTMUCH_SET_BIT (&message->flags, NOTMUCH_MESSAGE_FLAG_GHOST);
 	else
 	    INTERNAL_ERROR ("Message without type term");
-	message->lazy_flags |= (1 << NOTMUCH_MESSAGE_FLAG_GHOST);
+	NOTMUCH_SET_BIT (&message->lazy_flags, NOTMUCH_MESSAGE_FLAG_GHOST);
     }
 
     /* Get filename list.  Here we get only the terms.  We lazily
@@ -390,8 +392,8 @@ _notmuch_message_invalidate_metadata (notmuch_message_t *message,
     }
 
     if (strcmp ("type", prefix_name) == 0) {
-	message->flags &= ~(1 << NOTMUCH_MESSAGE_FLAG_GHOST);
-	message->lazy_flags &= ~(1 << NOTMUCH_MESSAGE_FLAG_GHOST);
+	NOTMUCH_CLEAR_BIT (&message->flags, NOTMUCH_MESSAGE_FLAG_GHOST);
+	NOTMUCH_CLEAR_BIT (&message->lazy_flags, NOTMUCH_MESSAGE_FLAG_GHOST);
     }
 
     if (strcmp ("file-direntry", prefix_name) == 0) {
@@ -893,10 +895,10 @@ notmuch_message_get_flag (notmuch_message_t *message,
 			  notmuch_message_flag_t flag)
 {
     if (flag == NOTMUCH_MESSAGE_FLAG_GHOST &&
-	! (message->lazy_flags & (1 << flag)))
+	! NOTMUCH_TEST_BIT (message->lazy_flags, flag))
 	_notmuch_message_ensure_metadata (message);
 
-    return message->flags & (1 << flag);
+    return NOTMUCH_TEST_BIT (message->flags, flag);
 }
 
 void
@@ -904,10 +906,10 @@ notmuch_message_set_flag (notmuch_message_t *message,
 			  notmuch_message_flag_t flag, notmuch_bool_t enable)
 {
     if (enable)
-	message->flags |= (1 << flag);
+	NOTMUCH_SET_BIT (&message->flags, flag);
     else
-	message->flags &= ~(1 << flag);
-    message->lazy_flags |= (1 << flag);
+	NOTMUCH_CLEAR_BIT (&message->flags, flag);
+    NOTMUCH_SET_BIT (&message->lazy_flags, flag);
 }
 
 time_t
diff --git a/lib/notmuch-private.h b/lib/notmuch-private.h
index 2fbd38e..2f43c1d 100644
--- a/lib/notmuch-private.h
+++ b/lib/notmuch-private.h
@@ -63,6 +63,17 @@ NOTMUCH_BEGIN_DECLS
 #define STRNCMP_LITERAL(var, literal) \
     strncmp ((var), (literal), sizeof (literal) - 1)
 
+/* Robust bit test/set/reset macros */
+#define NOTMUCH_TEST_BIT(val, bit) \
+    ((bit < 0 || bit >= CHAR_BIT * sizeof (unsigned long long)) ? 0	\
+     : !!((val) & (1ull << bit)))
+#define NOTMUCH_SET_BIT(valp, bit) \
+    ((bit < 0 || bit >= CHAR_BIT * sizeof (unsigned long long)) ? *(valp) \
+     : (*(valp) |= (1ull << bit)))
+#define NOTMUCH_CLEAR_BIT(valp,  bit) \
+    ((bit < 0 || bit >= CHAR_BIT * sizeof (unsigned long long)) ? *(valp) \
+     : (*(valp) &= ~(1ull << bit)))
+
 #define unused(x) x __attribute__ ((unused))
 
 #ifdef __cplusplus

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH v2 01/12] lib: Move message ID compression to _notmuch_message_create_for_message_id
  2014-10-06 23:17 [PATCH 00/12] Add ghost messages and fix thread linking Austin Clements
@ 2014-10-06 23:17 ` Austin Clements
  2014-10-06 23:17 ` [PATCH v2 02/12] lib: Refactor _notmuch_database_link_message Austin Clements
                   ` (11 subsequent siblings)
  12 siblings, 0 replies; 21+ messages in thread
From: Austin Clements @ 2014-10-06 23:17 UTC (permalink / raw)
  To: notmuch

From: Austin Clements <amdragon@mit.edu>

Previously, this was performed by notmuch_database_add_message.  This
happens to be the only caller currently (which is why this was safe),
but we're about to introduce more callers, and it makes more sense to
put responsibility for ID compression in the lower-level function
rather than requiring each caller to handle it.
---
 lib/database.cc       | 16 ++++------------
 lib/message.cc        |  4 ++++
 lib/notmuch-private.h |  3 +++
 3 files changed, 11 insertions(+), 12 deletions(-)

diff --git a/lib/database.cc b/lib/database.cc
index a47a71d..4e68706 100644
--- a/lib/database.cc
+++ b/lib/database.cc
@@ -390,8 +390,8 @@ find_document_for_doc_id (notmuch_database_t *notmuch, unsigned doc_id)
  *
  *	notmuch-sha1-<sha1_sum_of_message_id>
  */
-static char *
-_message_id_compressed (void *ctx, const char *message_id)
+char *
+_notmuch_message_id_compressed (void *ctx, const char *message_id)
 {
     char *sha1, *compressed;
 
@@ -415,7 +415,7 @@ notmuch_database_find_message (notmuch_database_t *notmuch,
 	return NOTMUCH_STATUS_NULL_POINTER;
 
     if (strlen (message_id) > NOTMUCH_MESSAGE_ID_MAX)
-	message_id = _message_id_compressed (notmuch, message_id);
+	message_id = _notmuch_message_id_compressed (notmuch, message_id);
 
     try {
 	status = _notmuch_database_find_unique_doc_id (notmuch, "id",
@@ -1728,7 +1728,7 @@ static char *
 _get_metadata_thread_id_key (void *ctx, const char *message_id)
 {
     if (strlen (message_id) > NOTMUCH_MESSAGE_ID_MAX)
-	message_id = _message_id_compressed (ctx, message_id);
+	message_id = _notmuch_message_id_compressed (ctx, message_id);
 
     return talloc_asprintf (ctx, NOTMUCH_METADATA_THREAD_ID_PREFIX "%s",
 			    message_id);
@@ -2100,14 +2100,6 @@ notmuch_database_add_message (notmuch_database_t *notmuch,
 	     * better than no message-id at all. */
 	    if (message_id == NULL)
 		message_id = talloc_strdup (message_file, header);
-
-	    /* If a message ID is too long, substitute its sha1 instead. */
-	    if (message_id && strlen (message_id) > NOTMUCH_MESSAGE_ID_MAX) {
-		char *compressed = _message_id_compressed (message_file,
-							   message_id);
-		talloc_free (message_id);
-		message_id = compressed;
-	    }
 	}
 
 	if (message_id == NULL ) {
diff --git a/lib/message.cc b/lib/message.cc
index 7e82548..bbfc250 100644
--- a/lib/message.cc
+++ b/lib/message.cc
@@ -226,6 +226,10 @@ _notmuch_message_create_for_message_id (notmuch_database_t *notmuch,
     else if (*status_ret)
 	return NULL;
 
+    /* If the message ID is too long, substitute its sha1 instead. */
+    if (strlen (message_id) > NOTMUCH_MESSAGE_ID_MAX)
+	message_id = _notmuch_message_id_compressed (message, message_id);
+
     term = talloc_asprintf (NULL, "%s%s",
 			    _find_prefix ("id"), message_id);
     if (term == NULL) {
diff --git a/lib/notmuch-private.h b/lib/notmuch-private.h
index 17f3061..36cc12b 100644
--- a/lib/notmuch-private.h
+++ b/lib/notmuch-private.h
@@ -174,6 +174,9 @@ typedef struct _notmuch_doc_id_set notmuch_doc_id_set_t;
 const char *
 _find_prefix (const char *name);
 
+char *
+_notmuch_message_id_compressed (void *ctx, const char *message_id);
+
 notmuch_status_t
 _notmuch_database_ensure_writable (notmuch_database_t *notmuch);
 
-- 
2.1.0

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH v2 02/12] lib: Refactor _notmuch_database_link_message
  2014-10-06 23:17 [PATCH 00/12] Add ghost messages and fix thread linking Austin Clements
  2014-10-06 23:17 ` [PATCH v2 01/12] lib: Move message ID compression to _notmuch_message_create_for_message_id Austin Clements
@ 2014-10-06 23:17 ` Austin Clements
  2014-10-06 23:17 ` [PATCH v2 03/12] lib: Handle empty date value Austin Clements
                   ` (10 subsequent siblings)
  12 siblings, 0 replies; 21+ messages in thread
From: Austin Clements @ 2014-10-06 23:17 UTC (permalink / raw)
  To: notmuch

From: Austin Clements <amdragon@mit.edu>

This moves the code to retrieve and clear the metadata thread ID out
of _notmuch_database_link_message into its own function.  This will
simplify future changes.
---
 lib/database.cc | 69 +++++++++++++++++++++++++++++++++++----------------------
 1 file changed, 43 insertions(+), 26 deletions(-)

diff --git a/lib/database.cc b/lib/database.cc
index 4e68706..1c6ffc5 100644
--- a/lib/database.cc
+++ b/lib/database.cc
@@ -1958,6 +1958,37 @@ _notmuch_database_link_message_to_children (notmuch_database_t *notmuch,
     return ret;
 }
 
+/* Fetch and clear the stored thread_id for message, or NULL if none. */
+static char *
+_consume_metadata_thread_id (void *ctx, notmuch_database_t *notmuch,
+			     notmuch_message_t *message)
+{
+    const char *message_id;
+    string stored_id;
+    char *metadata_key;
+
+    message_id = notmuch_message_get_message_id (message);
+    metadata_key = _get_metadata_thread_id_key (ctx, message_id);
+
+    /* Check if we have already seen related messages to this one.
+     * If we have then use the thread_id that we stored at that time.
+     */
+    stored_id = notmuch->xapian_db->get_metadata (metadata_key);
+    if (stored_id.empty ()) {
+	return NULL;
+    } else {
+        Xapian::WritableDatabase *db;
+
+	db = static_cast <Xapian::WritableDatabase *> (notmuch->xapian_db);
+
+	/* Clear the metadata for this message ID. We don't need it
+	 * anymore. */
+        db->set_metadata (metadata_key, "");
+
+        return talloc_strdup (ctx, stored_id.c_str ());
+    }
+}
+
 /* Given a (mostly empty) 'message' and its corresponding
  * 'message_file' link it to existing threads in the database.
  *
@@ -1988,42 +2019,25 @@ _notmuch_database_link_message (notmuch_database_t *notmuch,
 				notmuch_message_t *message,
 				notmuch_message_file_t *message_file)
 {
+    void *local = talloc_new (NULL);
     notmuch_status_t status;
-    const char *message_id, *thread_id = NULL;
-    char *metadata_key;
-    string stored_id;
-
-    message_id = notmuch_message_get_message_id (message);
-    metadata_key = _get_metadata_thread_id_key (message, message_id);
-
-    /* Check if we have already seen related messages to this one.
-     * If we have then use the thread_id that we stored at that time.
-     */
-    stored_id = notmuch->xapian_db->get_metadata (metadata_key);
-    if (! stored_id.empty()) {
-        Xapian::WritableDatabase *db;
-
-	db = static_cast <Xapian::WritableDatabase *> (notmuch->xapian_db);
-
-	/* Clear the metadata for this message ID. We don't need it
-	 * anymore. */
-        db->set_metadata (metadata_key, "");
-        thread_id = stored_id.c_str();
+    const char *thread_id;
 
-        _notmuch_message_add_term (message, "thread", thread_id);
-    }
-    talloc_free (metadata_key);
+    /* Check if the message already had a thread ID */
+    thread_id = _consume_metadata_thread_id (local, notmuch, message);
+    if (thread_id)
+	_notmuch_message_add_term (message, "thread", thread_id);
 
     status = _notmuch_database_link_message_to_parents (notmuch, message,
 							message_file,
 							&thread_id);
     if (status)
-	return status;
+	goto DONE;
 
     status = _notmuch_database_link_message_to_children (notmuch, message,
 							 &thread_id);
     if (status)
-	return status;
+	goto DONE;
 
     /* If not part of any existing thread, generate a new thread ID. */
     if (thread_id == NULL) {
@@ -2032,7 +2046,10 @@ _notmuch_database_link_message (notmuch_database_t *notmuch,
 	_notmuch_message_add_term (message, "thread", thread_id);
     }
 
-    return NOTMUCH_STATUS_SUCCESS;
+ DONE:
+    talloc_free (local);
+
+    return status;
 }
 
 notmuch_status_t
-- 
2.1.0

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH v2 03/12] lib: Handle empty date value
  2014-10-06 23:17 [PATCH 00/12] Add ghost messages and fix thread linking Austin Clements
  2014-10-06 23:17 ` [PATCH v2 01/12] lib: Move message ID compression to _notmuch_message_create_for_message_id Austin Clements
  2014-10-06 23:17 ` [PATCH v2 02/12] lib: Refactor _notmuch_database_link_message Austin Clements
@ 2014-10-06 23:17 ` Austin Clements
  2014-10-11  5:12   ` David Bremner
  2014-10-06 23:17 ` [PATCH v2 04/12] lib: Add a ghost messages database feature Austin Clements
                   ` (9 subsequent siblings)
  12 siblings, 1 reply; 21+ messages in thread
From: Austin Clements @ 2014-10-06 23:17 UTC (permalink / raw)
  To: notmuch

From: Austin Clements <amdragon@mit.edu>

In the interest of robustness, avoid undefined behavior of
sortable_unserialise if the date value is missing.  This shouldn't
happen now, but ghost messages will have blank date values.
---
 lib/message.cc | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/lib/message.cc b/lib/message.cc
index bbfc250..38bc929 100644
--- a/lib/message.cc
+++ b/lib/message.cc
@@ -896,6 +896,9 @@ notmuch_message_get_date (notmuch_message_t *message)
 	return 0;
     }
 
+    if (value.empty ())
+	/* sortable_unserialise is undefined on empty string */
+	return 0;
     return Xapian::sortable_unserialise (value);
 }
 
-- 
2.1.0

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH v2 04/12] lib: Add a ghost messages database feature
  2014-10-06 23:17 [PATCH 00/12] Add ghost messages and fix thread linking Austin Clements
                   ` (2 preceding siblings ...)
  2014-10-06 23:17 ` [PATCH v2 03/12] lib: Handle empty date value Austin Clements
@ 2014-10-06 23:17 ` Austin Clements
  2014-10-06 23:17 ` [PATCH v2 05/12] lib: Update database schema doc for ghost messages Austin Clements
                   ` (8 subsequent siblings)
  12 siblings, 0 replies; 21+ messages in thread
From: Austin Clements @ 2014-10-06 23:17 UTC (permalink / raw)
  To: notmuch

From: Austin Clements <amdragon@mit.edu>

This will be implemented over the next several patches.  The feature
is not yet "enabled" (this does not add it to
NOTMUCH_FEATURES_CURRENT).
---
 lib/database-private.h | 7 +++++++
 lib/database.cc        | 2 ++
 2 files changed, 9 insertions(+)

diff --git a/lib/database-private.h b/lib/database-private.h
index ca0751c..e2e4bc8 100644
--- a/lib/database-private.h
+++ b/lib/database-private.h
@@ -85,6 +85,13 @@ enum _notmuch_features {
      *
      * Introduced: version 2. */
     NOTMUCH_FEATURE_BOOL_FOLDER = 1 << 3,
+
+    /* If set, missing messages are stored in ghost mail documents.
+     * If unset, thread IDs of ghost messages are stored as database
+     * metadata instead of in ghost documents.
+     *
+     * Introduced: version 3. */
+    NOTMUCH_FEATURE_GHOSTS = 1 << 4,
 };
 
 /* In C++, a named enum is its own type, so define bitwise operators
diff --git a/lib/database.cc b/lib/database.cc
index 1c6ffc5..8fd7fad 100644
--- a/lib/database.cc
+++ b/lib/database.cc
@@ -286,6 +286,8 @@ static const struct {
       "from/subject/message-ID in database", "w" },
     { NOTMUCH_FEATURE_BOOL_FOLDER,
       "exact folder:/path: search", "rw" },
+    { NOTMUCH_FEATURE_GHOSTS,
+      "mail documents for missing messages", "w"},
 };
 
 const char *
-- 
2.1.0

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH v2 05/12] lib: Update database schema doc for ghost messages
  2014-10-06 23:17 [PATCH 00/12] Add ghost messages and fix thread linking Austin Clements
                   ` (3 preceding siblings ...)
  2014-10-06 23:17 ` [PATCH v2 04/12] lib: Add a ghost messages database feature Austin Clements
@ 2014-10-06 23:17 ` Austin Clements
  2014-10-06 23:17 ` [PATCH v2 06/12] lib: Introduce macros for bit operations Austin Clements
                   ` (7 subsequent siblings)
  12 siblings, 0 replies; 21+ messages in thread
From: Austin Clements @ 2014-10-06 23:17 UTC (permalink / raw)
  To: notmuch

From: Austin Clements <amdragon@mit.edu>

This describes the structure of ghost mail documents.  Ghost messages
are not yet implemented.
---
 lib/database.cc | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/lib/database.cc b/lib/database.cc
index 8fd7fad..c641bcd 100644
--- a/lib/database.cc
+++ b/lib/database.cc
@@ -50,8 +50,8 @@ typedef struct {
 
 /* Here's the current schema for our database (for NOTMUCH_DATABASE_VERSION):
  *
- * We currently have two different types of documents (mail and
- * directory) and also some metadata.
+ * We currently have three different types of documents (mail, ghost,
+ * and directory) and also some metadata.
  *
  * Mail document
  * -------------
@@ -109,6 +109,15 @@ typedef struct {
  *
  * The data portion of a mail document is empty.
  *
+ * Ghost mail document [if NOTMUCH_FEATURE_GHOSTS]
+ * -----------------------------------------------
+ * A ghost mail document is like a mail document, but where we don't
+ * have the message content.  These are used to track thread reference
+ * information for messages we haven't received.
+ *
+ * A ghost mail document has type: ghost; id and thread fields that
+ * are identical to the mail document fields; and a MESSAGE_ID value.
+ *
  * Directory document
  * ------------------
  * A directory document is used by a client of the notmuch library to
@@ -172,6 +181,13 @@ typedef struct {
  *			generated is 1 and the value will be
  *			incremented for each thread ID.
  *
+ * Obsolete metadata
+ * -----------------
+ *
+ * If ! NOTMUCH_FEATURE_GHOSTS, there are no ghost mail documents.
+ * Instead, the database has the following additional database
+ * metadata:
+ *
  *	thread_id_*	A pre-allocated thread ID for a particular
  *			message. This is actually an arbitrarily large
  *			family of metadata name. Any particular name is
-- 
2.1.0

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH v2 06/12] lib: Introduce macros for bit operations
  2014-10-06 23:17 [PATCH 00/12] Add ghost messages and fix thread linking Austin Clements
                   ` (4 preceding siblings ...)
  2014-10-06 23:17 ` [PATCH v2 05/12] lib: Update database schema doc for ghost messages Austin Clements
@ 2014-10-06 23:17 ` Austin Clements
  2014-10-06 23:17 ` [PATCH v2 07/12] lib: Internal support for querying and creating ghost messages Austin Clements
                   ` (6 subsequent siblings)
  12 siblings, 0 replies; 21+ messages in thread
From: Austin Clements @ 2014-10-06 23:17 UTC (permalink / raw)
  To: notmuch; +Cc: Austin Clements

These macros help clarify basic bit-twiddling code and are written to
be robust against C undefined behavior of shift operators.
---
 lib/message.cc        |  6 +++---
 lib/notmuch-private.h | 11 +++++++++++
 2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/lib/message.cc b/lib/message.cc
index 38bc929..55d2ff6 100644
--- a/lib/message.cc
+++ b/lib/message.cc
@@ -869,7 +869,7 @@ notmuch_bool_t
 notmuch_message_get_flag (notmuch_message_t *message,
 			  notmuch_message_flag_t flag)
 {
-    return message->flags & (1 << flag);
+    return NOTMUCH_TEST_BIT (message->flags, flag);
 }
 
 void
@@ -877,9 +877,9 @@ notmuch_message_set_flag (notmuch_message_t *message,
 			  notmuch_message_flag_t flag, notmuch_bool_t enable)
 {
     if (enable)
-	message->flags |= (1 << flag);
+	NOTMUCH_SET_BIT (&message->flags, flag);
     else
-	message->flags &= ~(1 << flag);
+	NOTMUCH_CLEAR_BIT (&message->flags, flag);
 }
 
 time_t
diff --git a/lib/notmuch-private.h b/lib/notmuch-private.h
index 36cc12b..7250291 100644
--- a/lib/notmuch-private.h
+++ b/lib/notmuch-private.h
@@ -63,6 +63,17 @@ NOTMUCH_BEGIN_DECLS
 #define STRNCMP_LITERAL(var, literal) \
     strncmp ((var), (literal), sizeof (literal) - 1)
 
+/* Robust bit test/set/reset macros */
+#define NOTMUCH_TEST_BIT(val, bit) \
+    ((bit < 0 || bit >= CHAR_BIT * sizeof (unsigned long long)) ? 0	\
+     : !!((val) & (1ull << bit)))
+#define NOTMUCH_SET_BIT(valp, bit) \
+    ((bit < 0 || bit >= CHAR_BIT * sizeof (unsigned long long)) ? *(valp) \
+     : (*(valp) |= (1ull << bit)))
+#define NOTMUCH_CLEAR_BIT(valp,  bit) \
+    ((bit < 0 || bit >= CHAR_BIT * sizeof (unsigned long long)) ? *(valp) \
+     : (*(valp) &= ~(1ull << bit)))
+
 #define unused(x) x __attribute__ ((unused))
 
 #ifdef __cplusplus
-- 
2.1.0

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH v2 07/12] lib: Internal support for querying and creating ghost messages
  2014-10-06 23:17 [PATCH 00/12] Add ghost messages and fix thread linking Austin Clements
                   ` (5 preceding siblings ...)
  2014-10-06 23:17 ` [PATCH v2 06/12] lib: Introduce macros for bit operations Austin Clements
@ 2014-10-06 23:17 ` Austin Clements
  2014-10-21 23:05   ` Mark Walters
  2014-10-06 23:17 ` [PATCH v2 08/12] lib: Implement ghost-based thread linking Austin Clements
                   ` (5 subsequent siblings)
  12 siblings, 1 reply; 21+ messages in thread
From: Austin Clements @ 2014-10-06 23:17 UTC (permalink / raw)
  To: notmuch

From: Austin Clements <amdragon@mit.edu>

This updates the message abstraction to support ghost messages: it
adds a message flag that distinguishes regular messages from ghost
messages, and an internal function for initializing a newly created
(blank) message as a ghost message.
---
 lib/message.cc        | 52 +++++++++++++++++++++++++++++++++++++++++++++++++--
 lib/notmuch-private.h |  4 ++++
 lib/notmuch.h         |  9 ++++++++-
 3 files changed, 62 insertions(+), 3 deletions(-)

diff --git a/lib/message.cc b/lib/message.cc
index 55d2ff6..a7a13cc 100644
--- a/lib/message.cc
+++ b/lib/message.cc
@@ -39,6 +39,9 @@ struct visible _notmuch_message {
     notmuch_message_file_t *message_file;
     notmuch_message_list_t *replies;
     unsigned long flags;
+    /* For flags that are initialized on-demand, lazy_flags indicates
+     * if each flag has been initialized. */
+    unsigned long lazy_flags;
 
     Xapian::Document doc;
     Xapian::termcount termpos;
@@ -99,6 +102,7 @@ _notmuch_message_create_for_document (const void *talloc_owner,
 
     message->frozen = 0;
     message->flags = 0;
+    message->lazy_flags = 0;
 
     /* Each of these will be lazily created as needed. */
     message->message_id = NULL;
@@ -192,7 +196,7 @@ _notmuch_message_create (const void *talloc_owner,
  *
  *     There is already a document with message ID 'message_id' in the
  *     database. The returned message can be used to query/modify the
- *     document.
+ *     document. The message may be a ghost message.
  *
  *   NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND:
  *
@@ -305,6 +309,7 @@ _notmuch_message_ensure_metadata (notmuch_message_t *message)
     const char *thread_prefix = _find_prefix ("thread"),
 	*tag_prefix = _find_prefix ("tag"),
 	*id_prefix = _find_prefix ("id"),
+	*type_prefix = _find_prefix ("type"),
 	*filename_prefix = _find_prefix ("file-direntry"),
 	*replyto_prefix = _find_prefix ("replyto");
 
@@ -337,10 +342,25 @@ _notmuch_message_ensure_metadata (notmuch_message_t *message)
 	message->message_id =
 	    _notmuch_message_get_term (message, i, end, id_prefix);
 
+    /* Get document type */
+    assert (strcmp (id_prefix, type_prefix) < 0);
+    if (! NOTMUCH_TEST_BIT (message->lazy_flags, NOTMUCH_MESSAGE_FLAG_GHOST)) {
+	i.skip_to (type_prefix);
+	/* "T" is the prefix "type" fields.  See
+	 * BOOLEAN_PREFIX_INTERNAL. */
+	if (*i == "Tmail")
+	    NOTMUCH_CLEAR_BIT (&message->flags, NOTMUCH_MESSAGE_FLAG_GHOST);
+	else if (*i == "Tghost")
+	    NOTMUCH_SET_BIT (&message->flags, NOTMUCH_MESSAGE_FLAG_GHOST);
+	else
+	    INTERNAL_ERROR ("Message without type term");
+	NOTMUCH_SET_BIT (&message->lazy_flags, NOTMUCH_MESSAGE_FLAG_GHOST);
+    }
+
     /* Get filename list.  Here we get only the terms.  We lazily
      * expand them to full file names when needed in
      * _notmuch_message_ensure_filename_list. */
-    assert (strcmp (id_prefix, filename_prefix) < 0);
+    assert (strcmp (type_prefix, filename_prefix) < 0);
     if (!message->filename_term_list && !message->filename_list)
 	message->filename_term_list =
 	    _notmuch_database_get_terms_with_prefix (message, i, end,
@@ -371,6 +391,11 @@ _notmuch_message_invalidate_metadata (notmuch_message_t *message,
 	message->tag_list = NULL;
     }
 
+    if (strcmp ("type", prefix_name) == 0) {
+	NOTMUCH_CLEAR_BIT (&message->flags, NOTMUCH_MESSAGE_FLAG_GHOST);
+	NOTMUCH_CLEAR_BIT (&message->lazy_flags, NOTMUCH_MESSAGE_FLAG_GHOST);
+    }
+
     if (strcmp ("file-direntry", prefix_name) == 0) {
 	talloc_free (message->filename_term_list);
 	talloc_free (message->filename_list);
@@ -869,6 +894,10 @@ notmuch_bool_t
 notmuch_message_get_flag (notmuch_message_t *message,
 			  notmuch_message_flag_t flag)
 {
+    if (flag == NOTMUCH_MESSAGE_FLAG_GHOST &&
+	! NOTMUCH_TEST_BIT (message->lazy_flags, flag))
+	_notmuch_message_ensure_metadata (message);
+
     return NOTMUCH_TEST_BIT (message->flags, flag);
 }
 
@@ -880,6 +909,7 @@ notmuch_message_set_flag (notmuch_message_t *message,
 	NOTMUCH_SET_BIT (&message->flags, flag);
     else
 	NOTMUCH_CLEAR_BIT (&message->flags, flag);
+    NOTMUCH_SET_BIT (&message->lazy_flags, flag);
 }
 
 time_t
@@ -989,6 +1019,24 @@ _notmuch_message_delete (notmuch_message_t *message)
     return NOTMUCH_STATUS_SUCCESS;
 }
 
+/* Transform a blank message into a ghost message.  The caller must
+ * _notmuch_message_sync the message. */
+notmuch_private_status_t
+_notmuch_message_initialize_ghost (notmuch_message_t *message,
+				   const char *thread_id)
+{
+    notmuch_private_status_t status;
+
+    status = _notmuch_message_add_term (message, "type", "ghost");
+    if (status)
+	return status;
+    status = _notmuch_message_add_term (message, "thread", thread_id);
+    if (status)
+	return status;
+
+    return NOTMUCH_PRIVATE_STATUS_SUCCESS;
+}
+
 /* Ensure that 'message' is not holding any file object open. Future
  * calls to various functions will still automatically open the
  * message file as needed.
diff --git a/lib/notmuch-private.h b/lib/notmuch-private.h
index 7250291..2f43c1d 100644
--- a/lib/notmuch-private.h
+++ b/lib/notmuch-private.h
@@ -308,6 +308,10 @@ _notmuch_message_sync (notmuch_message_t *message);
 notmuch_status_t
 _notmuch_message_delete (notmuch_message_t *message);
 
+notmuch_private_status_t
+_notmuch_message_initialize_ghost (notmuch_message_t *message,
+				   const char *thread_id);
+
 void
 _notmuch_message_close (notmuch_message_t *message);
 
diff --git a/lib/notmuch.h b/lib/notmuch.h
index dae0416..92594b9 100644
--- a/lib/notmuch.h
+++ b/lib/notmuch.h
@@ -1221,7 +1221,14 @@ notmuch_message_get_filenames (notmuch_message_t *message);
  */
 typedef enum _notmuch_message_flag {
     NOTMUCH_MESSAGE_FLAG_MATCH,
-    NOTMUCH_MESSAGE_FLAG_EXCLUDED
+    NOTMUCH_MESSAGE_FLAG_EXCLUDED,
+
+    /* This message is a "ghost message", meaning it has no filenames
+     * or content, but we know it exists because it was referenced by
+     * some other message.  A ghost message has only a message ID and
+     * thread ID.
+     */
+    NOTMUCH_MESSAGE_FLAG_GHOST,
 } notmuch_message_flag_t;
 
 /**
-- 
2.1.0

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH v2 08/12] lib: Implement ghost-based thread linking
  2014-10-06 23:17 [PATCH 00/12] Add ghost messages and fix thread linking Austin Clements
                   ` (6 preceding siblings ...)
  2014-10-06 23:17 ` [PATCH v2 07/12] lib: Internal support for querying and creating ghost messages Austin Clements
@ 2014-10-06 23:17 ` Austin Clements
  2014-10-21 23:10   ` Mark Walters
  2014-10-06 23:17 ` [PATCH v2 09/12] lib: Implement upgrade to ghost messages feature Austin Clements
                   ` (4 subsequent siblings)
  12 siblings, 1 reply; 21+ messages in thread
From: Austin Clements @ 2014-10-06 23:17 UTC (permalink / raw)
  To: notmuch

From: Austin Clements <amdragon@mit.edu>

This updates the thread linking code to use ghost messages instead of
user metadata to link messages into threads.

In contrast with the old approach, this is actually correct.
Previously, thread merging updated only the thread IDs of message
documents, not thread IDs stored in user metadata.  As originally
diagnosed by Mark Walters [1] and as demonstrated by the broken
T260-thread-order test, this can cause notmuch to fail to link
messages even though they're in the same thread.  In principle the old
approach could have been fixed by updating the user metadata thread
IDs as well, but these are not indexed and hence this would have
required a full scan of all stored thread IDs.  Ghost messages solve
this problem naturally by reusing the exact same thread ID and message
ID representation and indexing as regular messages.

Furthermore, thanks to this greater symmetry, ghost messages are also
algorithmically simpler.  We continue to support the old user metadata
format, so this patch can't delete any code, but when we do remove
support for the old format, several functions can simply be deleted.

[1] id:8738h7kv2q.fsf@qmul.ac.uk
---
 lib/database.cc | 86 +++++++++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 75 insertions(+), 11 deletions(-)

diff --git a/lib/database.cc b/lib/database.cc
index c641bcd..fdcc526 100644
--- a/lib/database.cc
+++ b/lib/database.cc
@@ -1752,6 +1752,12 @@ _get_metadata_thread_id_key (void *ctx, const char *message_id)
 			    message_id);
 }
 
+static notmuch_status_t
+_resolve_message_id_to_thread_id_old (notmuch_database_t *notmuch,
+				      void *ctx,
+				      const char *message_id,
+				      const char **thread_id_ret);
+
 /* Find the thread ID to which the message with 'message_id' belongs.
  *
  * Note: 'thread_id_ret' must not be NULL!
@@ -1760,9 +1766,9 @@ _get_metadata_thread_id_key (void *ctx, const char *message_id)
  *
  * Note: If there is no message in the database with the given
  * 'message_id' then a new thread_id will be allocated for this
- * message and stored in the database metadata, (where this same
+ * message ID and stored in the database metadata so that the
  * thread ID can be looked up if the message is added to the database
- * later).
+ * later.
  */
 static notmuch_status_t
 _resolve_message_id_to_thread_id (notmuch_database_t *notmuch,
@@ -1770,6 +1776,49 @@ _resolve_message_id_to_thread_id (notmuch_database_t *notmuch,
 				  const char *message_id,
 				  const char **thread_id_ret)
 {
+    notmuch_private_status_t status;
+    notmuch_message_t *message;
+
+    if (! (notmuch->features & NOTMUCH_FEATURE_GHOSTS))
+	return _resolve_message_id_to_thread_id_old (notmuch, ctx, message_id,
+						     thread_id_ret);
+
+    /* Look for this message (regular or ghost) */
+    message = _notmuch_message_create_for_message_id (
+	notmuch, message_id, &status);
+    if (status == NOTMUCH_PRIVATE_STATUS_SUCCESS) {
+	/* Message exists */
+	*thread_id_ret = talloc_steal (
+	    ctx, notmuch_message_get_thread_id (message));
+    } else if (status == NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND) {
+	/* Message did not exist.  Give it a fresh thread ID and
+	 * populate this message as a ghost message. */
+	*thread_id_ret = talloc_strdup (
+	    ctx, _notmuch_database_generate_thread_id (notmuch));
+	if (! *thread_id_ret) {
+	    status = NOTMUCH_PRIVATE_STATUS_OUT_OF_MEMORY;
+	} else {
+	    status = _notmuch_message_initialize_ghost (message, *thread_id_ret);
+	    if (status == 0)
+		/* Commit the new ghost message */
+		_notmuch_message_sync (message);
+	}
+    } else {
+	/* Create failed. Fall through. */
+    }
+
+    notmuch_message_destroy (message);
+
+    return COERCE_STATUS (status, "Error creating ghost message");
+}
+
+/* Pre-ghost messages _resolve_message_id_to_thread_id */
+static notmuch_status_t
+_resolve_message_id_to_thread_id_old (notmuch_database_t *notmuch,
+				      void *ctx,
+				      const char *message_id,
+				      const char **thread_id_ret)
+{
     notmuch_status_t status;
     notmuch_message_t *message;
     string thread_id_string;
@@ -2007,7 +2056,7 @@ _consume_metadata_thread_id (void *ctx, notmuch_database_t *notmuch,
     }
 }
 
-/* Given a (mostly empty) 'message' and its corresponding
+/* Given a blank or ghost 'message' and its corresponding
  * 'message_file' link it to existing threads in the database.
  *
  * The first check is in the metadata of the database to see if we
@@ -2035,16 +2084,22 @@ _consume_metadata_thread_id (void *ctx, notmuch_database_t *notmuch,
 static notmuch_status_t
 _notmuch_database_link_message (notmuch_database_t *notmuch,
 				notmuch_message_t *message,
-				notmuch_message_file_t *message_file)
+				notmuch_message_file_t *message_file,
+				notmuch_bool_t is_ghost)
 {
     void *local = talloc_new (NULL);
     notmuch_status_t status;
-    const char *thread_id;
+    const char *thread_id = NULL;
 
     /* Check if the message already had a thread ID */
-    thread_id = _consume_metadata_thread_id (local, notmuch, message);
-    if (thread_id)
-	_notmuch_message_add_term (message, "thread", thread_id);
+    if (notmuch->features & NOTMUCH_FEATURE_GHOSTS) {
+	if (is_ghost)
+	    thread_id = notmuch_message_get_thread_id (message);
+    } else {
+	thread_id = _consume_metadata_thread_id (local, notmuch, message);
+	if (thread_id)
+	    _notmuch_message_add_term (message, "thread", thread_id);
+    }
 
     status = _notmuch_database_link_message_to_parents (notmuch, message,
 							message_file,
@@ -2079,6 +2134,7 @@ notmuch_database_add_message (notmuch_database_t *notmuch,
     notmuch_message_t *message = NULL;
     notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS, ret2;
     notmuch_private_status_t private_status;
+    notmuch_bool_t is_ghost = false;
 
     const char *date, *header;
     const char *from, *to, *subject;
@@ -2171,12 +2227,20 @@ notmuch_database_add_message (notmuch_database_t *notmuch,
 
 	_notmuch_message_add_filename (message, filename);
 
-	/* Is this a newly created message object? */
-	if (private_status == NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND) {
+	/* Is this a newly created message object or a ghost
+	 * message?  We have to be slightly careful: if this is a
+	 * blank message, it's not safe to call
+	 * notmuch_message_get_flag yet. */
+	if (private_status == NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND ||
+	    (is_ghost = notmuch_message_get_flag (
+		message, NOTMUCH_MESSAGE_FLAG_GHOST))) {
 	    _notmuch_message_add_term (message, "type", "mail");
+	    if (is_ghost)
+		/* Convert ghost message to a regular message */
+		_notmuch_message_remove_term (message, "type", "ghost");
 
 	    ret = _notmuch_database_link_message (notmuch, message,
-						  message_file);
+						  message_file, is_ghost);
 	    if (ret)
 		goto DONE;
 
-- 
2.1.0

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH v2 09/12] lib: Implement upgrade to ghost messages feature
  2014-10-06 23:17 [PATCH 00/12] Add ghost messages and fix thread linking Austin Clements
                   ` (7 preceding siblings ...)
  2014-10-06 23:17 ` [PATCH v2 08/12] lib: Implement ghost-based thread linking Austin Clements
@ 2014-10-06 23:17 ` Austin Clements
  2014-10-06 23:17 ` [PATCH v2 10/12] lib: Enable " Austin Clements
                   ` (3 subsequent siblings)
  12 siblings, 0 replies; 21+ messages in thread
From: Austin Clements @ 2014-10-06 23:17 UTC (permalink / raw)
  To: notmuch

From: Austin Clements <amdragon@mit.edu>

Somehow this is the first upgrade pass that actually does *any* error
checking, so this also adds the bit of necessary infrastructure to
handle that.
---
 lib/database.cc | 66 +++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 64 insertions(+), 2 deletions(-)

diff --git a/lib/database.cc b/lib/database.cc
index fdcc526..1316529 100644
--- a/lib/database.cc
+++ b/lib/database.cc
@@ -1231,6 +1231,7 @@ notmuch_database_upgrade (notmuch_database_t *notmuch,
     notmuch_bool_t timer_is_active = FALSE;
     enum _notmuch_features target_features, new_features;
     notmuch_status_t status;
+    notmuch_private_status_t private_status;
     unsigned int count = 0, total = 0;
 
     status = _notmuch_database_ensure_writable (notmuch);
@@ -1275,6 +1276,13 @@ notmuch_database_upgrade (notmuch_database_t *notmuch,
 	for (t = db->allterms_begin ("XTIMESTAMP"); t != t_end; t++)
 	    ++total;
     }
+    if (new_features & NOTMUCH_FEATURE_GHOSTS) {
+	/* The ghost message upgrade converts all thread_id_*
+	 * metadata values into ghost message documents. */
+	t_end = db->metadata_keys_end ("thread_id_");
+	for (t = db->metadata_keys_begin ("thread_id_"); t != t_end; ++t)
+	    ++total;
+    }
 
     /* Perform the upgrade in a transaction. */
     db->begin_transaction (true);
@@ -1378,10 +1386,64 @@ notmuch_database_upgrade (notmuch_database_t *notmuch,
 	}
     }
 
+    /* Perform metadata upgrades. */
+
+    /* Prior to NOTMUCH_FEATURE_GHOSTS, thread IDs for missing
+     * messages were stored as database metadata. Change these to
+     * ghost messages.
+     */
+    if (new_features & NOTMUCH_FEATURE_GHOSTS) {
+	notmuch_message_t *message;
+	std::string message_id, thread_id;
+
+	t_end = db->metadata_keys_end (NOTMUCH_METADATA_THREAD_ID_PREFIX);
+	for (t = db->metadata_keys_begin (NOTMUCH_METADATA_THREAD_ID_PREFIX);
+	     t != t_end; ++t) {
+	    if (do_progress_notify) {
+		progress_notify (closure, (double) count / total);
+		do_progress_notify = 0;
+	    }
+
+	    message_id = (*t).substr (
+		strlen (NOTMUCH_METADATA_THREAD_ID_PREFIX));
+	    thread_id = db->get_metadata (*t);
+
+	    /* Create ghost message */
+	    message = _notmuch_message_create_for_message_id (
+		notmuch, message_id.c_str (), &private_status);
+	    if (private_status == NOTMUCH_PRIVATE_STATUS_SUCCESS) {
+		/* Document already exists; ignore the stored thread ID */
+	    } else if (private_status ==
+		       NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND) {
+		private_status = _notmuch_message_initialize_ghost (
+		    message, thread_id.c_str ());
+		if (! private_status)
+		    _notmuch_message_sync (message);
+	    }
+
+	    if (private_status) {
+		fprintf (stderr,
+			 "Upgrade failed while creating ghost messages.\n");
+		status = COERCE_STATUS (private_status, "Unexpected status from _notmuch_message_initialize_ghost");
+		goto DONE;
+	    }
+
+	    /* Clear saved metadata thread ID */
+	    db->set_metadata (*t, "");
+
+	    ++count;
+	}
+    }
+
+    status = NOTMUCH_STATUS_SUCCESS;
     db->set_metadata ("features", _print_features (local, notmuch->features));
     db->set_metadata ("version", STRINGIFY (NOTMUCH_DATABASE_VERSION));
 
-    db->commit_transaction ();
+ DONE:
+    if (status == NOTMUCH_STATUS_SUCCESS)
+	db->commit_transaction ();
+    else
+	db->cancel_transaction ();
 
     if (timer_is_active) {
 	/* Now stop the timer. */
@@ -1397,7 +1459,7 @@ notmuch_database_upgrade (notmuch_database_t *notmuch,
     }
 
     talloc_free (local);
-    return NOTMUCH_STATUS_SUCCESS;
+    return status;
 }
 
 notmuch_status_t
-- 
2.1.0

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH v2 10/12] lib: Enable ghost messages feature
  2014-10-06 23:17 [PATCH 00/12] Add ghost messages and fix thread linking Austin Clements
                   ` (8 preceding siblings ...)
  2014-10-06 23:17 ` [PATCH v2 09/12] lib: Implement upgrade to ghost messages feature Austin Clements
@ 2014-10-06 23:17 ` Austin Clements
  2014-10-06 23:17 ` [PATCH v2 11/12] test: Test upgrade to " Austin Clements
                   ` (2 subsequent siblings)
  12 siblings, 0 replies; 21+ messages in thread
From: Austin Clements @ 2014-10-06 23:17 UTC (permalink / raw)
  To: notmuch

From: Austin Clements <amdragon@mit.edu>

This fixes the broken thread order test.
---
 lib/database-private.h    | 2 +-
 test/T260-thread-order.sh | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/lib/database-private.h b/lib/database-private.h
index e2e4bc8..15e03cc 100644
--- a/lib/database-private.h
+++ b/lib/database-private.h
@@ -166,7 +166,7 @@ struct _notmuch_database {
  * databases will have it). */
 #define NOTMUCH_FEATURES_CURRENT \
     (NOTMUCH_FEATURE_FILE_TERMS | NOTMUCH_FEATURE_DIRECTORY_DOCS | \
-     NOTMUCH_FEATURE_BOOL_FOLDER)
+     NOTMUCH_FEATURE_BOOL_FOLDER | NOTMUCH_FEATURE_GHOSTS)
 
 /* Return the list of terms from the given iterator matching a prefix.
  * The prefix will be stripped from the strings in the returned list.
diff --git a/test/T260-thread-order.sh b/test/T260-thread-order.sh
index b435d79..99f5833 100755
--- a/test/T260-thread-order.sh
+++ b/test/T260-thread-order.sh
@@ -30,7 +30,6 @@ expected=$(for ((i = 0; i < $nthreads; i++)); do
 test_expect_equal "$output" "$expected"
 
 test_begin_subtest "Messages with all parents get linked in all delivery orders"
-test_subtest_known_broken
 # Here we do the same thing as the previous test, but each message
 # references all of its parents.  Since every message references the
 # root of the thread, each thread should always be fully joined.  This
-- 
2.1.0

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH v2 11/12] test: Test upgrade to ghost messages feature
  2014-10-06 23:17 [PATCH 00/12] Add ghost messages and fix thread linking Austin Clements
                   ` (9 preceding siblings ...)
  2014-10-06 23:17 ` [PATCH v2 10/12] lib: Enable " Austin Clements
@ 2014-10-06 23:17 ` Austin Clements
  2014-10-06 23:17 ` [PATCH v2 12/12] lib: Remove unnecessary thread linking steps when using ghost messages Austin Clements
  2014-10-21 23:32 ` [PATCH 00/12] Add ghost messages and fix thread linking Mark Walters
  12 siblings, 0 replies; 21+ messages in thread
From: Austin Clements @ 2014-10-06 23:17 UTC (permalink / raw)
  To: notmuch; +Cc: Austin Clements

---
 test/T530-upgrade.sh | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/test/T530-upgrade.sh b/test/T530-upgrade.sh
index c4c4ac8..6b42a69 100755
--- a/test/T530-upgrade.sh
+++ b/test/T530-upgrade.sh
@@ -116,4 +116,25 @@ MAIL_DIR/bar/new/21:2,
 MAIL_DIR/bar/new/22:2,
 MAIL_DIR/cur/51:2,"
 
+# Ghost messages are difficult to test since they're nearly invisible.
+# However, if the upgrade works correctly, the ghost message should
+# retain the right thread ID even if all of the original messages in
+# the thread are deleted.  That's what we test.  This won't detect if
+# the upgrade just plain didn't happen, but it should detect if
+# something went wrong.
+test_begin_subtest "ghost message retains thread ID"
+# Upgrade database
+notmuch new
+# Get thread ID of real message
+thread=$(notmuch search --output=threads id:4EFC743A.3060609@april.org)
+# Delete all real messages in that thread
+rm $(notmuch search --output=files $thread)
+notmuch new
+# "Deliver" ghost message
+add_message '[subject]=Ghost' '[id]=4EFC3931.6030007@april.org'
+# If the ghost upgrade worked, the new message should be attached to
+# the existing thread ID.
+nthread=$(notmuch search --output=threads id:4EFC3931.6030007@april.org)
+test_expect_equal "$thread" "$nthread"
+
 test_done
-- 
2.1.0

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH v2 12/12] lib: Remove unnecessary thread linking steps when using ghost messages
  2014-10-06 23:17 [PATCH 00/12] Add ghost messages and fix thread linking Austin Clements
                   ` (10 preceding siblings ...)
  2014-10-06 23:17 ` [PATCH v2 11/12] test: Test upgrade to " Austin Clements
@ 2014-10-06 23:17 ` Austin Clements
  2014-10-21 23:17   ` Mark Walters
  2014-10-21 23:32 ` [PATCH 00/12] Add ghost messages and fix thread linking Mark Walters
  12 siblings, 1 reply; 21+ messages in thread
From: Austin Clements @ 2014-10-06 23:17 UTC (permalink / raw)
  To: notmuch

From: Austin Clements <amdragon@mit.edu>

Previously, it was necessary to link new messages to children to work
around some (though not all) problems with the old metadata-based
approach to stored thread IDs.  With ghost messages, this is no longer
necessary, so don't bother with child linking when ghost messages are
in use.
---
 lib/database.cc | 21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/lib/database.cc b/lib/database.cc
index 1316529..6e51a72 100644
--- a/lib/database.cc
+++ b/lib/database.cc
@@ -2169,10 +2169,23 @@ _notmuch_database_link_message (notmuch_database_t *notmuch,
     if (status)
 	goto DONE;
 
-    status = _notmuch_database_link_message_to_children (notmuch, message,
-							 &thread_id);
-    if (status)
-	goto DONE;
+    if (! (notmuch->features & NOTMUCH_FEATURE_GHOSTS)) {
+	/* In general, it shouldn't be necessary to link children,
+	 * since the earlier indexing of those children will have
+	 * stored a thread ID for the missing parent.  However, prior
+	 * to ghost messages, these stored thread IDs were NOT
+	 * rewritten during thread merging (and there was no
+	 * performant way to do so), so if indexed children were
+	 * pulled into a different thread ID by a merge, it was
+	 * necessary to pull them *back* into the stored thread ID of
+	 * the parent.  With ghost messages, we just rewrite the
+	 * stored thread IDs during merging, so this workaround isn't
+	 * necessary. */
+	status = _notmuch_database_link_message_to_children (notmuch, message,
+							     &thread_id);
+	if (status)
+	    goto DONE;
+    }
 
     /* If not part of any existing thread, generate a new thread ID. */
     if (thread_id == NULL) {
-- 
2.1.0

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* Re: [PATCH v2 03/12] lib: Handle empty date value
  2014-10-06 23:17 ` [PATCH v2 03/12] lib: Handle empty date value Austin Clements
@ 2014-10-11  5:12   ` David Bremner
  0 siblings, 0 replies; 21+ messages in thread
From: David Bremner @ 2014-10-11  5:12 UTC (permalink / raw)
  To: Austin Clements, notmuch

Austin Clements <aclements@csail.mit.edu> writes:

> From: Austin Clements <amdragon@mit.edu>
>
> In the interest of robustness, avoid undefined behavior of
> sortable_unserialise if the date value is missing.  This shouldn't
> happen now, but ghost messages will have blank date values.

Pushed the first 3 patches in the series.

d

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2 07/12] lib: Internal support for querying and creating ghost messages
  2014-10-06 23:17 ` [PATCH v2 07/12] lib: Internal support for querying and creating ghost messages Austin Clements
@ 2014-10-21 23:05   ` Mark Walters
  2014-10-22  1:33     ` Austin Clements
  0 siblings, 1 reply; 21+ messages in thread
From: Mark Walters @ 2014-10-21 23:05 UTC (permalink / raw)
  To: Austin Clements, notmuch


Hi 

I am slowly working my way through this series: only two trivial queries
so far.

On Tue, 07 Oct 2014, Austin Clements <aclements@csail.mit.edu> wrote:
> From: Austin Clements <amdragon@mit.edu>
>
> This updates the message abstraction to support ghost messages: it
> adds a message flag that distinguishes regular messages from ghost
> messages, and an internal function for initializing a newly created
> (blank) message as a ghost message.
> ---
>  lib/message.cc        | 52 +++++++++++++++++++++++++++++++++++++++++++++++++--
>  lib/notmuch-private.h |  4 ++++
>  lib/notmuch.h         |  9 ++++++++-
>  3 files changed, 62 insertions(+), 3 deletions(-)
>
> diff --git a/lib/message.cc b/lib/message.cc
> index 55d2ff6..a7a13cc 100644
> --- a/lib/message.cc
> +++ b/lib/message.cc
> @@ -39,6 +39,9 @@ struct visible _notmuch_message {
>      notmuch_message_file_t *message_file;
>      notmuch_message_list_t *replies;
>      unsigned long flags;
> +    /* For flags that are initialized on-demand, lazy_flags indicates
> +     * if each flag has been initialized. */
> +    unsigned long lazy_flags;

I wonder if valid_flags might be better here as, as far as I can see,
the reason for these is so we can invalidate a flag more than an
optimisation (which is what I thought the lazy implied).

>  
>      Xapian::Document doc;
>      Xapian::termcount termpos;
> @@ -99,6 +102,7 @@ _notmuch_message_create_for_document (const void *talloc_owner,
>  
>      message->frozen = 0;
>      message->flags = 0;
> +    message->lazy_flags = 0;
>  
>      /* Each of these will be lazily created as needed. */
>      message->message_id = NULL;
> @@ -192,7 +196,7 @@ _notmuch_message_create (const void *talloc_owner,
>   *
>   *     There is already a document with message ID 'message_id' in the
>   *     database. The returned message can be used to query/modify the
> - *     document.
> + *     document. The message may be a ghost message.
>   *
>   *   NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND:
>   *
> @@ -305,6 +309,7 @@ _notmuch_message_ensure_metadata (notmuch_message_t *message)
>      const char *thread_prefix = _find_prefix ("thread"),
>  	*tag_prefix = _find_prefix ("tag"),
>  	*id_prefix = _find_prefix ("id"),
> +	*type_prefix = _find_prefix ("type"),
>  	*filename_prefix = _find_prefix ("file-direntry"),
>  	*replyto_prefix = _find_prefix ("replyto");
>  
> @@ -337,10 +342,25 @@ _notmuch_message_ensure_metadata (notmuch_message_t *message)
>  	message->message_id =
>  	    _notmuch_message_get_term (message, i, end, id_prefix);
>  
> +    /* Get document type */
> +    assert (strcmp (id_prefix, type_prefix) < 0);
> +    if (! NOTMUCH_TEST_BIT (message->lazy_flags, NOTMUCH_MESSAGE_FLAG_GHOST)) {
> +	i.skip_to (type_prefix);
> +	/* "T" is the prefix "type" fields.  See
> +	 * BOOLEAN_PREFIX_INTERNAL. */
> +	if (*i == "Tmail")
> +	    NOTMUCH_CLEAR_BIT (&message->flags, NOTMUCH_MESSAGE_FLAG_GHOST);
> +	else if (*i == "Tghost")
> +	    NOTMUCH_SET_BIT (&message->flags, NOTMUCH_MESSAGE_FLAG_GHOST);
> +	else
> +	    INTERNAL_ERROR ("Message without type term");
> +	NOTMUCH_SET_BIT (&message->lazy_flags, NOTMUCH_MESSAGE_FLAG_GHOST);
> +    }
> +
>      /* Get filename list.  Here we get only the terms.  We lazily
>       * expand them to full file names when needed in
>       * _notmuch_message_ensure_filename_list. */
> -    assert (strcmp (id_prefix, filename_prefix) < 0);
> +    assert (strcmp (type_prefix, filename_prefix) < 0);
>      if (!message->filename_term_list && !message->filename_list)
>  	message->filename_term_list =
>  	    _notmuch_database_get_terms_with_prefix (message, i, end,
> @@ -371,6 +391,11 @@ _notmuch_message_invalidate_metadata (notmuch_message_t *message,
>  	message->tag_list = NULL;
>      }
>  
> +    if (strcmp ("type", prefix_name) == 0) {
> +	NOTMUCH_CLEAR_BIT (&message->flags, NOTMUCH_MESSAGE_FLAG_GHOST);
> +	NOTMUCH_CLEAR_BIT (&message->lazy_flags, NOTMUCH_MESSAGE_FLAG_GHOST);
> +    }
> +
>      if (strcmp ("file-direntry", prefix_name) == 0) {
>  	talloc_free (message->filename_term_list);
>  	talloc_free (message->filename_list);
> @@ -869,6 +894,10 @@ notmuch_bool_t
>  notmuch_message_get_flag (notmuch_message_t *message,
>  			  notmuch_message_flag_t flag)
>  {
> +    if (flag == NOTMUCH_MESSAGE_FLAG_GHOST &&
> +	! NOTMUCH_TEST_BIT (message->lazy_flags, flag))
> +	_notmuch_message_ensure_metadata (message);
> +
>      return NOTMUCH_TEST_BIT (message->flags, flag);
>  }
>  
> @@ -880,6 +909,7 @@ notmuch_message_set_flag (notmuch_message_t *message,
>  	NOTMUCH_SET_BIT (&message->flags, flag);
>      else
>  	NOTMUCH_CLEAR_BIT (&message->flags, flag);
> +    NOTMUCH_SET_BIT (&message->lazy_flags, flag);
>  }
>  
>  time_t
> @@ -989,6 +1019,24 @@ _notmuch_message_delete (notmuch_message_t *message)
>      return NOTMUCH_STATUS_SUCCESS;
>  }
>  
> +/* Transform a blank message into a ghost message.  The caller must
> + * _notmuch_message_sync the message. */
> +notmuch_private_status_t
> +_notmuch_message_initialize_ghost (notmuch_message_t *message,
> +				   const char *thread_id)
> +{
> +    notmuch_private_status_t status;
> +
> +    status = _notmuch_message_add_term (message, "type", "ghost");
> +    if (status)
> +	return status;
> +    status = _notmuch_message_add_term (message, "thread", thread_id);
> +    if (status)
> +	return status;
> +
> +    return NOTMUCH_PRIVATE_STATUS_SUCCESS;
> +}
> +
>  /* Ensure that 'message' is not holding any file object open. Future
>   * calls to various functions will still automatically open the
>   * message file as needed.
> diff --git a/lib/notmuch-private.h b/lib/notmuch-private.h
> index 7250291..2f43c1d 100644
> --- a/lib/notmuch-private.h
> +++ b/lib/notmuch-private.h
> @@ -308,6 +308,10 @@ _notmuch_message_sync (notmuch_message_t *message);
>  notmuch_status_t
>  _notmuch_message_delete (notmuch_message_t *message);
>  
> +notmuch_private_status_t
> +_notmuch_message_initialize_ghost (notmuch_message_t *message,
> +				   const char *thread_id);
> +
>  void
>  _notmuch_message_close (notmuch_message_t *message);
>  
> diff --git a/lib/notmuch.h b/lib/notmuch.h
> index dae0416..92594b9 100644
> --- a/lib/notmuch.h
> +++ b/lib/notmuch.h
> @@ -1221,7 +1221,14 @@ notmuch_message_get_filenames (notmuch_message_t *message);
>   */
>  typedef enum _notmuch_message_flag {
>      NOTMUCH_MESSAGE_FLAG_MATCH,
> -    NOTMUCH_MESSAGE_FLAG_EXCLUDED
> +    NOTMUCH_MESSAGE_FLAG_EXCLUDED,
> +
> +    /* This message is a "ghost message", meaning it has no filenames
> +     * or content, but we know it exists because it was referenced by
> +     * some other message.  A ghost message has only a message ID and
> +     * thread ID.
> +     */

Can I check here: we are not allowing a ghost message to have any tags?

Best wishes

Mark

> +    NOTMUCH_MESSAGE_FLAG_GHOST,
>  } notmuch_message_flag_t;
>  
>  /**
> -- 
> 2.1.0
>
> _______________________________________________
> notmuch mailing list
> notmuch@notmuchmail.org
> http://notmuchmail.org/mailman/listinfo/notmuch

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2 08/12] lib: Implement ghost-based thread linking
  2014-10-06 23:17 ` [PATCH v2 08/12] lib: Implement ghost-based thread linking Austin Clements
@ 2014-10-21 23:10   ` Mark Walters
  2014-10-22  1:49     ` Austin Clements
  0 siblings, 1 reply; 21+ messages in thread
From: Mark Walters @ 2014-10-21 23:10 UTC (permalink / raw)
  To: Austin Clements, notmuch

On Tue, 07 Oct 2014, Austin Clements <aclements@csail.mit.edu> wrote:
> From: Austin Clements <amdragon@mit.edu>
>
> This updates the thread linking code to use ghost messages instead of
> user metadata to link messages into threads.
>
> In contrast with the old approach, this is actually correct.
> Previously, thread merging updated only the thread IDs of message
> documents, not thread IDs stored in user metadata.  As originally
> diagnosed by Mark Walters [1] and as demonstrated by the broken
> T260-thread-order test, this can cause notmuch to fail to link
> messages even though they're in the same thread.  In principle the old
> approach could have been fixed by updating the user metadata thread
> IDs as well, but these are not indexed and hence this would have
> required a full scan of all stored thread IDs.  Ghost messages solve
> this problem naturally by reusing the exact same thread ID and message
> ID representation and indexing as regular messages.
>
> Furthermore, thanks to this greater symmetry, ghost messages are also
> algorithmically simpler.  We continue to support the old user metadata
> format, so this patch can't delete any code, but when we do remove
> support for the old format, several functions can simply be deleted.
>
> [1] id:8738h7kv2q.fsf@qmul.ac.uk
> ---
>  lib/database.cc | 86 +++++++++++++++++++++++++++++++++++++++++++++++++--------
>  1 file changed, 75 insertions(+), 11 deletions(-)
>
> diff --git a/lib/database.cc b/lib/database.cc
> index c641bcd..fdcc526 100644
> --- a/lib/database.cc
> +++ b/lib/database.cc
> @@ -1752,6 +1752,12 @@ _get_metadata_thread_id_key (void *ctx, const char *message_id)
>  			    message_id);
>  }
>  
> +static notmuch_status_t
> +_resolve_message_id_to_thread_id_old (notmuch_database_t *notmuch,
> +				      void *ctx,
> +				      const char *message_id,
> +				      const char **thread_id_ret);
> +
>  /* Find the thread ID to which the message with 'message_id' belongs.
>   *
>   * Note: 'thread_id_ret' must not be NULL!
> @@ -1760,9 +1766,9 @@ _get_metadata_thread_id_key (void *ctx, const char *message_id)
>   *
>   * Note: If there is no message in the database with the given
>   * 'message_id' then a new thread_id will be allocated for this
> - * message and stored in the database metadata, (where this same
> + * message ID and stored in the database metadata so that the
>   * thread ID can be looked up if the message is added to the database
> - * later).
> + * later.
>   */
>  static notmuch_status_t
>  _resolve_message_id_to_thread_id (notmuch_database_t *notmuch,
> @@ -1770,6 +1776,49 @@ _resolve_message_id_to_thread_id (notmuch_database_t *notmuch,
>  				  const char *message_id,
>  				  const char **thread_id_ret)
>  {
> +    notmuch_private_status_t status;
> +    notmuch_message_t *message;
> +
> +    if (! (notmuch->features & NOTMUCH_FEATURE_GHOSTS))
> +	return _resolve_message_id_to_thread_id_old (notmuch, ctx, message_id,
> +						     thread_id_ret);
> +
> +    /* Look for this message (regular or ghost) */
> +    message = _notmuch_message_create_for_message_id (
> +	notmuch, message_id, &status);
> +    if (status == NOTMUCH_PRIVATE_STATUS_SUCCESS) {
> +	/* Message exists */
> +	*thread_id_ret = talloc_steal (
> +	    ctx, notmuch_message_get_thread_id (message));
> +    } else if (status == NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND) {
> +	/* Message did not exist.  Give it a fresh thread ID and
> +	 * populate this message as a ghost message. */
> +	*thread_id_ret = talloc_strdup (
> +	    ctx, _notmuch_database_generate_thread_id (notmuch));
> +	if (! *thread_id_ret) {
> +	    status = NOTMUCH_PRIVATE_STATUS_OUT_OF_MEMORY;
> +	} else {
> +	    status = _notmuch_message_initialize_ghost (message, *thread_id_ret);
> +	    if (status == 0)
> +		/* Commit the new ghost message */
> +		_notmuch_message_sync (message);
> +	}
> +    } else {
> +	/* Create failed. Fall through. */
> +    }
> +
> +    notmuch_message_destroy (message);
> +
> +    return COERCE_STATUS (status, "Error creating ghost message");
> +}
> +
> +/* Pre-ghost messages _resolve_message_id_to_thread_id */
> +static notmuch_status_t
> +_resolve_message_id_to_thread_id_old (notmuch_database_t *notmuch,
> +				      void *ctx,
> +				      const char *message_id,
> +				      const char **thread_id_ret)
> +{
>      notmuch_status_t status;
>      notmuch_message_t *message;
>      string thread_id_string;
> @@ -2007,7 +2056,7 @@ _consume_metadata_thread_id (void *ctx, notmuch_database_t *notmuch,
>      }
>  }
>  
> -/* Given a (mostly empty) 'message' and its corresponding
> +/* Given a blank or ghost 'message' and its corresponding
>   * 'message_file' link it to existing threads in the database.
>   *
>   * The first check is in the metadata of the database to see if we

There is quite a lot of comment below this and it is not clear to me how
much of it applies to the is_ghost case. In particular does the 

  * Finally, we look in the database for existing message that
  * reference 'message'.

still apply? I would have thought that we would already have done that
in the is_ghost case. 

Of course this also applies to the actual code which seems to call
_notmuch_database_link_message_to_children unconditionally. It might be
worth a comment in any case.

Best wishes

Mark


> @@ -2035,16 +2084,22 @@ _consume_metadata_thread_id (void *ctx, notmuch_database_t *notmuch,
>  static notmuch_status_t
>  _notmuch_database_link_message (notmuch_database_t *notmuch,
>  				notmuch_message_t *message,
> -				notmuch_message_file_t *message_file)
> +				notmuch_message_file_t *message_file,
> +				notmuch_bool_t is_ghost)
>  {
>      void *local = talloc_new (NULL);
>      notmuch_status_t status;
> -    const char *thread_id;
> +    const char *thread_id = NULL;
>  
>      /* Check if the message already had a thread ID */
> -    thread_id = _consume_metadata_thread_id (local, notmuch, message);
> -    if (thread_id)
> -	_notmuch_message_add_term (message, "thread", thread_id);
> +    if (notmuch->features & NOTMUCH_FEATURE_GHOSTS) {
> +	if (is_ghost)
> +	    thread_id = notmuch_message_get_thread_id (message);
> +    } else {
> +	thread_id = _consume_metadata_thread_id (local, notmuch, message);
> +	if (thread_id)
> +	    _notmuch_message_add_term (message, "thread", thread_id);
> +    }
>  
>      status = _notmuch_database_link_message_to_parents (notmuch, message,
>  							message_file,
> @@ -2079,6 +2134,7 @@ notmuch_database_add_message (notmuch_database_t *notmuch,
>      notmuch_message_t *message = NULL;
>      notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS, ret2;
>      notmuch_private_status_t private_status;
> +    notmuch_bool_t is_ghost = false;
>  
>      const char *date, *header;
>      const char *from, *to, *subject;
> @@ -2171,12 +2227,20 @@ notmuch_database_add_message (notmuch_database_t *notmuch,
>  
>  	_notmuch_message_add_filename (message, filename);
>  
> -	/* Is this a newly created message object? */
> -	if (private_status == NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND) {
> +	/* Is this a newly created message object or a ghost
> +	 * message?  We have to be slightly careful: if this is a
> +	 * blank message, it's not safe to call
> +	 * notmuch_message_get_flag yet. */
> +	if (private_status == NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND ||
> +	    (is_ghost = notmuch_message_get_flag (
> +		message, NOTMUCH_MESSAGE_FLAG_GHOST))) {
>  	    _notmuch_message_add_term (message, "type", "mail");
> +	    if (is_ghost)
> +		/* Convert ghost message to a regular message */
> +		_notmuch_message_remove_term (message, "type", "ghost");
>  
>  	    ret = _notmuch_database_link_message (notmuch, message,
> -						  message_file);
> +						  message_file, is_ghost);
>  	    if (ret)
>  		goto DONE;
>  
> -- 
> 2.1.0
>
> _______________________________________________
> notmuch mailing list
> notmuch@notmuchmail.org
> http://notmuchmail.org/mailman/listinfo/notmuch

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2 12/12] lib: Remove unnecessary thread linking steps when using ghost messages
  2014-10-06 23:17 ` [PATCH v2 12/12] lib: Remove unnecessary thread linking steps when using ghost messages Austin Clements
@ 2014-10-21 23:17   ` Mark Walters
  2014-10-22  1:51     ` Austin Clements
  0 siblings, 1 reply; 21+ messages in thread
From: Mark Walters @ 2014-10-21 23:17 UTC (permalink / raw)
  To: Austin Clements, notmuch

On Tue, 07 Oct 2014, Austin Clements <aclements@csail.mit.edu> wrote:
> From: Austin Clements <amdragon@mit.edu>
>
> Previously, it was necessary to link new messages to children to work
> around some (though not all) problems with the old metadata-based
> approach to stored thread IDs.  With ghost messages, this is no longer
> necessary, so don't bother with child linking when ghost messages are
> in use.
> ---
>  lib/database.cc | 21 +++++++++++++++++----
>  1 file changed, 17 insertions(+), 4 deletions(-)
>
> diff --git a/lib/database.cc b/lib/database.cc
> index 1316529..6e51a72 100644
> --- a/lib/database.cc
> +++ b/lib/database.cc
> @@ -2169,10 +2169,23 @@ _notmuch_database_link_message (notmuch_database_t *notmuch,
>      if (status)
>  	goto DONE;
>  
> -    status = _notmuch_database_link_message_to_children (notmuch, message,
> -							 &thread_id);
> -    if (status)
> -	goto DONE;
> +    if (! (notmuch->features & NOTMUCH_FEATURE_GHOSTS)) {
> +	/* In general, it shouldn't be necessary to link children,
> +	 * since the earlier indexing of those children will have
> +	 * stored a thread ID for the missing parent.  However, prior
> +	 * to ghost messages, these stored thread IDs were NOT
> +	 * rewritten during thread merging (and there was no
> +	 * performant way to do so), so if indexed children were
> +	 * pulled into a different thread ID by a merge, it was
> +	 * necessary to pull them *back* into the stored thread ID of
> +	 * the parent.  With ghost messages, we just rewrite the
> +	 * stored thread IDs during merging, so this workaround isn't
> +	 * necessary. */
> +	status = _notmuch_database_link_message_to_children (notmuch, message,
> +							     &thread_id);
> +	if (status)
> +	    goto DONE;
> +    }

Ok so this basically answers my earlier comment. It might be worth
updating the big comment at the start of the function to match the new
code though.

Best wishes

Mark

>  
>      /* If not part of any existing thread, generate a new thread ID. */
>      if (thread_id == NULL) {
> -- 
> 2.1.0
>
> _______________________________________________
> notmuch mailing list
> notmuch@notmuchmail.org
> http://notmuchmail.org/mailman/listinfo/notmuch

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 00/12] Add ghost messages and fix thread linking
  2014-10-06 23:17 [PATCH 00/12] Add ghost messages and fix thread linking Austin Clements
                   ` (11 preceding siblings ...)
  2014-10-06 23:17 ` [PATCH v2 12/12] lib: Remove unnecessary thread linking steps when using ghost messages Austin Clements
@ 2014-10-21 23:32 ` Mark Walters
  12 siblings, 0 replies; 21+ messages in thread
From: Mark Walters @ 2014-10-21 23:32 UTC (permalink / raw)
  To: Austin Clements, notmuch


On Tue, 07 Oct 2014, Austin Clements <aclements@csail.mit.edu> wrote:
> This is v2 of the
> id:1412345958-8278-1-git-send-email-aclements@csail.mit.edu.  This
> adds some comments and clarifies some code as suggested by David.
> Patch 6 is new in v2 and adds some bit-twiddling macros for clarity
> and robustness in later patches.

Ok I have now been through the whole series and am basically happy with
it (I sent some trivial comments separately). The tests pass, and my
database still seems to work.

So +1 from me but it is code I am not very familiar with so it's a
slightly more cautious +1 than usual.

Best wishes

Mark


>
> The diff from v1 is below.
>
> diff --git a/lib/database.cc b/lib/database.cc
> index 4655f59..6e51a72 100644
> --- a/lib/database.cc
> +++ b/lib/database.cc
> @@ -1277,6 +1277,8 @@ notmuch_database_upgrade (notmuch_database_t *notmuch,
>  	    ++total;
>      }
>      if (new_features & NOTMUCH_FEATURE_GHOSTS) {
> +	/* The ghost message upgrade converts all thread_id_*
> +	 * metadata values into ghost message documents. */
>  	t_end = db->metadata_keys_end ("thread_id_");
>  	for (t = db->metadata_keys_begin ("thread_id_"); t != t_end; ++t)
>  	    ++total;
> diff --git a/lib/message.cc b/lib/message.cc
> index ad832cf..a7a13cc 100644
> --- a/lib/message.cc
> +++ b/lib/message.cc
> @@ -344,15 +344,17 @@ _notmuch_message_ensure_metadata (notmuch_message_t *message)
>  
>      /* Get document type */
>      assert (strcmp (id_prefix, type_prefix) < 0);
> -    if (! (message->lazy_flags & (1 << NOTMUCH_MESSAGE_FLAG_GHOST))) {
> +    if (! NOTMUCH_TEST_BIT (message->lazy_flags, NOTMUCH_MESSAGE_FLAG_GHOST)) {
>  	i.skip_to (type_prefix);
> +	/* "T" is the prefix "type" fields.  See
> +	 * BOOLEAN_PREFIX_INTERNAL. */
>  	if (*i == "Tmail")
> -	    message->flags &= ~(1 << NOTMUCH_MESSAGE_FLAG_GHOST);
> +	    NOTMUCH_CLEAR_BIT (&message->flags, NOTMUCH_MESSAGE_FLAG_GHOST);
>  	else if (*i == "Tghost")
> -	    message->flags |= (1 << NOTMUCH_MESSAGE_FLAG_GHOST);
> +	    NOTMUCH_SET_BIT (&message->flags, NOTMUCH_MESSAGE_FLAG_GHOST);
>  	else
>  	    INTERNAL_ERROR ("Message without type term");
> -	message->lazy_flags |= (1 << NOTMUCH_MESSAGE_FLAG_GHOST);
> +	NOTMUCH_SET_BIT (&message->lazy_flags, NOTMUCH_MESSAGE_FLAG_GHOST);
>      }
>  
>      /* Get filename list.  Here we get only the terms.  We lazily
> @@ -390,8 +392,8 @@ _notmuch_message_invalidate_metadata (notmuch_message_t *message,
>      }
>  
>      if (strcmp ("type", prefix_name) == 0) {
> -	message->flags &= ~(1 << NOTMUCH_MESSAGE_FLAG_GHOST);
> -	message->lazy_flags &= ~(1 << NOTMUCH_MESSAGE_FLAG_GHOST);
> +	NOTMUCH_CLEAR_BIT (&message->flags, NOTMUCH_MESSAGE_FLAG_GHOST);
> +	NOTMUCH_CLEAR_BIT (&message->lazy_flags, NOTMUCH_MESSAGE_FLAG_GHOST);
>      }
>  
>      if (strcmp ("file-direntry", prefix_name) == 0) {
> @@ -893,10 +895,10 @@ notmuch_message_get_flag (notmuch_message_t *message,
>  			  notmuch_message_flag_t flag)
>  {
>      if (flag == NOTMUCH_MESSAGE_FLAG_GHOST &&
> -	! (message->lazy_flags & (1 << flag)))
> +	! NOTMUCH_TEST_BIT (message->lazy_flags, flag))
>  	_notmuch_message_ensure_metadata (message);
>  
> -    return message->flags & (1 << flag);
> +    return NOTMUCH_TEST_BIT (message->flags, flag);
>  }
>  
>  void
> @@ -904,10 +906,10 @@ notmuch_message_set_flag (notmuch_message_t *message,
>  			  notmuch_message_flag_t flag, notmuch_bool_t enable)
>  {
>      if (enable)
> -	message->flags |= (1 << flag);
> +	NOTMUCH_SET_BIT (&message->flags, flag);
>      else
> -	message->flags &= ~(1 << flag);
> -    message->lazy_flags |= (1 << flag);
> +	NOTMUCH_CLEAR_BIT (&message->flags, flag);
> +    NOTMUCH_SET_BIT (&message->lazy_flags, flag);
>  }
>  
>  time_t
> diff --git a/lib/notmuch-private.h b/lib/notmuch-private.h
> index 2fbd38e..2f43c1d 100644
> --- a/lib/notmuch-private.h
> +++ b/lib/notmuch-private.h
> @@ -63,6 +63,17 @@ NOTMUCH_BEGIN_DECLS
>  #define STRNCMP_LITERAL(var, literal) \
>      strncmp ((var), (literal), sizeof (literal) - 1)
>  
> +/* Robust bit test/set/reset macros */
> +#define NOTMUCH_TEST_BIT(val, bit) \
> +    ((bit < 0 || bit >= CHAR_BIT * sizeof (unsigned long long)) ? 0	\
> +     : !!((val) & (1ull << bit)))
> +#define NOTMUCH_SET_BIT(valp, bit) \
> +    ((bit < 0 || bit >= CHAR_BIT * sizeof (unsigned long long)) ? *(valp) \
> +     : (*(valp) |= (1ull << bit)))
> +#define NOTMUCH_CLEAR_BIT(valp,  bit) \
> +    ((bit < 0 || bit >= CHAR_BIT * sizeof (unsigned long long)) ? *(valp) \
> +     : (*(valp) &= ~(1ull << bit)))
> +
>  #define unused(x) x __attribute__ ((unused))
>  
>  #ifdef __cplusplus
>
>
> _______________________________________________
> notmuch mailing list
> notmuch@notmuchmail.org
> http://notmuchmail.org/mailman/listinfo/notmuch

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2 07/12] lib: Internal support for querying and creating ghost messages
  2014-10-21 23:05   ` Mark Walters
@ 2014-10-22  1:33     ` Austin Clements
  0 siblings, 0 replies; 21+ messages in thread
From: Austin Clements @ 2014-10-22  1:33 UTC (permalink / raw)
  To: Mark Walters; +Cc: notmuch

Quoth Mark Walters on Oct 22 at 12:05 am:
> 
> Hi 
> 
> I am slowly working my way through this series: only two trivial queries
> so far.
> 
> On Tue, 07 Oct 2014, Austin Clements <aclements@csail.mit.edu> wrote:
> > From: Austin Clements <amdragon@mit.edu>
> >
> > This updates the message abstraction to support ghost messages: it
> > adds a message flag that distinguishes regular messages from ghost
> > messages, and an internal function for initializing a newly created
> > (blank) message as a ghost message.
> > ---
> >  lib/message.cc        | 52 +++++++++++++++++++++++++++++++++++++++++++++++++--
> >  lib/notmuch-private.h |  4 ++++
> >  lib/notmuch.h         |  9 ++++++++-
> >  3 files changed, 62 insertions(+), 3 deletions(-)
> >
> > diff --git a/lib/message.cc b/lib/message.cc
> > index 55d2ff6..a7a13cc 100644
> > --- a/lib/message.cc
> > +++ b/lib/message.cc
> > @@ -39,6 +39,9 @@ struct visible _notmuch_message {
> >      notmuch_message_file_t *message_file;
> >      notmuch_message_list_t *replies;
> >      unsigned long flags;
> > +    /* For flags that are initialized on-demand, lazy_flags indicates
> > +     * if each flag has been initialized. */
> > +    unsigned long lazy_flags;
> 
> I wonder if valid_flags might be better here as, as far as I can see,
> the reason for these is so we can invalidate a flag more than an
> optimisation (which is what I thought the lazy implied).

I do think of this as an optimization.  If we were to compute the
value of this flag when a message was created (and keep it
up-to-date), there would be no need for lazy_flags.  But, unlike the
other flags, computing this is expensive.

> >  
> >      Xapian::Document doc;
> >      Xapian::termcount termpos;
> > @@ -99,6 +102,7 @@ _notmuch_message_create_for_document (const void *talloc_owner,
> >  
> >      message->frozen = 0;
> >      message->flags = 0;
> > +    message->lazy_flags = 0;
> >  
> >      /* Each of these will be lazily created as needed. */
> >      message->message_id = NULL;
> > @@ -192,7 +196,7 @@ _notmuch_message_create (const void *talloc_owner,
> >   *
> >   *     There is already a document with message ID 'message_id' in the
> >   *     database. The returned message can be used to query/modify the
> > - *     document.
> > + *     document. The message may be a ghost message.
> >   *
> >   *   NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND:
> >   *
> > @@ -305,6 +309,7 @@ _notmuch_message_ensure_metadata (notmuch_message_t *message)
> >      const char *thread_prefix = _find_prefix ("thread"),
> >  	*tag_prefix = _find_prefix ("tag"),
> >  	*id_prefix = _find_prefix ("id"),
> > +	*type_prefix = _find_prefix ("type"),
> >  	*filename_prefix = _find_prefix ("file-direntry"),
> >  	*replyto_prefix = _find_prefix ("replyto");
> >  
> > @@ -337,10 +342,25 @@ _notmuch_message_ensure_metadata (notmuch_message_t *message)
> >  	message->message_id =
> >  	    _notmuch_message_get_term (message, i, end, id_prefix);
> >  
> > +    /* Get document type */
> > +    assert (strcmp (id_prefix, type_prefix) < 0);
> > +    if (! NOTMUCH_TEST_BIT (message->lazy_flags, NOTMUCH_MESSAGE_FLAG_GHOST)) {
> > +	i.skip_to (type_prefix);
> > +	/* "T" is the prefix "type" fields.  See
> > +	 * BOOLEAN_PREFIX_INTERNAL. */
> > +	if (*i == "Tmail")
> > +	    NOTMUCH_CLEAR_BIT (&message->flags, NOTMUCH_MESSAGE_FLAG_GHOST);
> > +	else if (*i == "Tghost")
> > +	    NOTMUCH_SET_BIT (&message->flags, NOTMUCH_MESSAGE_FLAG_GHOST);
> > +	else
> > +	    INTERNAL_ERROR ("Message without type term");
> > +	NOTMUCH_SET_BIT (&message->lazy_flags, NOTMUCH_MESSAGE_FLAG_GHOST);
> > +    }
> > +
> >      /* Get filename list.  Here we get only the terms.  We lazily
> >       * expand them to full file names when needed in
> >       * _notmuch_message_ensure_filename_list. */
> > -    assert (strcmp (id_prefix, filename_prefix) < 0);
> > +    assert (strcmp (type_prefix, filename_prefix) < 0);
> >      if (!message->filename_term_list && !message->filename_list)
> >  	message->filename_term_list =
> >  	    _notmuch_database_get_terms_with_prefix (message, i, end,
> > @@ -371,6 +391,11 @@ _notmuch_message_invalidate_metadata (notmuch_message_t *message,
> >  	message->tag_list = NULL;
> >      }
> >  
> > +    if (strcmp ("type", prefix_name) == 0) {
> > +	NOTMUCH_CLEAR_BIT (&message->flags, NOTMUCH_MESSAGE_FLAG_GHOST);
> > +	NOTMUCH_CLEAR_BIT (&message->lazy_flags, NOTMUCH_MESSAGE_FLAG_GHOST);
> > +    }
> > +
> >      if (strcmp ("file-direntry", prefix_name) == 0) {
> >  	talloc_free (message->filename_term_list);
> >  	talloc_free (message->filename_list);
> > @@ -869,6 +894,10 @@ notmuch_bool_t
> >  notmuch_message_get_flag (notmuch_message_t *message,
> >  			  notmuch_message_flag_t flag)
> >  {
> > +    if (flag == NOTMUCH_MESSAGE_FLAG_GHOST &&
> > +	! NOTMUCH_TEST_BIT (message->lazy_flags, flag))
> > +	_notmuch_message_ensure_metadata (message);
> > +
> >      return NOTMUCH_TEST_BIT (message->flags, flag);
> >  }
> >  
> > @@ -880,6 +909,7 @@ notmuch_message_set_flag (notmuch_message_t *message,
> >  	NOTMUCH_SET_BIT (&message->flags, flag);
> >      else
> >  	NOTMUCH_CLEAR_BIT (&message->flags, flag);
> > +    NOTMUCH_SET_BIT (&message->lazy_flags, flag);
> >  }
> >  
> >  time_t
> > @@ -989,6 +1019,24 @@ _notmuch_message_delete (notmuch_message_t *message)
> >      return NOTMUCH_STATUS_SUCCESS;
> >  }
> >  
> > +/* Transform a blank message into a ghost message.  The caller must
> > + * _notmuch_message_sync the message. */
> > +notmuch_private_status_t
> > +_notmuch_message_initialize_ghost (notmuch_message_t *message,
> > +				   const char *thread_id)
> > +{
> > +    notmuch_private_status_t status;
> > +
> > +    status = _notmuch_message_add_term (message, "type", "ghost");
> > +    if (status)
> > +	return status;
> > +    status = _notmuch_message_add_term (message, "thread", thread_id);
> > +    if (status)
> > +	return status;
> > +
> > +    return NOTMUCH_PRIVATE_STATUS_SUCCESS;
> > +}
> > +
> >  /* Ensure that 'message' is not holding any file object open. Future
> >   * calls to various functions will still automatically open the
> >   * message file as needed.
> > diff --git a/lib/notmuch-private.h b/lib/notmuch-private.h
> > index 7250291..2f43c1d 100644
> > --- a/lib/notmuch-private.h
> > +++ b/lib/notmuch-private.h
> > @@ -308,6 +308,10 @@ _notmuch_message_sync (notmuch_message_t *message);
> >  notmuch_status_t
> >  _notmuch_message_delete (notmuch_message_t *message);
> >  
> > +notmuch_private_status_t
> > +_notmuch_message_initialize_ghost (notmuch_message_t *message,
> > +				   const char *thread_id);
> > +
> >  void
> >  _notmuch_message_close (notmuch_message_t *message);
> >  
> > diff --git a/lib/notmuch.h b/lib/notmuch.h
> > index dae0416..92594b9 100644
> > --- a/lib/notmuch.h
> > +++ b/lib/notmuch.h
> > @@ -1221,7 +1221,14 @@ notmuch_message_get_filenames (notmuch_message_t *message);
> >   */
> >  typedef enum _notmuch_message_flag {
> >      NOTMUCH_MESSAGE_FLAG_MATCH,
> > -    NOTMUCH_MESSAGE_FLAG_EXCLUDED
> > +    NOTMUCH_MESSAGE_FLAG_EXCLUDED,
> > +
> > +    /* This message is a "ghost message", meaning it has no filenames
> > +     * or content, but we know it exists because it was referenced by
> > +     * some other message.  A ghost message has only a message ID and
> > +     * thread ID.
> > +     */
> 
> Can I check here: we are not allowing a ghost message to have any tags?

Correct, at least for now.

However, I think it would make *a lot* of sense to be able to pre-seed
ghost messages with tags.  nmbug could use this to avoid races with
receiving messages.  Distributed tag sync could use it similarly.
Insert could use it to eliminate the nasty races between storing the
message, indexing it, and tagging it.  Restore could potentially use
it.  When sending messages, we could pre-seed a sent tag for when the
sent message is re-received (though insert may obviate that).  I'm
sure there are other uses I haven't thought of.

This requires some new APIs, since there's currently no way for a
library user to create a ghost message or get at it to tag it.  It
also slightly complicates notmuch_database_get_all_tags since that
probably shouldn't return tags that are only on ghost messages (I
think if we just collect all the docids in the Tghost posting list and
use that to filter out tag terms that there should be almost no
performance impact of this).  But these are both quite doable things.

A more complicated question is what we want to do with deleted
messages.  Currently we remove them entirely from the database, but we
*could* keep around their tags so if the message reappears (e.g.,
there was a transient problem) we can bring back the tags.  After
thinking about this a great deal, I concluded we should just continue
deleting them from the database (or, at most, strip the message back
down to its thread ID).  If anyone's curious, I can write up my
thoughts on this, but it boils down to complicated the semantics of
initial tagging and dump/restore.

> Best wishes
> 
> Mark
> 
> > +    NOTMUCH_MESSAGE_FLAG_GHOST,
> >  } notmuch_message_flag_t;
> >  
> >  /**
> >
> > _______________________________________________
> > notmuch mailing list
> > notmuch@notmuchmail.org
> > http://notmuchmail.org/mailman/listinfo/notmuch

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2 08/12] lib: Implement ghost-based thread linking
  2014-10-21 23:10   ` Mark Walters
@ 2014-10-22  1:49     ` Austin Clements
  0 siblings, 0 replies; 21+ messages in thread
From: Austin Clements @ 2014-10-22  1:49 UTC (permalink / raw)
  To: Mark Walters; +Cc: notmuch

Quoth Mark Walters on Oct 22 at 12:10 am:
> On Tue, 07 Oct 2014, Austin Clements <aclements@csail.mit.edu> wrote:
> > From: Austin Clements <amdragon@mit.edu>
> >
> > This updates the thread linking code to use ghost messages instead of
> > user metadata to link messages into threads.
> >
> > In contrast with the old approach, this is actually correct.
> > Previously, thread merging updated only the thread IDs of message
> > documents, not thread IDs stored in user metadata.  As originally
> > diagnosed by Mark Walters [1] and as demonstrated by the broken
> > T260-thread-order test, this can cause notmuch to fail to link
> > messages even though they're in the same thread.  In principle the old
> > approach could have been fixed by updating the user metadata thread
> > IDs as well, but these are not indexed and hence this would have
> > required a full scan of all stored thread IDs.  Ghost messages solve
> > this problem naturally by reusing the exact same thread ID and message
> > ID representation and indexing as regular messages.
> >
> > Furthermore, thanks to this greater symmetry, ghost messages are also
> > algorithmically simpler.  We continue to support the old user metadata
> > format, so this patch can't delete any code, but when we do remove
> > support for the old format, several functions can simply be deleted.
> >
> > [1] id:8738h7kv2q.fsf@qmul.ac.uk
> > ---
> >  lib/database.cc | 86 +++++++++++++++++++++++++++++++++++++++++++++++++--------
> >  1 file changed, 75 insertions(+), 11 deletions(-)
> >
> > diff --git a/lib/database.cc b/lib/database.cc
> > index c641bcd..fdcc526 100644
> > --- a/lib/database.cc
> > +++ b/lib/database.cc
> > @@ -1752,6 +1752,12 @@ _get_metadata_thread_id_key (void *ctx, const char *message_id)
> >  			    message_id);
> >  }
> >  
> > +static notmuch_status_t
> > +_resolve_message_id_to_thread_id_old (notmuch_database_t *notmuch,
> > +				      void *ctx,
> > +				      const char *message_id,
> > +				      const char **thread_id_ret);
> > +
> >  /* Find the thread ID to which the message with 'message_id' belongs.
> >   *
> >   * Note: 'thread_id_ret' must not be NULL!
> > @@ -1760,9 +1766,9 @@ _get_metadata_thread_id_key (void *ctx, const char *message_id)
> >   *
> >   * Note: If there is no message in the database with the given
> >   * 'message_id' then a new thread_id will be allocated for this
> > - * message and stored in the database metadata, (where this same
> > + * message ID and stored in the database metadata so that the
> >   * thread ID can be looked up if the message is added to the database
> > - * later).
> > + * later.
> >   */
> >  static notmuch_status_t
> >  _resolve_message_id_to_thread_id (notmuch_database_t *notmuch,
> > @@ -1770,6 +1776,49 @@ _resolve_message_id_to_thread_id (notmuch_database_t *notmuch,
> >  				  const char *message_id,
> >  				  const char **thread_id_ret)
> >  {
> > +    notmuch_private_status_t status;
> > +    notmuch_message_t *message;
> > +
> > +    if (! (notmuch->features & NOTMUCH_FEATURE_GHOSTS))
> > +	return _resolve_message_id_to_thread_id_old (notmuch, ctx, message_id,
> > +						     thread_id_ret);
> > +
> > +    /* Look for this message (regular or ghost) */
> > +    message = _notmuch_message_create_for_message_id (
> > +	notmuch, message_id, &status);
> > +    if (status == NOTMUCH_PRIVATE_STATUS_SUCCESS) {
> > +	/* Message exists */
> > +	*thread_id_ret = talloc_steal (
> > +	    ctx, notmuch_message_get_thread_id (message));
> > +    } else if (status == NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND) {
> > +	/* Message did not exist.  Give it a fresh thread ID and
> > +	 * populate this message as a ghost message. */
> > +	*thread_id_ret = talloc_strdup (
> > +	    ctx, _notmuch_database_generate_thread_id (notmuch));
> > +	if (! *thread_id_ret) {
> > +	    status = NOTMUCH_PRIVATE_STATUS_OUT_OF_MEMORY;
> > +	} else {
> > +	    status = _notmuch_message_initialize_ghost (message, *thread_id_ret);
> > +	    if (status == 0)
> > +		/* Commit the new ghost message */
> > +		_notmuch_message_sync (message);
> > +	}
> > +    } else {
> > +	/* Create failed. Fall through. */
> > +    }
> > +
> > +    notmuch_message_destroy (message);
> > +
> > +    return COERCE_STATUS (status, "Error creating ghost message");
> > +}
> > +
> > +/* Pre-ghost messages _resolve_message_id_to_thread_id */
> > +static notmuch_status_t
> > +_resolve_message_id_to_thread_id_old (notmuch_database_t *notmuch,
> > +				      void *ctx,
> > +				      const char *message_id,
> > +				      const char **thread_id_ret)
> > +{
> >      notmuch_status_t status;
> >      notmuch_message_t *message;
> >      string thread_id_string;
> > @@ -2007,7 +2056,7 @@ _consume_metadata_thread_id (void *ctx, notmuch_database_t *notmuch,
> >      }
> >  }
> >  
> > -/* Given a (mostly empty) 'message' and its corresponding
> > +/* Given a blank or ghost 'message' and its corresponding
> >   * 'message_file' link it to existing threads in the database.
> >   *
> >   * The first check is in the metadata of the database to see if we
> 
> There is quite a lot of comment below this and it is not clear to me how
> much of it applies to the is_ghost case. In particular does the 
> 
>   * Finally, we look in the database for existing message that
>   * reference 'message'.
> 
> still apply? I would have thought that we would already have done that
> in the is_ghost case. 

Good point.  The comment isn't really wrong, but it isn't right
either.  How about

diff --git a/lib/database.cc b/lib/database.cc
index 6e51a72..d063ec8 100644
--- a/lib/database.cc
+++ b/lib/database.cc
@@ -2121,10 +2121,13 @@ _consume_metadata_thread_id (void *ctx, notmuch_database_t *notmuch,
 /* Given a blank or ghost 'message' and its corresponding
  * 'message_file' link it to existing threads in the database.
  *
- * The first check is in the metadata of the database to see if we
- * have pre-allocated a thread_id in advance for this message, (which
- * would have happened if a message was previously added that
- * referenced this one).
+ * First, if is_ghost, this retrieves the thread ID already stored in
+ * the message (which will be the case if a message was previously
+ * added that referenced this one).  If the message is blank
+ * (!is_ghost), it doesn't have a thread ID yet (we'll generate one
+ * later in this function).  If the database does not support ghost
+ * messages, this checks for a thread ID stored in database metadata
+ * for this message ID.
  *
  * Second, we look at 'message_file' and its link-relevant headers
  * (References and In-Reply-To) for message IDs.
@@ -2132,7 +2135,7 @@ _consume_metadata_thread_id (void *ctx, notmuch_database_t *notmuch,
  * Finally, we look in the database for existing message that
  * reference 'message'.
  *
- * In all cases, we assign to the current message the first thread_id
+ * In all cases, we assign to the current message the first thread ID
  * found (through either parent or child). We will also merge any
  * existing, distinct threads where this message belongs to both,
  * (which is not uncommon when messages are processed out of order).

?

> Of course this also applies to the actual code which seems to call
> _notmuch_database_link_message_to_children unconditionally. It might be
> worth a comment in any case.
> 
> Best wishes
> 
> Mark
> 
> 
> > @@ -2035,16 +2084,22 @@ _consume_metadata_thread_id (void *ctx, notmuch_database_t *notmuch,
> >  static notmuch_status_t
> >  _notmuch_database_link_message (notmuch_database_t *notmuch,
> >  				notmuch_message_t *message,
> > -				notmuch_message_file_t *message_file)
> > +				notmuch_message_file_t *message_file,
> > +				notmuch_bool_t is_ghost)
> >  {
> >      void *local = talloc_new (NULL);
> >      notmuch_status_t status;
> > -    const char *thread_id;
> > +    const char *thread_id = NULL;
> >  
> >      /* Check if the message already had a thread ID */
> > -    thread_id = _consume_metadata_thread_id (local, notmuch, message);
> > -    if (thread_id)
> > -	_notmuch_message_add_term (message, "thread", thread_id);
> > +    if (notmuch->features & NOTMUCH_FEATURE_GHOSTS) {
> > +	if (is_ghost)
> > +	    thread_id = notmuch_message_get_thread_id (message);
> > +    } else {
> > +	thread_id = _consume_metadata_thread_id (local, notmuch, message);
> > +	if (thread_id)
> > +	    _notmuch_message_add_term (message, "thread", thread_id);
> > +    }
> >  
> >      status = _notmuch_database_link_message_to_parents (notmuch, message,
> >  							message_file,
> > @@ -2079,6 +2134,7 @@ notmuch_database_add_message (notmuch_database_t *notmuch,
> >      notmuch_message_t *message = NULL;
> >      notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS, ret2;
> >      notmuch_private_status_t private_status;
> > +    notmuch_bool_t is_ghost = false;
> >  
> >      const char *date, *header;
> >      const char *from, *to, *subject;
> > @@ -2171,12 +2227,20 @@ notmuch_database_add_message (notmuch_database_t *notmuch,
> >  
> >  	_notmuch_message_add_filename (message, filename);
> >  
> > -	/* Is this a newly created message object? */
> > -	if (private_status == NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND) {
> > +	/* Is this a newly created message object or a ghost
> > +	 * message?  We have to be slightly careful: if this is a
> > +	 * blank message, it's not safe to call
> > +	 * notmuch_message_get_flag yet. */
> > +	if (private_status == NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND ||
> > +	    (is_ghost = notmuch_message_get_flag (
> > +		message, NOTMUCH_MESSAGE_FLAG_GHOST))) {
> >  	    _notmuch_message_add_term (message, "type", "mail");
> > +	    if (is_ghost)
> > +		/* Convert ghost message to a regular message */
> > +		_notmuch_message_remove_term (message, "type", "ghost");
> >  
> >  	    ret = _notmuch_database_link_message (notmuch, message,
> > -						  message_file);
> > +						  message_file, is_ghost);
> >  	    if (ret)
> >  		goto DONE;
> >  
> >
> > _______________________________________________
> > notmuch mailing list
> > notmuch@notmuchmail.org
> > http://notmuchmail.org/mailman/listinfo/notmuch

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* Re: [PATCH v2 12/12] lib: Remove unnecessary thread linking steps when using ghost messages
  2014-10-21 23:17   ` Mark Walters
@ 2014-10-22  1:51     ` Austin Clements
  0 siblings, 0 replies; 21+ messages in thread
From: Austin Clements @ 2014-10-22  1:51 UTC (permalink / raw)
  To: Mark Walters; +Cc: notmuch

Quoth Mark Walters on Oct 22 at 12:17 am:
> On Tue, 07 Oct 2014, Austin Clements <aclements@csail.mit.edu> wrote:
> > From: Austin Clements <amdragon@mit.edu>
> >
> > Previously, it was necessary to link new messages to children to work
> > around some (though not all) problems with the old metadata-based
> > approach to stored thread IDs.  With ghost messages, this is no longer
> > necessary, so don't bother with child linking when ghost messages are
> > in use.
> > ---
> >  lib/database.cc | 21 +++++++++++++++++----
> >  1 file changed, 17 insertions(+), 4 deletions(-)
> >
> > diff --git a/lib/database.cc b/lib/database.cc
> > index 1316529..6e51a72 100644
> > --- a/lib/database.cc
> > +++ b/lib/database.cc
> > @@ -2169,10 +2169,23 @@ _notmuch_database_link_message (notmuch_database_t *notmuch,
> >      if (status)
> >  	goto DONE;
> >  
> > -    status = _notmuch_database_link_message_to_children (notmuch, message,
> > -							 &thread_id);
> > -    if (status)
> > -	goto DONE;
> > +    if (! (notmuch->features & NOTMUCH_FEATURE_GHOSTS)) {
> > +	/* In general, it shouldn't be necessary to link children,
> > +	 * since the earlier indexing of those children will have
> > +	 * stored a thread ID for the missing parent.  However, prior
> > +	 * to ghost messages, these stored thread IDs were NOT
> > +	 * rewritten during thread merging (and there was no
> > +	 * performant way to do so), so if indexed children were
> > +	 * pulled into a different thread ID by a merge, it was
> > +	 * necessary to pull them *back* into the stored thread ID of
> > +	 * the parent.  With ghost messages, we just rewrite the
> > +	 * stored thread IDs during merging, so this workaround isn't
> > +	 * necessary. */
> > +	status = _notmuch_database_link_message_to_children (notmuch, message,
> > +							     &thread_id);
> > +	if (status)
> > +	    goto DONE;
> > +    }
> 
> Ok so this basically answers my earlier comment. It might be worth
> updating the big comment at the start of the function to match the new
> code though.

Like so?

diff --git a/lib/database.cc b/lib/database.cc
index d063ec8..3601f9d 100644
--- a/lib/database.cc
+++ b/lib/database.cc
@@ -2136,11 +2136,11 @@ _consume_metadata_thread_id (void *ctx, notmuch_database_t *notmuch,
  * reference 'message'.
  *
  * In all cases, we assign to the current message the first thread ID
- * found (through either parent or child). We will also merge any
- * existing, distinct threads where this message belongs to both,
- * (which is not uncommon when messages are processed out of order).
+ * found. We will also merge any existing, distinct threads where this
+ * message belongs to both, (which is not uncommon when messages are
+ * processed out of order).
  *
- * Finally, if no thread ID has been found through parent or child, we
+ * Finally, if no thread ID has been found through referenced messages, we
  * call _notmuch_message_generate_thread_id to generate a new thread
  * ID. This should only happen for new, top-level messages, (no
  * References or In-Reply-To header in this message, and no previously


> Best wishes
> 
> Mark
> 
> >  
> >      /* If not part of any existing thread, generate a new thread ID. */
> >      if (thread_id == NULL) {
> >
> > _______________________________________________
> > notmuch mailing list
> > notmuch@notmuchmail.org
> > http://notmuchmail.org/mailman/listinfo/notmuch

^ permalink raw reply related	[flat|nested] 21+ messages in thread

end of thread, other threads:[~2014-10-22  1:51 UTC | newest]

Thread overview: 21+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-10-06 23:17 [PATCH 00/12] Add ghost messages and fix thread linking Austin Clements
2014-10-06 23:17 ` [PATCH v2 01/12] lib: Move message ID compression to _notmuch_message_create_for_message_id Austin Clements
2014-10-06 23:17 ` [PATCH v2 02/12] lib: Refactor _notmuch_database_link_message Austin Clements
2014-10-06 23:17 ` [PATCH v2 03/12] lib: Handle empty date value Austin Clements
2014-10-11  5:12   ` David Bremner
2014-10-06 23:17 ` [PATCH v2 04/12] lib: Add a ghost messages database feature Austin Clements
2014-10-06 23:17 ` [PATCH v2 05/12] lib: Update database schema doc for ghost messages Austin Clements
2014-10-06 23:17 ` [PATCH v2 06/12] lib: Introduce macros for bit operations Austin Clements
2014-10-06 23:17 ` [PATCH v2 07/12] lib: Internal support for querying and creating ghost messages Austin Clements
2014-10-21 23:05   ` Mark Walters
2014-10-22  1:33     ` Austin Clements
2014-10-06 23:17 ` [PATCH v2 08/12] lib: Implement ghost-based thread linking Austin Clements
2014-10-21 23:10   ` Mark Walters
2014-10-22  1:49     ` Austin Clements
2014-10-06 23:17 ` [PATCH v2 09/12] lib: Implement upgrade to ghost messages feature Austin Clements
2014-10-06 23:17 ` [PATCH v2 10/12] lib: Enable " Austin Clements
2014-10-06 23:17 ` [PATCH v2 11/12] test: Test upgrade to " Austin Clements
2014-10-06 23:17 ` [PATCH v2 12/12] lib: Remove unnecessary thread linking steps when using ghost messages Austin Clements
2014-10-21 23:17   ` Mark Walters
2014-10-22  1:51     ` Austin Clements
2014-10-21 23:32 ` [PATCH 00/12] Add ghost messages and fix thread linking Mark Walters

Code repositories for project(s) associated with this public inbox

	https://yhetil.org/notmuch.git/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).