From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from localhost (localhost [127.0.0.1]) by arlo.cworth.org (Postfix) with ESMTP id F02296DE0BA2 for ; Sun, 4 Jun 2017 05:33:07 -0700 (PDT) X-Virus-Scanned: Debian amavisd-new at cworth.org X-Spam-Flag: NO X-Spam-Score: -0.001 X-Spam-Level: X-Spam-Status: No, score=-0.001 tagged_above=-999 required=5 tests=[AWL=0.010, SPF_PASS=-0.001, T_RP_MATCHES_RCVD=-0.01] autolearn=disabled Received: from arlo.cworth.org ([127.0.0.1]) by localhost (arlo.cworth.org [127.0.0.1]) (amavisd-new, port 10024) with ESMTP id Vz5AvI5yKwXA for ; Sun, 4 Jun 2017 05:33:06 -0700 (PDT) Received: from fethera.tethera.net (fethera.tethera.net [198.245.60.197]) by arlo.cworth.org (Postfix) with ESMTPS id 8F0646DE0A89 for ; Sun, 4 Jun 2017 05:32:53 -0700 (PDT) Received: from remotemail by fethera.tethera.net with local (Exim 4.84_2) (envelope-from ) id 1dHUhG-0004ad-M9; Sun, 04 Jun 2017 08:31:54 -0400 Received: (nullmailer pid 24523 invoked by uid 1000); Sun, 04 Jun 2017 12:32:38 -0000 From: David Bremner To: notmuch@freelists.org, notmuch@notmuchmail.org Subject: [patch v3 01/12] lib: isolate n_d_add_message and helper functions into own file Date: Sun, 4 Jun 2017 09:32:24 -0300 Message-Id: <20170604123235.24466-2-david@tethera.net> X-Mailer: git-send-email 2.11.0 In-Reply-To: <20170604123235.24466-1-david@tethera.net> References: <20170604123235.24466-1-david@tethera.net> X-BeenThere: notmuch@notmuchmail.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: "Use and development of the notmuch mail system." List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Sun, 04 Jun 2017 12:33:08 -0000 'database.cc' is becoming a monster, and it's hard to follow what the various static functions are used for. It turns out that about 1/3 of this file notmuch_database_add_message and helper functions not used by any other function. This commit isolates this code into it's own file. Some side effects of this refactoring: - find_doc_ids becomes the non-static (but still private) _notmuch_database_find_doc_ids - a few instances of 'string' have 'std::' prepended, avoiding the need for 'using namespace std;' in the new file. --- lib/Makefile.local | 1 + lib/add-message.cc | 721 ++++++++++++++++++++++++++++++++++++++++++++++++ lib/database-private.h | 10 + lib/database.cc | 732 +------------------------------------------------ 4 files changed, 739 insertions(+), 725 deletions(-) create mode 100644 lib/add-message.cc diff --git a/lib/Makefile.local b/lib/Makefile.local index bf6e0649..9dd68286 100644 --- a/lib/Makefile.local +++ b/lib/Makefile.local @@ -50,6 +50,7 @@ libnotmuch_cxx_srcs = \ $(dir)/directory.cc \ $(dir)/index.cc \ $(dir)/message.cc \ + $(dir)/add-message.cc \ $(dir)/message-property.cc \ $(dir)/query.cc \ $(dir)/query-fp.cc \ diff --git a/lib/add-message.cc b/lib/add-message.cc new file mode 100644 index 00000000..5fe2c45b --- /dev/null +++ b/lib/add-message.cc @@ -0,0 +1,721 @@ +#include "database-private.h" + +/* Advance 'str' past any whitespace or RFC 822 comments. A comment is + * a (potentially nested) parenthesized sequence with '\' used to + * escape any character (including parentheses). + * + * If the sequence to be skipped continues to the end of the string, + * then 'str' will be left pointing at the final terminating '\0' + * character. + */ +static void +skip_space_and_comments (const char **str) +{ + const char *s; + + s = *str; + while (*s && (isspace (*s) || *s == '(')) { + while (*s && isspace (*s)) + s++; + if (*s == '(') { + int nesting = 1; + s++; + while (*s && nesting) { + if (*s == '(') { + nesting++; + } else if (*s == ')') { + nesting--; + } else if (*s == '\\') { + if (*(s+1)) + s++; + } + s++; + } + } + } + + *str = s; +} + +/* Parse an RFC 822 message-id, discarding whitespace, any RFC 822 + * comments, and the '<' and '>' delimiters. + * + * If not NULL, then *next will be made to point to the first character + * not parsed, (possibly pointing to the final '\0' terminator. + * + * Returns a newly talloc'ed string belonging to 'ctx'. + * + * Returns NULL if there is any error parsing the message-id. */ +static char * +_parse_message_id (void *ctx, const char *message_id, const char **next) +{ + const char *s, *end; + char *result; + + if (message_id == NULL || *message_id == '\0') + return NULL; + + s = message_id; + + skip_space_and_comments (&s); + + /* Skip any unstructured text as well. */ + while (*s && *s != '<') + s++; + + if (*s == '<') { + s++; + } else { + if (next) + *next = s; + return NULL; + } + + skip_space_and_comments (&s); + + end = s; + while (*end && *end != '>') + end++; + if (next) { + if (*end) + *next = end + 1; + else + *next = end; + } + + if (end > s && *end == '>') + end--; + if (end <= s) + return NULL; + + result = talloc_strndup (ctx, s, end - s + 1); + + /* Finally, collapse any whitespace that is within the message-id + * itself. */ + { + char *r; + int len; + + for (r = result, len = strlen (r); *r; r++, len--) + if (*r == ' ' || *r == '\t') + memmove (r, r+1, len); + } + + return result; +} + +/* Parse a References header value, putting a (talloc'ed under 'ctx') + * copy of each referenced message-id into 'hash'. + * + * We explicitly avoid including any reference identical to + * 'message_id' in the result (to avoid mass confusion when a single + * message references itself cyclically---and yes, mail messages are + * not infrequent in the wild that do this---don't ask me why). + * + * Return the last reference parsed, if it is not equal to message_id. + */ +static char * +parse_references (void *ctx, + const char *message_id, + GHashTable *hash, + const char *refs) +{ + char *ref, *last_ref = NULL; + + if (refs == NULL || *refs == '\0') + return NULL; + + while (*refs) { + ref = _parse_message_id (ctx, refs, &refs); + + if (ref && strcmp (ref, message_id)) { + g_hash_table_add (hash, ref); + last_ref = ref; + } + } + + /* The return value of this function is used to add a parent + * reference to the database. We should avoid making a message + * its own parent, thus the above check. + */ + return talloc_strdup(ctx, last_ref); +} + +static const char * +_notmuch_database_generate_thread_id (notmuch_database_t *notmuch) +{ + /* 16 bytes (+ terminator) for hexadecimal representation of + * a 64-bit integer. */ + static char thread_id[17]; + Xapian::WritableDatabase *db; + + db = static_cast (notmuch->xapian_db); + + notmuch->last_thread_id++; + + sprintf (thread_id, "%016" PRIx64, notmuch->last_thread_id); + + db->set_metadata ("last_thread_id", thread_id); + + return thread_id; +} + +static char * +_get_metadata_thread_id_key (void *ctx, const char *message_id) +{ + if (strlen (message_id) > NOTMUCH_MESSAGE_ID_MAX) + message_id = _notmuch_message_id_compressed (ctx, message_id); + + return talloc_asprintf (ctx, NOTMUCH_METADATA_THREAD_ID_PREFIX "%s", + message_id); +} + + +static notmuch_status_t +_resolve_message_id_to_thread_id_old (notmuch_database_t *notmuch, + void *ctx, + const char *message_id, + const char **thread_id_ret); + + +/* Find the thread ID to which the message with 'message_id' belongs. + * + * Note: 'thread_id_ret' must not be NULL! + * On success '*thread_id_ret' is set to a newly talloced string belonging to + * 'ctx'. + * + * Note: If there is no message in the database with the given + * 'message_id' then a new thread_id will be allocated for this + * message ID and stored in the database metadata so that the + * thread ID can be looked up if the message is added to the database + * later. + */ +static notmuch_status_t +_resolve_message_id_to_thread_id (notmuch_database_t *notmuch, + void *ctx, + const char *message_id, + const char **thread_id_ret) +{ + notmuch_private_status_t status; + notmuch_message_t *message; + + if (! (notmuch->features & NOTMUCH_FEATURE_GHOSTS)) + return _resolve_message_id_to_thread_id_old (notmuch, ctx, message_id, + thread_id_ret); + + /* Look for this message (regular or ghost) */ + message = _notmuch_message_create_for_message_id ( + notmuch, message_id, &status); + if (status == NOTMUCH_PRIVATE_STATUS_SUCCESS) { + /* Message exists */ + *thread_id_ret = talloc_steal ( + ctx, notmuch_message_get_thread_id (message)); + } else if (status == NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND) { + /* Message did not exist. Give it a fresh thread ID and + * populate this message as a ghost message. */ + *thread_id_ret = talloc_strdup ( + ctx, _notmuch_database_generate_thread_id (notmuch)); + if (! *thread_id_ret) { + status = NOTMUCH_PRIVATE_STATUS_OUT_OF_MEMORY; + } else { + status = _notmuch_message_initialize_ghost (message, *thread_id_ret); + if (status == 0) + /* Commit the new ghost message */ + _notmuch_message_sync (message); + } + } else { + /* Create failed. Fall through. */ + } + + notmuch_message_destroy (message); + + return COERCE_STATUS (status, "Error creating ghost message"); +} + +/* Pre-ghost messages _resolve_message_id_to_thread_id */ +static notmuch_status_t +_resolve_message_id_to_thread_id_old (notmuch_database_t *notmuch, + void *ctx, + const char *message_id, + const char **thread_id_ret) +{ + notmuch_status_t status; + notmuch_message_t *message; + std::string thread_id_string; + char *metadata_key; + Xapian::WritableDatabase *db; + + status = notmuch_database_find_message (notmuch, message_id, &message); + + if (status) + return status; + + if (message) { + *thread_id_ret = talloc_steal (ctx, + notmuch_message_get_thread_id (message)); + + notmuch_message_destroy (message); + + return NOTMUCH_STATUS_SUCCESS; + } + + /* Message has not been seen yet. + * + * We may have seen a reference to it already, in which case, we + * can return the thread ID stored in the metadata. Otherwise, we + * generate a new thread ID and store it there. + */ + db = static_cast (notmuch->xapian_db); + metadata_key = _get_metadata_thread_id_key (ctx, message_id); + thread_id_string = notmuch->xapian_db->get_metadata (metadata_key); + + if (thread_id_string.empty()) { + *thread_id_ret = talloc_strdup (ctx, + _notmuch_database_generate_thread_id (notmuch)); + db->set_metadata (metadata_key, *thread_id_ret); + } else { + *thread_id_ret = talloc_strdup (ctx, thread_id_string.c_str()); + } + + talloc_free (metadata_key); + + return NOTMUCH_STATUS_SUCCESS; +} + +static notmuch_status_t +_merge_threads (notmuch_database_t *notmuch, + const char *winner_thread_id, + const char *loser_thread_id) +{ + Xapian::PostingIterator loser, loser_end; + notmuch_message_t *message = NULL; + notmuch_private_status_t private_status; + notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS; + + _notmuch_database_find_doc_ids (notmuch, "thread", loser_thread_id, &loser, &loser_end); + + for ( ; loser != loser_end; loser++) { + message = _notmuch_message_create (notmuch, notmuch, + *loser, &private_status); + if (message == NULL) { + ret = COERCE_STATUS (private_status, + "Cannot find document for doc_id from query"); + goto DONE; + } + + _notmuch_message_remove_term (message, "thread", loser_thread_id); + _notmuch_message_add_term (message, "thread", winner_thread_id); + _notmuch_message_sync (message); + + notmuch_message_destroy (message); + message = NULL; + } + + DONE: + if (message) + notmuch_message_destroy (message); + + return ret; +} + +static void +_my_talloc_free_for_g_hash (void *ptr) +{ + talloc_free (ptr); +} + +static notmuch_status_t +_notmuch_database_link_message_to_parents (notmuch_database_t *notmuch, + notmuch_message_t *message, + notmuch_message_file_t *message_file, + const char **thread_id) +{ + GHashTable *parents = NULL; + const char *refs, *in_reply_to, *in_reply_to_message_id; + const char *last_ref_message_id, *this_message_id; + GList *l, *keys = NULL; + notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS; + + parents = g_hash_table_new_full (g_str_hash, g_str_equal, + _my_talloc_free_for_g_hash, NULL); + this_message_id = notmuch_message_get_message_id (message); + + refs = _notmuch_message_file_get_header (message_file, "references"); + last_ref_message_id = parse_references (message, + this_message_id, + parents, refs); + + in_reply_to = _notmuch_message_file_get_header (message_file, "in-reply-to"); + in_reply_to_message_id = parse_references (message, + this_message_id, + parents, in_reply_to); + + /* For the parent of this message, use the last message ID of the + * References header, if available. If not, fall back to the + * first message ID in the In-Reply-To header. */ + if (last_ref_message_id) { + _notmuch_message_add_term (message, "replyto", + last_ref_message_id); + } else if (in_reply_to_message_id) { + _notmuch_message_add_term (message, "replyto", + in_reply_to_message_id); + } + + keys = g_hash_table_get_keys (parents); + for (l = keys; l; l = l->next) { + char *parent_message_id; + const char *parent_thread_id = NULL; + + parent_message_id = (char *) l->data; + + _notmuch_message_add_term (message, "reference", + parent_message_id); + + ret = _resolve_message_id_to_thread_id (notmuch, + message, + parent_message_id, + &parent_thread_id); + if (ret) + goto DONE; + + if (*thread_id == NULL) { + *thread_id = talloc_strdup (message, parent_thread_id); + _notmuch_message_add_term (message, "thread", *thread_id); + } else if (strcmp (*thread_id, parent_thread_id)) { + ret = _merge_threads (notmuch, *thread_id, parent_thread_id); + if (ret) + goto DONE; + } + } + + DONE: + if (keys) + g_list_free (keys); + if (parents) + g_hash_table_unref (parents); + + return ret; +} + +static notmuch_status_t +_notmuch_database_link_message_to_children (notmuch_database_t *notmuch, + notmuch_message_t *message, + const char **thread_id) +{ + const char *message_id = notmuch_message_get_message_id (message); + Xapian::PostingIterator child, children_end; + notmuch_message_t *child_message = NULL; + const char *child_thread_id; + notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS; + notmuch_private_status_t private_status; + + _notmuch_database_find_doc_ids (notmuch, "reference", message_id, &child, &children_end); + + for ( ; child != children_end; child++) { + + child_message = _notmuch_message_create (message, notmuch, + *child, &private_status); + if (child_message == NULL) { + ret = COERCE_STATUS (private_status, + "Cannot find document for doc_id from query"); + goto DONE; + } + + child_thread_id = notmuch_message_get_thread_id (child_message); + if (*thread_id == NULL) { + *thread_id = talloc_strdup (message, child_thread_id); + _notmuch_message_add_term (message, "thread", *thread_id); + } else if (strcmp (*thread_id, child_thread_id)) { + _notmuch_message_remove_term (child_message, "reference", + message_id); + _notmuch_message_sync (child_message); + ret = _merge_threads (notmuch, *thread_id, child_thread_id); + if (ret) + goto DONE; + } + + notmuch_message_destroy (child_message); + child_message = NULL; + } + + DONE: + if (child_message) + notmuch_message_destroy (child_message); + + return ret; +} + +/* Fetch and clear the stored thread_id for message, or NULL if none. */ +static char * +_consume_metadata_thread_id (void *ctx, notmuch_database_t *notmuch, + notmuch_message_t *message) +{ + const char *message_id; + std::string stored_id; + char *metadata_key; + + message_id = notmuch_message_get_message_id (message); + metadata_key = _get_metadata_thread_id_key (ctx, message_id); + + /* Check if we have already seen related messages to this one. + * If we have then use the thread_id that we stored at that time. + */ + stored_id = notmuch->xapian_db->get_metadata (metadata_key); + if (stored_id.empty ()) { + return NULL; + } else { + Xapian::WritableDatabase *db; + + db = static_cast (notmuch->xapian_db); + + /* Clear the metadata for this message ID. We don't need it + * anymore. */ + db->set_metadata (metadata_key, ""); + + return talloc_strdup (ctx, stored_id.c_str ()); + } +} + +/* Given a blank or ghost 'message' and its corresponding + * 'message_file' link it to existing threads in the database. + * + * First, if is_ghost, this retrieves the thread ID already stored in + * the message (which will be the case if a message was previously + * added that referenced this one). If the message is blank + * (!is_ghost), it doesn't have a thread ID yet (we'll generate one + * later in this function). If the database does not support ghost + * messages, this checks for a thread ID stored in database metadata + * for this message ID. + * + * Second, we look at 'message_file' and its link-relevant headers + * (References and In-Reply-To) for message IDs. + * + * Finally, we look in the database for existing message that + * reference 'message'. + * + * In all cases, we assign to the current message the first thread ID + * found. We will also merge any existing, distinct threads where this + * message belongs to both, (which is not uncommon when messages are + * processed out of order). + * + * Finally, if no thread ID has been found through referenced messages, we + * call _notmuch_message_generate_thread_id to generate a new thread + * ID. This should only happen for new, top-level messages, (no + * References or In-Reply-To header in this message, and no previously + * added message refers to this message). + */ +static notmuch_status_t +_notmuch_database_link_message (notmuch_database_t *notmuch, + notmuch_message_t *message, + notmuch_message_file_t *message_file, + notmuch_bool_t is_ghost) +{ + void *local = talloc_new (NULL); + notmuch_status_t status; + const char *thread_id = NULL; + + /* Check if the message already had a thread ID */ + if (notmuch->features & NOTMUCH_FEATURE_GHOSTS) { + if (is_ghost) + thread_id = notmuch_message_get_thread_id (message); + } else { + thread_id = _consume_metadata_thread_id (local, notmuch, message); + if (thread_id) + _notmuch_message_add_term (message, "thread", thread_id); + } + + status = _notmuch_database_link_message_to_parents (notmuch, message, + message_file, + &thread_id); + if (status) + goto DONE; + + if (! (notmuch->features & NOTMUCH_FEATURE_GHOSTS)) { + /* In general, it shouldn't be necessary to link children, + * since the earlier indexing of those children will have + * stored a thread ID for the missing parent. However, prior + * to ghost messages, these stored thread IDs were NOT + * rewritten during thread merging (and there was no + * performant way to do so), so if indexed children were + * pulled into a different thread ID by a merge, it was + * necessary to pull them *back* into the stored thread ID of + * the parent. With ghost messages, we just rewrite the + * stored thread IDs during merging, so this workaround isn't + * necessary. */ + status = _notmuch_database_link_message_to_children (notmuch, message, + &thread_id); + if (status) + goto DONE; + } + + /* If not part of any existing thread, generate a new thread ID. */ + if (thread_id == NULL) { + thread_id = _notmuch_database_generate_thread_id (notmuch); + + _notmuch_message_add_term (message, "thread", thread_id); + } + + DONE: + talloc_free (local); + + return status; +} + +notmuch_status_t +notmuch_database_add_message (notmuch_database_t *notmuch, + const char *filename, + notmuch_message_t **message_ret) +{ + notmuch_message_file_t *message_file; + notmuch_message_t *message = NULL; + notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS, ret2; + notmuch_private_status_t private_status; + notmuch_bool_t is_ghost = false; + + const char *date, *header; + const char *from, *to, *subject; + char *message_id = NULL; + + if (message_ret) + *message_ret = NULL; + + ret = _notmuch_database_ensure_writable (notmuch); + if (ret) + return ret; + + message_file = _notmuch_message_file_open (notmuch, filename); + if (message_file == NULL) + return NOTMUCH_STATUS_FILE_ERROR; + + /* Adding a message may change many documents. Do this all + * atomically. */ + ret = notmuch_database_begin_atomic (notmuch); + if (ret) + goto DONE; + + /* Parse message up front to get better error status. */ + ret = _notmuch_message_file_parse (message_file); + if (ret) + goto DONE; + + /* Before we do any real work, (especially before doing a + * potential SHA-1 computation on the entire file's contents), + * let's make sure that what we're looking at looks like an + * actual email message. + */ + from = _notmuch_message_file_get_header (message_file, "from"); + subject = _notmuch_message_file_get_header (message_file, "subject"); + to = _notmuch_message_file_get_header (message_file, "to"); + + if ((from == NULL || *from == '\0') && + (subject == NULL || *subject == '\0') && + (to == NULL || *to == '\0')) { + ret = NOTMUCH_STATUS_FILE_NOT_EMAIL; + goto DONE; + } + + /* Now that we're sure it's mail, the first order of business + * is to find a message ID (or else create one ourselves). + */ + header = _notmuch_message_file_get_header (message_file, "message-id"); + if (header && *header != '\0') { + message_id = _parse_message_id (message_file, header, NULL); + + /* So the header value isn't RFC-compliant, but it's + * better than no message-id at all. + */ + if (message_id == NULL) + message_id = talloc_strdup (message_file, header); + } + + if (message_id == NULL ) { + /* No message-id at all, let's generate one by taking a + * hash over the file's contents. + */ + char *sha1 = _notmuch_sha1_of_file (filename); + + /* If that failed too, something is really wrong. Give up. */ + if (sha1 == NULL) { + ret = NOTMUCH_STATUS_FILE_ERROR; + goto DONE; + } + + message_id = talloc_asprintf (message_file, "notmuch-sha1-%s", sha1); + free (sha1); + } + + try { + /* Now that we have a message ID, we get a message object, + * (which may or may not reference an existing document in the + * database). */ + + message = _notmuch_message_create_for_message_id (notmuch, + message_id, + &private_status); + + talloc_free (message_id); + + if (message == NULL) { + ret = COERCE_STATUS (private_status, + "Unexpected status value from _notmuch_message_create_for_message_id"); + goto DONE; + } + + _notmuch_message_add_filename (message, filename); + + /* Is this a newly created message object or a ghost + * message? We have to be slightly careful: if this is a + * blank message, it's not safe to call + * notmuch_message_get_flag yet. */ + if (private_status == NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND || + (is_ghost = notmuch_message_get_flag ( + message, NOTMUCH_MESSAGE_FLAG_GHOST))) { + _notmuch_message_add_term (message, "type", "mail"); + if (is_ghost) + /* Convert ghost message to a regular message */ + _notmuch_message_remove_term (message, "type", "ghost"); + + ret = _notmuch_database_link_message (notmuch, message, + message_file, is_ghost); + if (ret) + goto DONE; + + date = _notmuch_message_file_get_header (message_file, "date"); + _notmuch_message_set_header_values (message, date, from, subject); + + ret = _notmuch_message_index_file (message, message_file); + if (ret) + goto DONE; + } else { + ret = NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID; + } + + _notmuch_message_sync (message); + } catch (const Xapian::Error &error) { + _notmuch_database_log (notmuch, "A Xapian exception occurred adding message: %s.\n", + error.get_msg().c_str()); + notmuch->exception_reported = TRUE; + ret = NOTMUCH_STATUS_XAPIAN_EXCEPTION; + goto DONE; + } + + DONE: + if (message) { + if ((ret == NOTMUCH_STATUS_SUCCESS || + ret == NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID) && message_ret) + *message_ret = message; + else + notmuch_message_destroy (message); + } + + if (message_file) + _notmuch_message_file_close (message_file); + + ret2 = notmuch_database_end_atomic (notmuch); + if ((ret == NOTMUCH_STATUS_SUCCESS || + ret == NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID) && + ret2 != NOTMUCH_STATUS_SUCCESS) + ret = ret2; + + return ret; +} diff --git a/lib/database-private.h b/lib/database-private.h index 727b1d61..504618b9 100644 --- a/lib/database-private.h +++ b/lib/database-private.h @@ -246,4 +246,14 @@ _notmuch_database_get_terms_with_prefix (void *ctx, Xapian::TermIterator &i, Xapian::TermIterator &end, const char *prefix); +void +_notmuch_database_find_doc_ids (notmuch_database_t *notmuch, + const char *prefix_name, + const char *value, + Xapian::PostingIterator *begin, + Xapian::PostingIterator *end); + + +#pragma GCC visibility pop + #endif diff --git a/lib/database.cc b/lib/database.cc index 5b13f541..8f0e22a8 100644 --- a/lib/database.cc +++ b/lib/database.cc @@ -463,12 +463,12 @@ find_doc_ids_for_term (notmuch_database_t *notmuch, *end = notmuch->xapian_db->postlist_end (term); } -static void -find_doc_ids (notmuch_database_t *notmuch, - const char *prefix_name, - const char *value, - Xapian::PostingIterator *begin, - Xapian::PostingIterator *end) +void +_notmuch_database_find_doc_ids (notmuch_database_t *notmuch, + const char *prefix_name, + const char *value, + Xapian::PostingIterator *begin, + Xapian::PostingIterator *end) { char *term; @@ -488,7 +488,7 @@ _notmuch_database_find_unique_doc_id (notmuch_database_t *notmuch, { Xapian::PostingIterator i, end; - find_doc_ids (notmuch, prefix_name, value, &i, &end); + _notmuch_database_find_doc_ids (notmuch, prefix_name, value, &i, &end); if (i == end) { *doc_id = 0; @@ -568,147 +568,6 @@ notmuch_database_find_message (notmuch_database_t *notmuch, } } -/* Advance 'str' past any whitespace or RFC 822 comments. A comment is - * a (potentially nested) parenthesized sequence with '\' used to - * escape any character (including parentheses). - * - * If the sequence to be skipped continues to the end of the string, - * then 'str' will be left pointing at the final terminating '\0' - * character. - */ -static void -skip_space_and_comments (const char **str) -{ - const char *s; - - s = *str; - while (*s && (isspace (*s) || *s == '(')) { - while (*s && isspace (*s)) - s++; - if (*s == '(') { - int nesting = 1; - s++; - while (*s && nesting) { - if (*s == '(') { - nesting++; - } else if (*s == ')') { - nesting--; - } else if (*s == '\\') { - if (*(s+1)) - s++; - } - s++; - } - } - } - - *str = s; -} - -/* Parse an RFC 822 message-id, discarding whitespace, any RFC 822 - * comments, and the '<' and '>' delimiters. - * - * If not NULL, then *next will be made to point to the first character - * not parsed, (possibly pointing to the final '\0' terminator. - * - * Returns a newly talloc'ed string belonging to 'ctx'. - * - * Returns NULL if there is any error parsing the message-id. */ -static char * -_parse_message_id (void *ctx, const char *message_id, const char **next) -{ - const char *s, *end; - char *result; - - if (message_id == NULL || *message_id == '\0') - return NULL; - - s = message_id; - - skip_space_and_comments (&s); - - /* Skip any unstructured text as well. */ - while (*s && *s != '<') - s++; - - if (*s == '<') { - s++; - } else { - if (next) - *next = s; - return NULL; - } - - skip_space_and_comments (&s); - - end = s; - while (*end && *end != '>') - end++; - if (next) { - if (*end) - *next = end + 1; - else - *next = end; - } - - if (end > s && *end == '>') - end--; - if (end <= s) - return NULL; - - result = talloc_strndup (ctx, s, end - s + 1); - - /* Finally, collapse any whitespace that is within the message-id - * itself. */ - { - char *r; - int len; - - for (r = result, len = strlen (r); *r; r++, len--) - if (*r == ' ' || *r == '\t') - memmove (r, r+1, len); - } - - return result; -} - -/* Parse a References header value, putting a (talloc'ed under 'ctx') - * copy of each referenced message-id into 'hash'. - * - * We explicitly avoid including any reference identical to - * 'message_id' in the result (to avoid mass confusion when a single - * message references itself cyclically---and yes, mail messages are - * not infrequent in the wild that do this---don't ask me why). - * - * Return the last reference parsed, if it is not equal to message_id. - */ -static char * -parse_references (void *ctx, - const char *message_id, - GHashTable *hash, - const char *refs) -{ - char *ref, *last_ref = NULL; - - if (refs == NULL || *refs == '\0') - return NULL; - - while (*refs) { - ref = _parse_message_id (ctx, refs, &refs); - - if (ref && strcmp (ref, message_id)) { - g_hash_table_add (hash, ref); - last_ref = ref; - } - } - - /* The return value of this function is used to add a parent - * reference to the database. We should avoid making a message - * its own parent, thus the above check. - */ - return talloc_strdup(ctx, last_ref); -} - notmuch_status_t notmuch_database_create (const char *path, notmuch_database_t **database) { @@ -2044,583 +1903,6 @@ _notmuch_database_generate_doc_id (notmuch_database_t *notmuch) return notmuch->last_doc_id; } -static const char * -_notmuch_database_generate_thread_id (notmuch_database_t *notmuch) -{ - /* 16 bytes (+ terminator) for hexadecimal representation of - * a 64-bit integer. */ - static char thread_id[17]; - Xapian::WritableDatabase *db; - - db = static_cast (notmuch->xapian_db); - - notmuch->last_thread_id++; - - sprintf (thread_id, "%016" PRIx64, notmuch->last_thread_id); - - db->set_metadata ("last_thread_id", thread_id); - - return thread_id; -} - -static char * -_get_metadata_thread_id_key (void *ctx, const char *message_id) -{ - if (strlen (message_id) > NOTMUCH_MESSAGE_ID_MAX) - message_id = _notmuch_message_id_compressed (ctx, message_id); - - return talloc_asprintf (ctx, NOTMUCH_METADATA_THREAD_ID_PREFIX "%s", - message_id); -} - -static notmuch_status_t -_resolve_message_id_to_thread_id_old (notmuch_database_t *notmuch, - void *ctx, - const char *message_id, - const char **thread_id_ret); - -/* Find the thread ID to which the message with 'message_id' belongs. - * - * Note: 'thread_id_ret' must not be NULL! - * On success '*thread_id_ret' is set to a newly talloced string belonging to - * 'ctx'. - * - * Note: If there is no message in the database with the given - * 'message_id' then a new thread_id will be allocated for this - * message ID and stored in the database metadata so that the - * thread ID can be looked up if the message is added to the database - * later. - */ -static notmuch_status_t -_resolve_message_id_to_thread_id (notmuch_database_t *notmuch, - void *ctx, - const char *message_id, - const char **thread_id_ret) -{ - notmuch_private_status_t status; - notmuch_message_t *message; - - if (! (notmuch->features & NOTMUCH_FEATURE_GHOSTS)) - return _resolve_message_id_to_thread_id_old (notmuch, ctx, message_id, - thread_id_ret); - - /* Look for this message (regular or ghost) */ - message = _notmuch_message_create_for_message_id ( - notmuch, message_id, &status); - if (status == NOTMUCH_PRIVATE_STATUS_SUCCESS) { - /* Message exists */ - *thread_id_ret = talloc_steal ( - ctx, notmuch_message_get_thread_id (message)); - } else if (status == NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND) { - /* Message did not exist. Give it a fresh thread ID and - * populate this message as a ghost message. */ - *thread_id_ret = talloc_strdup ( - ctx, _notmuch_database_generate_thread_id (notmuch)); - if (! *thread_id_ret) { - status = NOTMUCH_PRIVATE_STATUS_OUT_OF_MEMORY; - } else { - status = _notmuch_message_initialize_ghost (message, *thread_id_ret); - if (status == 0) - /* Commit the new ghost message */ - _notmuch_message_sync (message); - } - } else { - /* Create failed. Fall through. */ - } - - notmuch_message_destroy (message); - - return COERCE_STATUS (status, "Error creating ghost message"); -} - -/* Pre-ghost messages _resolve_message_id_to_thread_id */ -static notmuch_status_t -_resolve_message_id_to_thread_id_old (notmuch_database_t *notmuch, - void *ctx, - const char *message_id, - const char **thread_id_ret) -{ - notmuch_status_t status; - notmuch_message_t *message; - string thread_id_string; - char *metadata_key; - Xapian::WritableDatabase *db; - - status = notmuch_database_find_message (notmuch, message_id, &message); - - if (status) - return status; - - if (message) { - *thread_id_ret = talloc_steal (ctx, - notmuch_message_get_thread_id (message)); - - notmuch_message_destroy (message); - - return NOTMUCH_STATUS_SUCCESS; - } - - /* Message has not been seen yet. - * - * We may have seen a reference to it already, in which case, we - * can return the thread ID stored in the metadata. Otherwise, we - * generate a new thread ID and store it there. - */ - db = static_cast (notmuch->xapian_db); - metadata_key = _get_metadata_thread_id_key (ctx, message_id); - thread_id_string = notmuch->xapian_db->get_metadata (metadata_key); - - if (thread_id_string.empty()) { - *thread_id_ret = talloc_strdup (ctx, - _notmuch_database_generate_thread_id (notmuch)); - db->set_metadata (metadata_key, *thread_id_ret); - } else { - *thread_id_ret = talloc_strdup (ctx, thread_id_string.c_str()); - } - - talloc_free (metadata_key); - - return NOTMUCH_STATUS_SUCCESS; -} - -static notmuch_status_t -_merge_threads (notmuch_database_t *notmuch, - const char *winner_thread_id, - const char *loser_thread_id) -{ - Xapian::PostingIterator loser, loser_end; - notmuch_message_t *message = NULL; - notmuch_private_status_t private_status; - notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS; - - find_doc_ids (notmuch, "thread", loser_thread_id, &loser, &loser_end); - - for ( ; loser != loser_end; loser++) { - message = _notmuch_message_create (notmuch, notmuch, - *loser, &private_status); - if (message == NULL) { - ret = COERCE_STATUS (private_status, - "Cannot find document for doc_id from query"); - goto DONE; - } - - _notmuch_message_remove_term (message, "thread", loser_thread_id); - _notmuch_message_add_term (message, "thread", winner_thread_id); - _notmuch_message_sync (message); - - notmuch_message_destroy (message); - message = NULL; - } - - DONE: - if (message) - notmuch_message_destroy (message); - - return ret; -} - -static void -_my_talloc_free_for_g_hash (void *ptr) -{ - talloc_free (ptr); -} - -static notmuch_status_t -_notmuch_database_link_message_to_parents (notmuch_database_t *notmuch, - notmuch_message_t *message, - notmuch_message_file_t *message_file, - const char **thread_id) -{ - GHashTable *parents = NULL; - const char *refs, *in_reply_to, *in_reply_to_message_id; - const char *last_ref_message_id, *this_message_id; - GList *l, *keys = NULL; - notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS; - - parents = g_hash_table_new_full (g_str_hash, g_str_equal, - _my_talloc_free_for_g_hash, NULL); - this_message_id = notmuch_message_get_message_id (message); - - refs = _notmuch_message_file_get_header (message_file, "references"); - last_ref_message_id = parse_references (message, - this_message_id, - parents, refs); - - in_reply_to = _notmuch_message_file_get_header (message_file, "in-reply-to"); - in_reply_to_message_id = parse_references (message, - this_message_id, - parents, in_reply_to); - - /* For the parent of this message, use the last message ID of the - * References header, if available. If not, fall back to the - * first message ID in the In-Reply-To header. */ - if (last_ref_message_id) { - _notmuch_message_add_term (message, "replyto", - last_ref_message_id); - } else if (in_reply_to_message_id) { - _notmuch_message_add_term (message, "replyto", - in_reply_to_message_id); - } - - keys = g_hash_table_get_keys (parents); - for (l = keys; l; l = l->next) { - char *parent_message_id; - const char *parent_thread_id = NULL; - - parent_message_id = (char *) l->data; - - _notmuch_message_add_term (message, "reference", - parent_message_id); - - ret = _resolve_message_id_to_thread_id (notmuch, - message, - parent_message_id, - &parent_thread_id); - if (ret) - goto DONE; - - if (*thread_id == NULL) { - *thread_id = talloc_strdup (message, parent_thread_id); - _notmuch_message_add_term (message, "thread", *thread_id); - } else if (strcmp (*thread_id, parent_thread_id)) { - ret = _merge_threads (notmuch, *thread_id, parent_thread_id); - if (ret) - goto DONE; - } - } - - DONE: - if (keys) - g_list_free (keys); - if (parents) - g_hash_table_unref (parents); - - return ret; -} - -static notmuch_status_t -_notmuch_database_link_message_to_children (notmuch_database_t *notmuch, - notmuch_message_t *message, - const char **thread_id) -{ - const char *message_id = notmuch_message_get_message_id (message); - Xapian::PostingIterator child, children_end; - notmuch_message_t *child_message = NULL; - const char *child_thread_id; - notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS; - notmuch_private_status_t private_status; - - find_doc_ids (notmuch, "reference", message_id, &child, &children_end); - - for ( ; child != children_end; child++) { - - child_message = _notmuch_message_create (message, notmuch, - *child, &private_status); - if (child_message == NULL) { - ret = COERCE_STATUS (private_status, - "Cannot find document for doc_id from query"); - goto DONE; - } - - child_thread_id = notmuch_message_get_thread_id (child_message); - if (*thread_id == NULL) { - *thread_id = talloc_strdup (message, child_thread_id); - _notmuch_message_add_term (message, "thread", *thread_id); - } else if (strcmp (*thread_id, child_thread_id)) { - _notmuch_message_remove_term (child_message, "reference", - message_id); - _notmuch_message_sync (child_message); - ret = _merge_threads (notmuch, *thread_id, child_thread_id); - if (ret) - goto DONE; - } - - notmuch_message_destroy (child_message); - child_message = NULL; - } - - DONE: - if (child_message) - notmuch_message_destroy (child_message); - - return ret; -} - -/* Fetch and clear the stored thread_id for message, or NULL if none. */ -static char * -_consume_metadata_thread_id (void *ctx, notmuch_database_t *notmuch, - notmuch_message_t *message) -{ - const char *message_id; - string stored_id; - char *metadata_key; - - message_id = notmuch_message_get_message_id (message); - metadata_key = _get_metadata_thread_id_key (ctx, message_id); - - /* Check if we have already seen related messages to this one. - * If we have then use the thread_id that we stored at that time. - */ - stored_id = notmuch->xapian_db->get_metadata (metadata_key); - if (stored_id.empty ()) { - return NULL; - } else { - Xapian::WritableDatabase *db; - - db = static_cast (notmuch->xapian_db); - - /* Clear the metadata for this message ID. We don't need it - * anymore. */ - db->set_metadata (metadata_key, ""); - - return talloc_strdup (ctx, stored_id.c_str ()); - } -} - -/* Given a blank or ghost 'message' and its corresponding - * 'message_file' link it to existing threads in the database. - * - * First, if is_ghost, this retrieves the thread ID already stored in - * the message (which will be the case if a message was previously - * added that referenced this one). If the message is blank - * (!is_ghost), it doesn't have a thread ID yet (we'll generate one - * later in this function). If the database does not support ghost - * messages, this checks for a thread ID stored in database metadata - * for this message ID. - * - * Second, we look at 'message_file' and its link-relevant headers - * (References and In-Reply-To) for message IDs. - * - * Finally, we look in the database for existing message that - * reference 'message'. - * - * In all cases, we assign to the current message the first thread ID - * found. We will also merge any existing, distinct threads where this - * message belongs to both, (which is not uncommon when messages are - * processed out of order). - * - * Finally, if no thread ID has been found through referenced messages, we - * call _notmuch_message_generate_thread_id to generate a new thread - * ID. This should only happen for new, top-level messages, (no - * References or In-Reply-To header in this message, and no previously - * added message refers to this message). - */ -static notmuch_status_t -_notmuch_database_link_message (notmuch_database_t *notmuch, - notmuch_message_t *message, - notmuch_message_file_t *message_file, - notmuch_bool_t is_ghost) -{ - void *local = talloc_new (NULL); - notmuch_status_t status; - const char *thread_id = NULL; - - /* Check if the message already had a thread ID */ - if (notmuch->features & NOTMUCH_FEATURE_GHOSTS) { - if (is_ghost) - thread_id = notmuch_message_get_thread_id (message); - } else { - thread_id = _consume_metadata_thread_id (local, notmuch, message); - if (thread_id) - _notmuch_message_add_term (message, "thread", thread_id); - } - - status = _notmuch_database_link_message_to_parents (notmuch, message, - message_file, - &thread_id); - if (status) - goto DONE; - - if (! (notmuch->features & NOTMUCH_FEATURE_GHOSTS)) { - /* In general, it shouldn't be necessary to link children, - * since the earlier indexing of those children will have - * stored a thread ID for the missing parent. However, prior - * to ghost messages, these stored thread IDs were NOT - * rewritten during thread merging (and there was no - * performant way to do so), so if indexed children were - * pulled into a different thread ID by a merge, it was - * necessary to pull them *back* into the stored thread ID of - * the parent. With ghost messages, we just rewrite the - * stored thread IDs during merging, so this workaround isn't - * necessary. */ - status = _notmuch_database_link_message_to_children (notmuch, message, - &thread_id); - if (status) - goto DONE; - } - - /* If not part of any existing thread, generate a new thread ID. */ - if (thread_id == NULL) { - thread_id = _notmuch_database_generate_thread_id (notmuch); - - _notmuch_message_add_term (message, "thread", thread_id); - } - - DONE: - talloc_free (local); - - return status; -} - -notmuch_status_t -notmuch_database_add_message (notmuch_database_t *notmuch, - const char *filename, - notmuch_message_t **message_ret) -{ - notmuch_message_file_t *message_file; - notmuch_message_t *message = NULL; - notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS, ret2; - notmuch_private_status_t private_status; - notmuch_bool_t is_ghost = false; - - const char *date, *header; - const char *from, *to, *subject; - char *message_id = NULL; - - if (message_ret) - *message_ret = NULL; - - ret = _notmuch_database_ensure_writable (notmuch); - if (ret) - return ret; - - message_file = _notmuch_message_file_open (notmuch, filename); - if (message_file == NULL) - return NOTMUCH_STATUS_FILE_ERROR; - - /* Adding a message may change many documents. Do this all - * atomically. */ - ret = notmuch_database_begin_atomic (notmuch); - if (ret) - goto DONE; - - /* Parse message up front to get better error status. */ - ret = _notmuch_message_file_parse (message_file); - if (ret) - goto DONE; - - /* Before we do any real work, (especially before doing a - * potential SHA-1 computation on the entire file's contents), - * let's make sure that what we're looking at looks like an - * actual email message. - */ - from = _notmuch_message_file_get_header (message_file, "from"); - subject = _notmuch_message_file_get_header (message_file, "subject"); - to = _notmuch_message_file_get_header (message_file, "to"); - - if ((from == NULL || *from == '\0') && - (subject == NULL || *subject == '\0') && - (to == NULL || *to == '\0')) { - ret = NOTMUCH_STATUS_FILE_NOT_EMAIL; - goto DONE; - } - - /* Now that we're sure it's mail, the first order of business - * is to find a message ID (or else create one ourselves). - */ - header = _notmuch_message_file_get_header (message_file, "message-id"); - if (header && *header != '\0') { - message_id = _parse_message_id (message_file, header, NULL); - - /* So the header value isn't RFC-compliant, but it's - * better than no message-id at all. - */ - if (message_id == NULL) - message_id = talloc_strdup (message_file, header); - } - - if (message_id == NULL ) { - /* No message-id at all, let's generate one by taking a - * hash over the file's contents. - */ - char *sha1 = _notmuch_sha1_of_file (filename); - - /* If that failed too, something is really wrong. Give up. */ - if (sha1 == NULL) { - ret = NOTMUCH_STATUS_FILE_ERROR; - goto DONE; - } - - message_id = talloc_asprintf (message_file, "notmuch-sha1-%s", sha1); - free (sha1); - } - - try { - /* Now that we have a message ID, we get a message object, - * (which may or may not reference an existing document in the - * database). */ - - message = _notmuch_message_create_for_message_id (notmuch, - message_id, - &private_status); - - talloc_free (message_id); - - if (message == NULL) { - ret = COERCE_STATUS (private_status, - "Unexpected status value from _notmuch_message_create_for_message_id"); - goto DONE; - } - - _notmuch_message_add_filename (message, filename); - - /* Is this a newly created message object or a ghost - * message? We have to be slightly careful: if this is a - * blank message, it's not safe to call - * notmuch_message_get_flag yet. */ - if (private_status == NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND || - (is_ghost = notmuch_message_get_flag ( - message, NOTMUCH_MESSAGE_FLAG_GHOST))) { - _notmuch_message_add_term (message, "type", "mail"); - if (is_ghost) - /* Convert ghost message to a regular message */ - _notmuch_message_remove_term (message, "type", "ghost"); - - ret = _notmuch_database_link_message (notmuch, message, - message_file, is_ghost); - if (ret) - goto DONE; - - date = _notmuch_message_file_get_header (message_file, "date"); - _notmuch_message_set_header_values (message, date, from, subject); - - ret = _notmuch_message_index_file (message, message_file); - if (ret) - goto DONE; - } else { - ret = NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID; - } - - _notmuch_message_sync (message); - } catch (const Xapian::Error &error) { - _notmuch_database_log (notmuch, "A Xapian exception occurred adding message: %s.\n", - error.get_msg().c_str()); - notmuch->exception_reported = TRUE; - ret = NOTMUCH_STATUS_XAPIAN_EXCEPTION; - goto DONE; - } - - DONE: - if (message) { - if ((ret == NOTMUCH_STATUS_SUCCESS || - ret == NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID) && message_ret) - *message_ret = message; - else - notmuch_message_destroy (message); - } - - if (message_file) - _notmuch_message_file_close (message_file); - - ret2 = notmuch_database_end_atomic (notmuch); - if ((ret == NOTMUCH_STATUS_SUCCESS || - ret == NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID) && - ret2 != NOTMUCH_STATUS_SUCCESS) - ret = ret2; - - return ret; -} - notmuch_status_t notmuch_database_remove_message (notmuch_database_t *notmuch, const char *filename) -- 2.11.0