From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from localhost (localhost [127.0.0.1]) by arlo.cworth.org (Postfix) with ESMTP id BF9286DE091F for ; Fri, 8 Jul 2016 03:13:16 -0700 (PDT) X-Virus-Scanned: Debian amavisd-new at cworth.org X-Spam-Flag: NO X-Spam-Score: 0 X-Spam-Level: X-Spam-Status: No, score=0 tagged_above=-999 required=5 tests=[none] autolearn=disabled Received: from arlo.cworth.org ([127.0.0.1]) by localhost (arlo.cworth.org [127.0.0.1]) (amavisd-new, port 10024) with ESMTP id RsaQ-QlS0tUS for ; Fri, 8 Jul 2016 03:13:08 -0700 (PDT) Received: from che.mayfirst.org (che.mayfirst.org [162.247.75.118]) by arlo.cworth.org (Postfix) with ESMTP id 121A16DE01BA for ; Fri, 8 Jul 2016 03:13:07 -0700 (PDT) Received: from fifthhorseman.net (unknown [88.128.80.54]) by che.mayfirst.org (Postfix) with ESMTPSA id 8414CF99A for ; Fri, 8 Jul 2016 06:13:06 -0400 (EDT) Received: by fifthhorseman.net (Postfix, from userid 1000) id C9C402174C; Fri, 8 Jul 2016 11:27:34 +0200 (CEST) From: Daniel Kahn Gillmor To: Notmuch Mail Subject: [PATCH v4 15/16] added notmuch_message_reindex Date: Fri, 8 Jul 2016 11:27:26 +0200 Message-Id: <1467970047-8013-16-git-send-email-dkg@fifthhorseman.net> X-Mailer: git-send-email 2.8.1 In-Reply-To: <1467970047-8013-1-git-send-email-dkg@fifthhorseman.net> References: <1467970047-8013-1-git-send-email-dkg@fifthhorseman.net> X-BeenThere: notmuch@notmuchmail.org X-Mailman-Version: 2.1.20 Precedence: list List-Id: "Use and development of the notmuch mail system." List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 08 Jul 2016 10:13:16 -0000 This new function asks the database to reindex a given message, using the supplied indexopts. This can be used, for example, to index the cleartext of an encrypted message. My initial inclination for this implementation was to remove all the indexed terms for a given message's body, and then to add them back in. Unfortunately, that doesn't appear to be possible due to the way we're using xapian. I could find no way to distinguish terms which were added during indexing of the message body from other terms associated with the document. As a result, we just save the tags and properties, remove the message from the database entirely, and add it back into the database in full, re-adding tags and properties as needed. --- lib/message.cc | 108 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++- lib/notmuch.h | 14 ++++++++ 2 files changed, 121 insertions(+), 1 deletion(-) diff --git a/lib/message.cc b/lib/message.cc index 9d3e807..ab807b7 100644 --- a/lib/message.cc +++ b/lib/message.cc @@ -557,7 +557,9 @@ void _notmuch_message_remove_terms (notmuch_message_t *message, const char *prefix) { Xapian::TermIterator i; - size_t prefix_len = strlen (prefix); + size_t prefix_len = 0; + + prefix_len = strlen (prefix); while (1) { i = message->doc.termlist_begin (); @@ -1847,3 +1849,107 @@ _notmuch_message_frozen (notmuch_message_t *message) { return message->frozen; } + +notmuch_status_t +notmuch_message_reindex (notmuch_message_t *message, + notmuch_indexopts_t *indexopts) +{ + notmuch_database_t *notmuch = NULL; + notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS, status; + notmuch_tags_t *tags = NULL; + notmuch_message_properties_t *properties = NULL; + notmuch_filenames_t *filenames, *orig_filenames = NULL; + const char *filename = NULL, *tag = NULL, *propkey = NULL; + notmuch_message_t *newmsg = NULL; + notmuch_bool_t readded = FALSE, skip; + const char *autotags[] = { + "attachment", + "encrypted", + "signed" }; + const char *autoproperties[] = { "index-decryption" }; + + if (message == NULL) + return NOTMUCH_STATUS_NULL_POINTER; + + notmuch = _notmuch_message_database (message); + + /* cache tags, properties, and filenames */ + tags = notmuch_message_get_tags (message); + properties = notmuch_message_get_properties (message, "", FALSE); + filenames = notmuch_message_get_filenames (message); + orig_filenames = notmuch_message_get_filenames (message); + + /* walk through filenames, removing them until the message is gone */ + for ( ; notmuch_filenames_valid (filenames); + notmuch_filenames_move_to_next (filenames)) { + filename = notmuch_filenames_get (filenames); + + ret = notmuch_database_remove_message (notmuch, filename); + if (ret != NOTMUCH_STATUS_SUCCESS && + ret != NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID) + return ret; + } + if (ret != NOTMUCH_STATUS_SUCCESS) + return ret; + + /* re-add the filenames with the associated indexopts */ + for (; notmuch_filenames_valid (orig_filenames); + notmuch_filenames_move_to_next (orig_filenames)) { + filename = notmuch_filenames_get (orig_filenames); + + status = notmuch_database_add_message_with_indexopts(notmuch, + filename, + indexopts, + readded ? NULL : &newmsg); + if (status == NOTMUCH_STATUS_SUCCESS || + status == NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID) { + if (!readded) { + /* re-add tags */ + for (; notmuch_tags_valid (tags); + notmuch_tags_move_to_next (tags)) { + tag = notmuch_tags_get (tags); + skip = FALSE; + + for (size_t i = 0; i < ARRAY_SIZE (autotags); i++) + if (strcmp (tag, autotags[i]) == 0) + skip = TRUE; + + if (!skip) { + status = notmuch_message_add_tag (newmsg, tag); + if (status != NOTMUCH_STATUS_SUCCESS) + ret = status; + } + } + /* re-add properties */ + for (; notmuch_message_properties_valid (properties); + notmuch_message_properties_move_to_next (properties)) { + propkey = notmuch_message_properties_key (properties); + skip = FALSE; + + for (size_t i = 0; i < ARRAY_SIZE (autoproperties); i++) + if (strcmp (propkey, autoproperties[i]) == 0) + skip = TRUE; + + if (!skip) { + status = notmuch_message_add_property (newmsg, propkey, + notmuch_message_properties_value (properties)); + if (status != NOTMUCH_STATUS_SUCCESS) + ret = status; + } + } + readded = TRUE; + } + } else { + /* if we failed to add this filename, go ahead and try the + * next one as though it were first, but report the + * error... */ + ret = status; + } + } + if (newmsg) + notmuch_message_destroy (newmsg); + + /* should we also destroy the incoming message object? at the + * moment, we leave that to the caller */ + return ret; +} diff --git a/lib/notmuch.h b/lib/notmuch.h index 66b3503..9076a9b 100644 --- a/lib/notmuch.h +++ b/lib/notmuch.h @@ -1394,6 +1394,20 @@ notmuch_filenames_t * notmuch_message_get_filenames (notmuch_message_t *message); /** + * Re-index the e-mail corresponding to 'message' using the supplied index options + * + * Returns the status of the re-index operation. (see the return + * codes documented in notmuch_database_add_message) + * + * After reindexing, the user should discard the message object passed + * in here by calling notmuch_message_destroy, since it refers to the + * original message, not to the reindexed message. + */ +notmuch_status_t +notmuch_message_reindex (notmuch_message_t *message, + notmuch_indexopts_t *indexopts); + +/** * Message flags. */ typedef enum _notmuch_message_flag { -- 2.8.1