From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from localhost (localhost [127.0.0.1]) by arlo.cworth.org (Postfix) with ESMTP id 108F66DE0F59 for ; Mon, 15 Apr 2019 18:46:28 -0700 (PDT) X-Virus-Scanned: Debian amavisd-new at cworth.org X-Spam-Flag: NO X-Spam-Score: -0.025 X-Spam-Level: X-Spam-Status: No, score=-0.025 tagged_above=-999 required=5 tests=[AWL=-0.024, SPF_PASS=-0.001] autolearn=disabled Received: from arlo.cworth.org ([127.0.0.1]) by localhost (arlo.cworth.org [127.0.0.1]) (amavisd-new, port 10024) with ESMTP id E2tayZgtxTD6 for ; Mon, 15 Apr 2019 18:46:27 -0700 (PDT) Received: from fethera.tethera.net (fethera.tethera.net [198.245.60.197]) by arlo.cworth.org (Postfix) with ESMTPS id E602C6DE0F40 for ; Mon, 15 Apr 2019 18:46:26 -0700 (PDT) Received: from remotemail by fethera.tethera.net with local (Exim 4.89) (envelope-from ) id 1hGDB3-0007oz-3l; Mon, 15 Apr 2019 21:46:25 -0400 Received: (nullmailer pid 31749 invoked by uid 1000); Tue, 16 Apr 2019 01:46:21 -0000 From: David Bremner To: notmuch@notmuchmail.org Subject: [PATCH 2/2] n_m_remove_indexed_terms: reduce number of Xapian API calls. Date: Mon, 15 Apr 2019 22:46:16 -0300 Message-Id: <20190416014616.31623-3-david@tethera.net> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20190416014616.31623-1-david@tethera.net> References: <20190416014616.31623-1-david@tethera.net> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-BeenThere: notmuch@notmuchmail.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: "Use and development of the notmuch mail system." List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 16 Apr 2019 01:46:28 -0000 Previously this function scanned every term attached to a given Xapian document. It turns out we know how to read only the terms we need to preserve (and we might have already done so). 
This commit replaces many calls to Xapian::Document::remove_term with one call to ::clear_terms, and a (typically much smaller) number of calls to ::add_term. Roughly speaking this is based on the assumption that most messages have more text than they have tags. According to the performance test suite, this yields a roughly 40% speedup on "notmuch reindex '*'" --- lib/message.cc | 66 +++++++++++++++++++++++++++++--------------------- 1 file changed, 38 insertions(+), 28 deletions(-) diff --git a/lib/message.cc b/lib/message.cc index 6f2f6345..3e33d8b8 100644 --- a/lib/message.cc +++ b/lib/message.cc @@ -716,6 +716,8 @@ _notmuch_message_remove_terms (notmuch_message_t *message, const char *prefix) /* Remove all terms generated by indexing, i.e. not tags or * properties, along with any automatic tags*/ +/* According to Xapian API docs, none of these calls throw + * exceptions */ notmuch_private_status_t _notmuch_message_remove_indexed_terms (notmuch_message_t *message) { @@ -727,45 +729,53 @@ _notmuch_message_remove_indexed_terms (notmuch_message_t *message) tag_prefix = _find_prefix ("tag"), type_prefix = _find_prefix ("type"); - for (i = message->doc.termlist_begin (); - i != message->doc.termlist_end (); i++) { + /* Make sure we have the data to restore to Xapian*/ + _notmuch_message_ensure_metadata (message,NULL); - const std::string term = *i; + /* Empirically, it turns out to be faster to remove all the terms, + * and add back the ones we want. 
*/ + message->doc.clear_terms (); + message->modified = true; - if (term.compare (0, type_prefix.size (), type_prefix) == 0) - continue; + /* still a mail message */ + message->doc.add_term (type_prefix + "mail"); - if (term.compare (0, id_prefix.size (), id_prefix) == 0) - continue; + /* Put back message-id */ + message->doc.add_term (id_prefix + message->message_id); - if (term.compare (0, property_prefix.size (), property_prefix) == 0) - continue; + /* Put back non-automatic tags */ + for (notmuch_tags_t *tags = notmuch_message_get_tags (message); + notmuch_tags_valid (tags); + notmuch_tags_move_to_next (tags)) { - if (term.compare (0, tag_prefix.size (), tag_prefix) == 0 && - term.compare (1, strlen("encrypted"), "encrypted") != 0 && - term.compare (1, strlen("signed"), "signed") != 0 && - term.compare (1, strlen("attachment"), "attachment") != 0) - continue; + const char *tag = notmuch_tags_get (tags); - try { - message->doc.remove_term ((*i)); - message->modified = true; - } catch (const Xapian::InvalidArgumentError) { - /* Ignore failure to remove non-existent term. 
*/ - } catch (const Xapian::Error &error) { - notmuch_database_t *notmuch = message->notmuch; - - if (!notmuch->exception_reported) { - _notmuch_database_log(notmuch_message_get_database (message), "A Xapian exception occurred creating message: %s\n", - error.get_msg().c_str()); - notmuch->exception_reported = true; - } - return NOTMUCH_PRIVATE_STATUS_XAPIAN_EXCEPTION; + if (STRNCMP_LITERAL (tag, "encrypted") != 0 && + STRNCMP_LITERAL (tag, "signed") != 0 && + STRNCMP_LITERAL (tag, "attachment") != 0) { + std::string term = tag_prefix + tag; + message->doc.add_term(term); } } + + /* Put back properties */ + notmuch_message_properties_t *list; + + for (list = notmuch_message_get_properties (message, "", false); + notmuch_message_properties_valid (list); notmuch_message_properties_move_to_next (list)) { + std::string term = property_prefix + + notmuch_message_properties_key(list) + "=" + + notmuch_message_properties_value(list); + + message->doc.add_term(term); + } + + notmuch_message_properties_destroy (list); + return NOTMUCH_PRIVATE_STATUS_SUCCESS; } + /* Return true if p points at "new" or "cur". */ static bool is_maildir (const char *p) { -- 2.20.1