From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mp0 ([2001:41d0:8:6d80::]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)) by ms0.migadu.com with LMTPS id KG39NalVFWG7NwEAgWs5BA (envelope-from ) for ; Thu, 12 Aug 2021 19:08:57 +0200 Received: from aspmx1.migadu.com ([2001:41d0:8:6d80::]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)) by mp0 with LMTPS id ADzEMalVFWFrdgAA1q6Kng (envelope-from ) for ; Thu, 12 Aug 2021 17:08:57 +0000 Received: from mail.notmuchmail.org (nmbug.tethera.net [IPv6:2607:5300:201:3100::1657]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits) server-digest SHA256) (No client certificate requested) by aspmx1.migadu.com (Postfix) with ESMTPS id 59EBBBA44 for ; Thu, 12 Aug 2021 19:08:57 +0200 (CEST) Received: from nmbug.tethera.net (localhost [127.0.0.1]) by mail.notmuchmail.org (Postfix) with ESMTP id BECB52931E; Thu, 12 Aug 2021 13:08:22 -0400 (EDT) Received: from fethera.tethera.net (fethera.tethera.net [198.245.60.197]) by mail.notmuchmail.org (Postfix) with ESMTP id 6C15029231 for ; Thu, 12 Aug 2021 13:08:09 -0400 (EDT) Received: by fethera.tethera.net (Postfix, from userid 1001) id 63F6D5FD5C; Thu, 12 Aug 2021 13:08:09 -0400 (EDT) Received: (nullmailer pid 1348767 invoked by uid 1000); Thu, 12 Aug 2021 17:07:43 -0000 From: David Bremner To: notmuch@notmuchmail.org Cc: David Bremner Subject: [PATCH 22/31] lib/thread-fp: factor out query expansion, rewrite in Xapian Date: Thu, 12 Aug 2021 10:07:19 -0700 Message-Id: <20210812170728.1348333-23-david@tethera.net> X-Mailer: git-send-email 2.30.2 In-Reply-To: <20210812170728.1348333-1-david@tethera.net> References: <20210812170728.1348333-1-david@tethera.net> MIME-Version: 1.0 Message-ID-Hash: 3VBIXJEECG3ILEBX336LZIQYRAYT5QS2 X-Message-ID-Hash: 3VBIXJEECG3ILEBX336LZIQYRAYT5QS2 X-MailFrom: bremner@tethera.net X-Mailman-Rule-Misses: dmarc-mitigation; no-senders; approved; 
emergency; loop; banned-address; member-moderation; header-match-notmuch.notmuchmail.org-0; nonmember-moderation; administrivia; implicit-dest; max-recipients; max-size; news-moderation; no-subject; suspicious-header X-Mailman-Version: 3.2.1 Precedence: list List-Id: "Use and development of the notmuch mail system." List-Help: List-Post: List-Subscribe: List-Unsubscribe: Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit X-Migadu-Flow: FLOW_IN ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=yhetil.org; s=key1; t=1628788137; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references:list-id:list-help: list-unsubscribe:list-subscribe:list-post; bh=akURlVkCaGzsJ29kGbsS1FLyImDPAGV09cDhdmEMPOY=; b=i2g1Xua9iNPMjDvEXVzkUu61Bnr2fjKf9nt159vKMeVu6HDet+KZBW0RbHnc0g6jy9Npyj d80WYa7kIUN0L6XKd+rQyd5+RKdq3hwMfork7bhI0mOJakNqN28gKP171kpeQCyRA7nPy0 OSw13LH+iY38+byLVtppRe+AejbIzUiQpK8cyVKDdV2gTOnA+Mu0DtLLeyHBaehWJq44F+ al4/I5sBQIRJX/wW3u4hBgf0vbFbef/YQgzGfu4wOjJeEQMaX7wV964Csdf3Jl+R5jDmxl or3/3qjd16HE0QxeG2Pdc0p5CsLd5BGMXFi9FsIK+DpE8GaVze+xIiqYHSJx2g== ARC-Seal: i=1; s=key1; d=yhetil.org; t=1628788137; a=rsa-sha256; cv=none; b=L9W/uJwfmGVl8LmX2rI021SswHfvMqvb89p7yh2uKD+IkYpfBw2Ohb1P3p4sYXV7yR6yNw NoE7bT/b7sE4uOB9xQ0KEZTyB8PPBf/ABdx/oSQj0f8d3FuFrTDvuA7LHerA/fgB/4o2C6 PkXeqwEr79Lk81iQ/omDd9+n2j6tBGWYC+jm+6xJybTfMUqhGURazJ/thfoz+de5Owzlh7 ApXGSSkNrasEd0F2+2sjO3hW53LHycPVvik3vbYiw3983xhye85r/GWGdvlRB5gubLrdNw oP3MpFqdbadjatNzlczlykoWhglTyr2s1zg50hxl1BOErMKkokXUnCGBJgqccQ== ARC-Authentication-Results: i=1; aspmx1.migadu.com; dkim=none; dmarc=none; spf=pass (aspmx1.migadu.com: domain of notmuch-bounces@notmuchmail.org designates 2607:5300:201:3100::1657 as permitted sender) smtp.mailfrom=notmuch-bounces@notmuchmail.org X-Migadu-Spam-Score: 0.56 Authentication-Results: 
aspmx1.migadu.com; dkim=none; dmarc=none; spf=pass (aspmx1.migadu.com: domain of notmuch-bounces@notmuchmail.org designates 2607:5300:201:3100::1657 as permitted sender) smtp.mailfrom=notmuch-bounces@notmuchmail.org X-Migadu-Queue-Id: 59EBBBA44 X-Spam-Score: 0.56 X-Migadu-Scanner: scn1.migadu.com X-TUID: qluYIQwcBRv7 It will be convenient not to have to construct a notmuch query object when parsing subqueries, so this commit rewrites the query expansion (currently used only for thread:{} queries) using only Xapian. As a bonus, it seems to be about 15% faster in initial experiments. --- lib/database-private.h | 16 +++++++++++++- lib/parse-sexp.cc | 2 -- lib/query.cc | 47 ++++++++++++++++++++++++++++++++++++++++++ lib/thread-fp.cc | 26 ++++++++--------------- 4 files changed, 71 insertions(+), 20 deletions(-) diff --git a/lib/database-private.h b/lib/database-private.h index 7ee8e62d..9ee3b933 100644 --- a/lib/database-private.h +++ b/lib/database-private.h @@ -40,6 +40,10 @@ #include +#if HAVE_SFSEXP +#include +#endif + /* Bit masks for _notmuch_database::features. Features are named, * independent aspects of the database schema. 
* @@ -313,11 +317,21 @@ notmuch_status_t _notmuch_sexp_string_to_xapian_query (notmuch_database_t *notmuch, const char *querystr, Xapian::Query &output); +notmuch_status_t +_notmuch_query_expand (notmuch_database_t *notmuch, const char *field, Xapian::Query subquery, + Xapian::Query &output, std::string &msg); + /* regexp-fields.cc */ notmuch_status_t _notmuch_regexp_to_query (notmuch_database_t *notmuch, Xapian::valueno slot, std::string field, std::string regexp_str, Xapian::Query &output, std::string &msg); -#endif +#if HAVE_SFSEXP +/* parse-sexp.cc */ +notmuch_status_t +_notmuch_sexp_string_to_xapian_query (notmuch_database_t *notmuch, const char *querystr, + Xapian::Query &output); +#endif +#endif #endif diff --git a/lib/parse-sexp.cc b/lib/parse-sexp.cc index 48728edb..f48c94be 100644 --- a/lib/parse-sexp.cc +++ b/lib/parse-sexp.cc @@ -219,8 +219,6 @@ _sexp_to_xapian_query (notmuch_database_t *notmuch, const _sexp_prefix_t *parent Xapian::Query &output) { if (sx->ty == SEXP_VALUE) { - std::string term = Xapian::Unicode::tolower (sx->val); - Xapian::Stem stem = *(notmuch->stemmer); std::string term_prefix = parent ? 
_find_prefix (parent->name) : ""; if (sx->aty == SEXP_BASIC && strcmp (sx->val, "*") == 0) { diff --git a/lib/query.cc b/lib/query.cc index 87ee18fc..83b82a1d 100644 --- a/lib/query.cc +++ b/lib/query.cc @@ -821,3 +821,50 @@ notmuch_query_get_database (const notmuch_query_t *query) { return query->notmuch; } + +notmuch_status_t +_notmuch_query_expand (notmuch_database_t *notmuch, const char *field, Xapian::Query subquery, + Xapian::Query &output, std::string &msg) +{ + std::set terms; + const std::string term_prefix = _find_prefix (field); + + if (_debug_query ()) { + fprintf (stderr, "Expanding subquery:\n%s\n", + subquery.get_description ().c_str ()); + } + + try { + Xapian::Enquire enquire (*notmuch->xapian_db); + Xapian::MSet mset; + + enquire.set_weighting_scheme (Xapian::BoolWeight ()); + enquire.set_query (subquery); + + mset = enquire.get_mset (0, notmuch->xapian_db->get_doccount ()); + + for (Xapian::MSetIterator iterator = mset.begin (); iterator != mset.end (); iterator++) { + Xapian::docid doc_id = *iterator; + Xapian::Document doc = notmuch->xapian_db->get_document (doc_id); + Xapian::TermIterator i = doc.termlist_begin (); + + for (i.skip_to (term_prefix); + i != doc.termlist_end () && ((*i).rfind (term_prefix, 0) == 0); i++) { + terms.insert (*i); + } + } + output = Xapian::Query (Xapian::Query::OP_OR, terms.begin (), terms.end ()); + if (_debug_query ()) { + fprintf (stderr, "Expanded query:\n%s\n", + subquery.get_description ().c_str ()); + } + + } catch (const Xapian::Error &error) { + _notmuch_database_log (notmuch, + "A Xapian exception occurred expanding query: %s\n", + error.get_msg ().c_str ()); + return NOTMUCH_STATUS_XAPIAN_EXCEPTION; + } + + return NOTMUCH_STATUS_SUCCESS; +} diff --git a/lib/thread-fp.cc b/lib/thread-fp.cc index 06708ef2..3aa9c423 100644 --- a/lib/thread-fp.cc +++ b/lib/thread-fp.cc @@ -34,28 +34,20 @@ ThreadFieldProcessor::operator() (const std::string & str) if (str.size () <= 1 || str.at (str.size () - 1) != '}') { 
throw Xapian::QueryParserError ("missing } in '" + str + "'"); } else { + Xapian::Query subquery; + Xapian::Query query; + std::string msg; std::string subquery_str = str.substr (1, str.size () - 2); - notmuch_query_t *subquery = notmuch_query_create (notmuch, subquery_str.c_str ()); - notmuch_messages_t *messages; - std::set terms; - if (! subquery) - throw Xapian::QueryParserError ("failed to create subquery for '" + subquery_str + - "'"); + status = _notmuch_query_string_to_xapian_query (notmuch, subquery_str, subquery, msg); + if (status) + throw Xapian::QueryParserError (msg); - status = notmuch_query_search_messages (subquery, &messages); + status = _notmuch_query_expand (notmuch, "thread", subquery, query, msg); if (status) - throw Xapian::QueryParserError ("failed to search messages for '" + subquery_str + - "'"); + throw Xapian::QueryParserError (msg); - for (; notmuch_messages_valid (messages); notmuch_messages_move_to_next (messages)) { - std::string term = thread_prefix; - notmuch_message_t *message; - message = notmuch_messages_get (messages); - term += _notmuch_message_get_thread_id_only (message); - terms.insert (term); - } - return Xapian::Query (Xapian::Query::OP_OR, terms.begin (), terms.end ()); + return query; } } else { /* literal thread id */ -- 2.30.2