From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mp0 ([2001:41d0:2:bcc0::]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)) by ms0.migadu.com with LMTPS id aEPvKAH3A2FIgwEAgWs5BA (envelope-from ) for ; Fri, 30 Jul 2021 14:56:33 +0200 Received: from aspmx1.migadu.com ([2001:41d0:2:bcc0::]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)) by mp0 with LMTPS id cNuoJAH3A2FyJAAA1q6Kng (envelope-from ) for ; Fri, 30 Jul 2021 12:56:33 +0000 Received: from mail.notmuchmail.org (nmbug.tethera.net [144.217.243.247]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits) server-digest SHA256) (No client certificate requested) by aspmx1.migadu.com (Postfix) with ESMTPS id DB907D7D5 for ; Fri, 30 Jul 2021 14:56:32 +0200 (CEST) Received: from nmbug.tethera.net (localhost [127.0.0.1]) by mail.notmuchmail.org (Postfix) with ESMTP id 30BD7291ED; Fri, 30 Jul 2021 08:56:20 -0400 (EDT) Received: from fethera.tethera.net (fethera.tethera.net [IPv6:2607:5300:60:c5::1]) by mail.notmuchmail.org (Postfix) with ESMTP id CEF1E291E6 for ; Fri, 30 Jul 2021 08:56:13 -0400 (EDT) Received: by fethera.tethera.net (Postfix, from userid 1001) id C76925FD17; Fri, 30 Jul 2021 08:56:13 -0400 (EDT) Received: (nullmailer pid 2166910 invoked by uid 1000); Fri, 30 Jul 2021 12:56:10 -0000 From: David Bremner To: notmuch@notmuchmail.org Cc: David Bremner Subject: [PATCH 22/27] lib/thread-fp: factor out query expansion, rewrite in Xapian Date: Fri, 30 Jul 2021 09:56:02 -0300 Message-Id: <20210730125607.2165433-23-david@tethera.net> X-Mailer: git-send-email 2.30.2 In-Reply-To: <20210730125607.2165433-1-david@tethera.net> References: <20210730125607.2165433-1-david@tethera.net> MIME-Version: 1.0 Message-ID-Hash: K6MCPMIGH3ANL2QYUA35T45SYQOPDYLC X-Message-ID-Hash: K6MCPMIGH3ANL2QYUA35T45SYQOPDYLC X-MailFrom: bremner@tethera.net X-Mailman-Rule-Misses: dmarc-mitigation; no-senders; approved; 
emergency; loop; banned-address; member-moderation; header-match-notmuch.notmuchmail.org-0; nonmember-moderation; administrivia; implicit-dest; max-recipients; max-size; news-moderation; no-subject; suspicious-header X-Mailman-Version: 3.2.1 Precedence: list List-Id: "Use and development of the notmuch mail system." List-Help: List-Post: List-Subscribe: List-Unsubscribe: Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit X-Migadu-Flow: FLOW_IN ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=yhetil.org; s=key1; t=1627649793; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references:list-id:list-help: list-unsubscribe:list-subscribe:list-post; bh=akURlVkCaGzsJ29kGbsS1FLyImDPAGV09cDhdmEMPOY=; b=NSvgfBje8W3KBBMYK5XXkZGAyiMe/lx8foPXmbqe043ZgNtjZqNJyxXnT6U0Nw87+Jg+2o ekhqURA+P1yeZH1NrIthvAvgUP57vc4UrkdFm1pVpRGu0cWzolwZtNpNdP2m1ihi8Z0J4k O2nvLpvbaoEf9tTP8itB6qMfkh5mwb1dHApgtoX4YF/QUKEqLzZDb+LS1q1IWpMB/IAT62 bLDEckIsGvjytW/lWeiz/u79QlG0ZiLOFjG6ywNCEPEfva/tnJcdURDSckGAwBfG2FWJ+7 aG+TqDC3e/Jv92AaRewwp19aSl0MBB1eWW0nNmBxHabzCR1nYg37Fd0K7if5MA== ARC-Seal: i=1; s=key1; d=yhetil.org; t=1627649793; a=rsa-sha256; cv=none; b=iv3ime/0QKYA0Qg9cBmsK6eN7ZFaPNlLvxK85VOet/z1vivNnwnm/9GWU5mwFo7PbS/4Mq tEkBTVNPuL12yYwKNbI2a6gmtaa1pI/DxqVbJGXSOV0CMsUVf+R3ZeYMlNj70KA6NFI+TL o/8MIhcmjwA7w5I1/YsaMV+eSzVGofywcn/qcSIdSlxq1KobFmU6/BpF4xnQwcsEDBni2a Un72Ge9OuHAsCkTHXABv2xlrU7pHrluf1PIO2VgNWePp2c7ZHhAGEiNqTAWP5q1T2EyMU7 4o+lqlOPnMxeY0OvEHzyT6vWOaIeMRyUjJsyOe5UTEEv0v1tueIXw4gCvld2ZQ== ARC-Authentication-Results: i=1; aspmx1.migadu.com; dkim=none; spf=pass (aspmx1.migadu.com: domain of notmuch-bounces@notmuchmail.org designates 144.217.243.247 as permitted sender) smtp.mailfrom=notmuch-bounces@notmuchmail.org X-Migadu-Spam-Score: 0.49 Authentication-Results: aspmx1.migadu.com; dkim=none; 
dmarc=none; spf=pass (aspmx1.migadu.com: domain of notmuch-bounces@notmuchmail.org designates 144.217.243.247 as permitted sender) smtp.mailfrom=notmuch-bounces@notmuchmail.org X-Migadu-Queue-Id: DB907D7D5 X-Spam-Score: 0.49 X-Migadu-Scanner: scn1.migadu.com X-TUID: b6/veXB6LSvA It will be convenient not to have to construct a notmuch query object when parsing subqueries, so this commit rewrites the query expansion (currently only used for thread:{} queries) using only Xapian. As a bonus, it seems about 15% faster in initial experiments. --- lib/database-private.h | 16 +++++++++++++- lib/parse-sexp.cc | 2 -- lib/query.cc | 47 ++++++++++++++++++++++++++++++++++++++++++ lib/thread-fp.cc | 26 ++++++++--------------- 4 files changed, 71 insertions(+), 20 deletions(-) diff --git a/lib/database-private.h b/lib/database-private.h index 7ee8e62d..9ee3b933 100644 --- a/lib/database-private.h +++ b/lib/database-private.h @@ -40,6 +40,10 @@ #include +#if HAVE_SFSEXP +#include +#endif + /* Bit masks for _notmuch_database::features. Features are named, * independent aspects of the database schema.
* @@ -313,11 +317,21 @@ notmuch_status_t _notmuch_sexp_string_to_xapian_query (notmuch_database_t *notmuch, const char *querystr, Xapian::Query &output); +notmuch_status_t +_notmuch_query_expand (notmuch_database_t *notmuch, const char *field, Xapian::Query subquery, + Xapian::Query &output, std::string &msg); + /* regexp-fields.cc */ notmuch_status_t _notmuch_regexp_to_query (notmuch_database_t *notmuch, Xapian::valueno slot, std::string field, std::string regexp_str, Xapian::Query &output, std::string &msg); -#endif +#if HAVE_SFSEXP +/* parse-sexp.cc */ +notmuch_status_t +_notmuch_sexp_string_to_xapian_query (notmuch_database_t *notmuch, const char *querystr, + Xapian::Query &output); +#endif +#endif #endif diff --git a/lib/parse-sexp.cc b/lib/parse-sexp.cc index 48728edb..f48c94be 100644 --- a/lib/parse-sexp.cc +++ b/lib/parse-sexp.cc @@ -219,8 +219,6 @@ _sexp_to_xapian_query (notmuch_database_t *notmuch, const _sexp_prefix_t *parent Xapian::Query &output) { if (sx->ty == SEXP_VALUE) { - std::string term = Xapian::Unicode::tolower (sx->val); - Xapian::Stem stem = *(notmuch->stemmer); std::string term_prefix = parent ? 
_find_prefix (parent->name) : "";
 
     if (sx->aty == SEXP_BASIC && strcmp (sx->val, "*") == 0) {
diff --git a/lib/query.cc b/lib/query.cc
index 87ee18fc..83b82a1d 100644
--- a/lib/query.cc
+++ b/lib/query.cc
@@ -821,3 +821,50 @@ notmuch_query_get_database (const notmuch_query_t *query)
 {
     return query->notmuch;
 }
+
+notmuch_status_t
+_notmuch_query_expand (notmuch_database_t *notmuch, const char *field, Xapian::Query subquery,
+                       Xapian::Query &output, std::string &msg)
+{
+    std::set<std::string> terms;
+    const std::string term_prefix = _find_prefix (field);
+
+    if (_debug_query ()) {
+        fprintf (stderr, "Expanding subquery:\n%s\n",
+                 subquery.get_description ().c_str ());
+    }
+
+    try {
+        Xapian::Enquire enquire (*notmuch->xapian_db);
+        Xapian::MSet mset;
+
+        enquire.set_weighting_scheme (Xapian::BoolWeight ());
+        enquire.set_query (subquery);
+
+        mset = enquire.get_mset (0, notmuch->xapian_db->get_doccount ());
+        /* Gather the 'field'-prefixed terms of every document matching subquery. */
+        for (Xapian::MSetIterator iterator = mset.begin (); iterator != mset.end (); iterator++) {
+            Xapian::docid doc_id = *iterator;
+            Xapian::Document doc = notmuch->xapian_db->get_document (doc_id);
+            Xapian::TermIterator i = doc.termlist_begin ();
+            /* Relies on the term list being sorted: stop at the first non-matching term. */
+            for (i.skip_to (term_prefix);
+                 i != doc.termlist_end () && ((*i).rfind (term_prefix, 0) == 0); i++) {
+                terms.insert (*i);
+            }
+        }
+        output = Xapian::Query (Xapian::Query::OP_OR, terms.begin (), terms.end ());
+        if (_debug_query ()) {
+            fprintf (stderr, "Expanded query:\n%s\n",
+                     output.get_description ().c_str ());
+        }
+
+    } catch (const Xapian::Error &error) {
+        _notmuch_database_log (notmuch, "A Xapian exception occurred expanding query: %s\n",
+                               error.get_msg ().c_str ());
+        msg = error.get_msg ();
+        return NOTMUCH_STATUS_XAPIAN_EXCEPTION;
+    }
+
+    return NOTMUCH_STATUS_SUCCESS;
+}
diff --git a/lib/thread-fp.cc b/lib/thread-fp.cc
index 06708ef2..3aa9c423 100644
--- a/lib/thread-fp.cc
+++ b/lib/thread-fp.cc
@@ -34,28 +34,20 @@ ThreadFieldProcessor::operator() (const std::string & str)
         if (str.size () <= 1 || str.at (str.size () - 1) != '}') {
throw Xapian::QueryParserError ("missing } in '" + str + "'"); } else { + Xapian::Query subquery; + Xapian::Query query; + std::string msg; std::string subquery_str = str.substr (1, str.size () - 2); - notmuch_query_t *subquery = notmuch_query_create (notmuch, subquery_str.c_str ()); - notmuch_messages_t *messages; - std::set terms; - if (! subquery) - throw Xapian::QueryParserError ("failed to create subquery for '" + subquery_str + - "'"); + status = _notmuch_query_string_to_xapian_query (notmuch, subquery_str, subquery, msg); + if (status) + throw Xapian::QueryParserError (msg); - status = notmuch_query_search_messages (subquery, &messages); + status = _notmuch_query_expand (notmuch, "thread", subquery, query, msg); if (status) - throw Xapian::QueryParserError ("failed to search messages for '" + subquery_str + - "'"); + throw Xapian::QueryParserError (msg); - for (; notmuch_messages_valid (messages); notmuch_messages_move_to_next (messages)) { - std::string term = thread_prefix; - notmuch_message_t *message; - message = notmuch_messages_get (messages); - term += _notmuch_message_get_thread_id_only (message); - terms.insert (term); - } - return Xapian::Query (Xapian::Query::OP_OR, terms.begin (), terms.end ()); + return query; } } else { /* literal thread id */ -- 2.30.2