unofficial mirror of notmuch@notmuchmail.org
 help / color / mirror / code / Atom feed
From: David Bremner <david@tethera.net>
To: notmuch@notmuchmail.org
Cc: David Bremner <david@tethera.net>
Subject: [PATCH 23/36] lib/thread-fp: factor out query expansion, rewrite in Xapian
Date: Tue, 24 Aug 2021 08:17:32 -0700	[thread overview]
Message-ID: <20210824151745.2941868-24-david@tethera.net> (raw)
In-Reply-To: <20210824151745.2941868-1-david@tethera.net>

It will be convenient not to have to construct a notmuch query object
when parsing subqueries, so this commit rewrites the query
expansion (currently only used for thread:{} queries) using only
Xapian. As a bonus, it seems about 15% faster in initial experiments.
---
 lib/database-private.h | 16 +++++++++++++-
 lib/parse-sexp.cc      |  2 --
 lib/query.cc           | 48 ++++++++++++++++++++++++++++++++++++++++++
 lib/thread-fp.cc       | 26 ++++++++---------------
 4 files changed, 72 insertions(+), 20 deletions(-)

diff --git a/lib/database-private.h b/lib/database-private.h
index 7ee8e62d..9ee3b933 100644
--- a/lib/database-private.h
+++ b/lib/database-private.h
@@ -40,6 +40,10 @@
 
 #include <xapian.h>
 
+#if HAVE_SFSEXP
+#include <sexp.h>
+#endif
+
 /* Bit masks for _notmuch_database::features.  Features are named,
  * independent aspects of the database schema.
  *
@@ -313,11 +317,21 @@ notmuch_status_t
 _notmuch_sexp_string_to_xapian_query (notmuch_database_t *notmuch, const char *querystr,
 				      Xapian::Query &output);
 
+notmuch_status_t
+_notmuch_query_expand (notmuch_database_t *notmuch, const char *field, Xapian::Query subquery,
+		       Xapian::Query &output, std::string &msg);
+
 /* regexp-fields.cc */
 notmuch_status_t
 _notmuch_regexp_to_query (notmuch_database_t *notmuch, Xapian::valueno slot, std::string field,
 			  std::string regexp_str,
 			  Xapian::Query &output, std::string &msg);
-#endif
 
+#if HAVE_SFSEXP
+/* parse-sexp.cc */
+notmuch_status_t
+_notmuch_sexp_string_to_xapian_query (notmuch_database_t *notmuch, const char *querystr,
+				      Xapian::Query &output);
+#endif
+#endif
 #endif
diff --git a/lib/parse-sexp.cc b/lib/parse-sexp.cc
index 84914296..17401f47 100644
--- a/lib/parse-sexp.cc
+++ b/lib/parse-sexp.cc
@@ -219,8 +219,6 @@ _sexp_to_xapian_query (notmuch_database_t *notmuch, const _sexp_prefix_t *parent
 		       Xapian::Query &output)
 {
     if (sx->ty == SEXP_VALUE) {
-	std::string term = Xapian::Unicode::tolower (sx->val);
-	Xapian::Stem stem = *(notmuch->stemmer);
 	std::string term_prefix = parent ? _find_prefix (parent->name) : "";
 
 	if (sx->aty == SEXP_BASIC && strcmp (sx->val, "*") == 0) {
diff --git a/lib/query.cc b/lib/query.cc
index 87ee18fc..b0937fcc 100644
--- a/lib/query.cc
+++ b/lib/query.cc
@@ -821,3 +821,51 @@ notmuch_query_get_database (const notmuch_query_t *query)
 {
     return query->notmuch;
 }
+
+/* Run 'subquery' against the database and set 'output' to the OR of all
+ * terms with the prefix for 'field' found in the matching documents. On a
+ * Xapian error, log it, copy the error text to 'msg', and return failure. */
+notmuch_status_t
+_notmuch_query_expand (notmuch_database_t *notmuch, const char *field, Xapian::Query subquery,
+		       Xapian::Query &output, std::string &msg)
+{
+    std::set<std::string> terms;
+    const std::string term_prefix = _find_prefix (field);
+
+    if (_debug_query ()) {
+	fprintf (stderr, "Expanding subquery:\n%s\n",
+		 subquery.get_description ().c_str ());
+    }
+
+    try {
+	Xapian::Enquire enquire (*notmuch->xapian_db);
+
+	enquire.set_weighting_scheme (Xapian::BoolWeight ());
+	enquire.set_query (subquery);
+
+	Xapian::MSet mset = enquire.get_mset (0, notmuch->xapian_db->get_doccount ());
+
+	for (Xapian::MSetIterator iterator = mset.begin (); iterator != mset.end (); ++iterator) {
+	    Xapian::Document doc = notmuch->xapian_db->get_document (*iterator);
+	    Xapian::TermIterator i = doc.termlist_begin ();
+
+	    for (i.skip_to (term_prefix);
+		 i != doc.termlist_end () && ((*i).rfind (term_prefix, 0) == 0); ++i) {
+		terms.insert (*i);
+	    }
+	}
+	output = Xapian::Query (Xapian::Query::OP_OR, terms.begin (), terms.end ());
+	if (_debug_query ()) {
+	    fprintf (stderr, "Expanded query:\n%s\n",
+		     output.get_description ().c_str ());
+	}
+    } catch (const Xapian::Error &error) {
+	_notmuch_database_log (notmuch,
+			       "A Xapian exception occurred expanding query: %s\n",
+			       error.get_msg ().c_str ());
+	msg = error.get_msg ();
+	return NOTMUCH_STATUS_XAPIAN_EXCEPTION;
+    }
+
+    return NOTMUCH_STATUS_SUCCESS;
+}
diff --git a/lib/thread-fp.cc b/lib/thread-fp.cc
index 06708ef2..3aa9c423 100644
--- a/lib/thread-fp.cc
+++ b/lib/thread-fp.cc
@@ -34,28 +34,20 @@ ThreadFieldProcessor::operator() (const std::string & str)
 	if (str.size () <= 1 || str.at (str.size () - 1) != '}') {
 	    throw Xapian::QueryParserError ("missing } in '" + str + "'");
 	} else {
+	    Xapian::Query subquery;
+	    Xapian::Query query;
+	    std::string msg;
 	    std::string subquery_str = str.substr (1, str.size () - 2);
-	    notmuch_query_t *subquery = notmuch_query_create (notmuch, subquery_str.c_str ());
-	    notmuch_messages_t *messages;
-	    std::set<std::string> terms;
 
-	    if (! subquery)
-		throw Xapian::QueryParserError ("failed to create subquery for '" + subquery_str +
-						"'");
+	    status = _notmuch_query_string_to_xapian_query (notmuch, subquery_str, subquery, msg);
+	    if (status)
+		throw Xapian::QueryParserError (msg);
 
-	    status = notmuch_query_search_messages (subquery, &messages);
+	    status = _notmuch_query_expand (notmuch, "thread", subquery, query, msg);
 	    if (status)
-		throw Xapian::QueryParserError ("failed to search messages for '" + subquery_str +
-						"'");
+		throw Xapian::QueryParserError (msg);
 
-	    for (; notmuch_messages_valid (messages); notmuch_messages_move_to_next (messages)) {
-		std::string term = thread_prefix;
-		notmuch_message_t *message;
-		message = notmuch_messages_get (messages);
-		term += _notmuch_message_get_thread_id_only (message);
-		terms.insert (term);
-	    }
-	    return Xapian::Query (Xapian::Query::OP_OR, terms.begin (), terms.end ());
+	    return query;
 	}
     } else {
 	/* literal thread id */
-- 
2.32.0

  parent reply	other threads:[~2021-08-24 15:22 UTC|newest]

Thread overview: 38+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-08-24 15:17 v5 sexp query parser David Bremner
2021-08-24 15:17 ` [PATCH 01/36] CLI: make variable n_requested_db_uuid file scope David Bremner
2021-08-24 15:17 ` [PATCH 02/36] configure: optional library sfsexp David Bremner
2021-08-24 15:17 ` [PATCH 03/36] lib: split notmuch_query_create David Bremner
2021-08-24 15:17 ` [PATCH 04/36] lib: define notmuch_query_create_with_syntax David Bremner
2021-08-24 15:17 ` [PATCH 05/36] CLI/search+address: support sexpr queries David Bremner
2021-08-24 15:17 ` [PATCH 06/36] lib: add new status code for query syntax errors David Bremner
2021-08-24 15:17 ` [PATCH 07/36] lib/parse-sexp: parse single terms and the empty list David Bremner
2021-08-24 15:17 ` [PATCH 08/36] lib: leave stemmer object accessible David Bremner
2021-08-24 15:17 ` [PATCH 09/36] lib/parse-sexp: stem unquoted atoms David Bremner
2021-08-24 15:17 ` [PATCH 10/36] lib/parse-sexp: support and, not, and or David Bremner
2021-08-24 15:17 ` [PATCH 11/36] lib/parse-sexp: support subject field David Bremner
2021-08-24 15:17 ` [PATCH 12/36] util/unicode: allow calling from C++ David Bremner
2021-08-24 15:17 ` [PATCH 13/36] lib/parse-sexp: support phrase queries David Bremner
2021-08-24 15:17 ` [PATCH 14/36] lib/parse-sexp: add term prefix backed fields David Bremner
2021-08-24 15:17 ` [PATCH 15/36] lib/parse-sexp: 'starts-with' wildcard searches David Bremner
2021-08-24 15:17 ` [PATCH 16/36] lib/parse-sexp: add '*' as syntactic sugar for '(starts-with "")' David Bremner
2021-08-24 15:17 ` [PATCH 17/36] lib/parse-sexp: handle unprefixed terms David Bremner
2021-08-24 15:17 ` [PATCH 18/36] lib/query: generalize exclude handling to s-expression queries David Bremner
2021-08-24 15:17 ` [PATCH 19/36] lib: factor out query construction from regexp David Bremner
2021-08-24 15:17 ` [PATCH 20/36] lib/parse-sexp: support regular expressions David Bremner
2021-08-24 15:17 ` [PATCH 21/36] lib: generate actual Xapian query for "*" and "" David Bremner
2021-08-24 15:17 ` [PATCH 22/36] lib/query: factor out _notmuch_query_string_to_xapian_query David Bremner
2021-08-24 15:17 ` David Bremner [this message]
2021-08-24 15:17 ` [PATCH 24/36] lib/parse-sexp: expand queries David Bremner
2021-08-24 15:17 ` [PATCH 25/36] lib/parse-sexp: support infix subqueries David Bremner
2021-08-24 15:17 ` [PATCH 26/36] lib/parse-sexp: parse user headers David Bremner
2021-08-24 15:17 ` [PATCH 27/36] lib: factor out expansion of saved queries David Bremner
2021-08-24 15:17 ` [PATCH 28/36] lib/parse-sexp: handle " David Bremner
2021-08-24 15:17 ` [PATCH 29/36] CLI/config support saving s-expression queries David Bremner
2021-08-24 15:17 ` [PATCH 30/36] lib/parse-sexp: support saved " David Bremner
2021-08-24 15:17 ` [PATCH 31/36] lib/parse-sexp: thread environment argument through parser David Bremner
2021-08-24 15:17 ` [PATCH 32/36] lib/parse-sexp: apply macros David Bremner
2021-08-24 15:17 ` [PATCH 33/36] CLI: move query syntax to shared option David Bremner
2021-08-24 15:17 ` [PATCH 34/36] CLI/{count, dump, reindex, reply, show}: enable sexp queries David Bremner
2021-08-24 15:17 ` [PATCH 35/36] CLI/tag: " David Bremner
2021-08-24 15:17 ` [PATCH 36/36] doc/sexp-queries: update synopsis and description David Bremner
2021-09-05 19:31 ` v5 sexp query parser David Bremner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://notmuchmail.org/

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210824151745.2941868-24-david@tethera.net \
    --to=david@tethera.net \
    --cc=notmuch@notmuchmail.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://yhetil.org/notmuch.git/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).