unofficial mirror of notmuch@notmuchmail.org
 help / color / mirror / code / Atom feed
From: David Bremner <david@tethera.net>
To: notmuch@notmuchmail.org
Cc: David Bremner <david@tethera.net>
Subject: [PATCH 18/27] lib: factor out query construction from regexp
Date: Fri, 30 Jul 2021 09:55:58 -0300	[thread overview]
Message-ID: <20210730125607.2165433-19-david@tethera.net> (raw)
In-Reply-To: <20210730125607.2165433-1-david@tethera.net>

This will allow re-use of this code outside of the Xapian query parser.
---
 lib/database-private.h |  5 +++
 lib/regexp-fields.cc   | 81 +++++++++++++++++++++++++++++-------------
 lib/regexp-fields.h    |  6 ++++
 3 files changed, 68 insertions(+), 24 deletions(-)

diff --git a/lib/database-private.h b/lib/database-private.h
index 85d55299..cf4eb94b 100644
--- a/lib/database-private.h
+++ b/lib/database-private.h
@@ -306,6 +306,11 @@ _notmuch_database_setup_user_query_fields (notmuch_database_t *notmuch);
 notmuch_status_t
 _notmuch_sexp_string_to_xapian_query (notmuch_database_t *notmuch, const char *querystr,
 				      Xapian::Query &output);
+
+notmuch_status_t
+_notmuch_regexp_to_query (notmuch_database_t *notmuch, Xapian::valueno slot, std::string field,
+			  std::string regexp_str,
+			  Xapian::Query &output, std::string &msg);
 #endif
 
 #endif
diff --git a/lib/regexp-fields.cc b/lib/regexp-fields.cc
index 0feb50e5..c6d9d94f 100644
--- a/lib/regexp-fields.cc
+++ b/lib/regexp-fields.cc
@@ -26,27 +26,32 @@
 #include "notmuch-private.h"
 #include "database-private.h"
 
-static void
-compile_regex (regex_t &regexp, const char *str)
+notmuch_status_t
+compile_regex (regex_t &regexp, const char *str, std::string &msg)
 {
     int err = regcomp (&regexp, str, REG_EXTENDED | REG_NOSUB);
 
     if (err != 0) {
 	size_t len = regerror (err, &regexp, NULL, 0);
 	char *buffer = new char[len];
-	std::string msg = "Regexp error: ";
+	msg = "Regexp error: ";
 	(void) regerror (err, &regexp, buffer, len);
 	msg.append (buffer, len);
 	delete[] buffer;
 
-	throw Xapian::QueryParserError (msg);
+	return NOTMUCH_STATUS_ILLEGAL_ARGUMENT;
     }
+    return NOTMUCH_STATUS_SUCCESS;
 }
 
 RegexpPostingSource::RegexpPostingSource (Xapian::valueno slot, const std::string &regexp)
     : slot_ (slot)
 {
-    compile_regex (regexp_, regexp.c_str ());
+    std::string msg;
+    notmuch_status_t status = compile_regex (regexp_, regexp.c_str (), msg);
+
+    if (status)
+	throw Xapian::QueryParserError (msg);
 }
 
 RegexpPostingSource::~RegexpPostingSource ()
@@ -141,18 +146,54 @@ _find_slot (std::string prefix)
 	return Xapian::BAD_VALUENO;
 }
 
-RegexpFieldProcessor::RegexpFieldProcessor (std::string prefix,
+RegexpFieldProcessor::RegexpFieldProcessor (std::string field_,
 					    notmuch_field_flag_t options_,
 					    Xapian::QueryParser &parser_,
 					    notmuch_database_t *notmuch_)
-    : slot (_find_slot (prefix)),
-    term_prefix (_find_prefix (prefix.c_str ())),
+    : slot (_find_slot (field_)),
+    field (field_),
+    term_prefix (_find_prefix (field_.c_str ())),
     options (options_),
     parser (parser_),
     notmuch (notmuch_)
 {
 };
 
+/* Build a query matching regexp_str on field; on a regexp error, set msg and return the status. */
+notmuch_status_t
+_notmuch_regexp_to_query (notmuch_database_t *notmuch, Xapian::valueno slot, std::string field,
+			  std::string regexp_str,
+			  Xapian::Query &output, std::string &msg)
+{
+    regex_t regexp;
+    notmuch_status_t status = compile_regex (regexp, regexp_str.c_str (), msg);
+
+    if (status) {
+	_notmuch_database_log_append (notmuch, "error compiling regex %s", msg.c_str ());
+	return status;
+    }
+    if (slot == Xapian::BAD_VALUENO)
+	slot = _find_slot (field);
+
+    if (slot == Xapian::BAD_VALUENO) {
+	/* No value slot: OR together the prefixed terms whose suffix matches. */
+	std::string term_prefix = _find_prefix (field.c_str ());
+	std::vector<std::string> terms;
+	for (Xapian::TermIterator it = notmuch->xapian_db->allterms_begin (term_prefix);
+	     it != notmuch->xapian_db->allterms_end (); ++it) {
+	    if (regexec (&regexp, (*it).c_str () + term_prefix.size (),
+			 0, NULL, 0) == 0)
+		terms.push_back (*it);
+	}
+	output = Xapian::Query (Xapian::Query::OP_OR, terms.begin (), terms.end ());
+    } else {
+	RegexpPostingSource *postings = new RegexpPostingSource (slot, regexp_str);
+	output = Xapian::Query (postings->release ());
+    }
+    regfree (&regexp); /* release regcomp storage, previously leaked */
+    return NOTMUCH_STATUS_SUCCESS;
+}
+
 Xapian::Query
 RegexpFieldProcessor::operator() (const std::string & str)
 {
@@ -168,23 +209,15 @@ RegexpFieldProcessor::operator() (const std::string & str)
 
     if (str.at (0) == '/') {
 	if (str.length () > 1 && str.at (str.size () - 1) == '/') {
+	    Xapian::Query query;
 	    std::string regexp_str = str.substr (1, str.size () - 2);
-	    if (slot != Xapian::BAD_VALUENO) {
-		RegexpPostingSource *postings = new RegexpPostingSource (slot, regexp_str);
-		return Xapian::Query (postings->release ());
-	    } else {
-		std::vector<std::string> terms;
-		regex_t regexp;
-
-		compile_regex (regexp, regexp_str.c_str ());
-		for (Xapian::TermIterator it = notmuch->xapian_db->allterms_begin (term_prefix);
-		     it != notmuch->xapian_db->allterms_end (); ++it) {
-		    if (regexec (&regexp, (*it).c_str () + term_prefix.size (),
-				 0, NULL, 0) == 0)
-			terms.push_back (*it);
-		}
-		return Xapian::Query (Xapian::Query::OP_OR, terms.begin (), terms.end ());
-	    }
+	    std::string msg;
+	    notmuch_status_t status;
+
+	    status = _notmuch_regexp_to_query (notmuch, slot, field, regexp_str, query, msg);
+	    if (status)
+		throw Xapian::QueryParserError (msg);
+	    return query;
 	} else {
 	    throw Xapian::QueryParserError ("unmatched regex delimiter in '" + str + "'");
 	}
diff --git a/lib/regexp-fields.h b/lib/regexp-fields.h
index a8cca243..9c871de7 100644
--- a/lib/regexp-fields.h
+++ b/lib/regexp-fields.h
@@ -30,6 +30,11 @@
 #include "database-private.h"
 #include "notmuch-private.h"
 
+notmuch_status_t
+_notmuch_regexp_to_query (notmuch_database_t *notmuch, Xapian::valueno slot, std::string field,
+			  std::string regexp_str,
+			  Xapian::Query &output, std::string &msg); /* defined in regexp-fields.cc */
+
 /* A posting source that returns documents where a value matches a
  * regexp.
  */
@@ -64,6 +69,7 @@ public:
 class RegexpFieldProcessor : public Xapian::FieldProcessor {
 protected:
     Xapian::valueno slot;
+    std::string field;
     std::string term_prefix;
     notmuch_field_flag_t options;
     Xapian::QueryParser &parser;
-- 
2.30.2

  parent reply	other threads:[~2021-07-30 12:57 UTC|newest]

Thread overview: 28+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-07-30 12:55 v3 sexpr query parser David Bremner
2021-07-30 12:55 ` [PATCH 01/27] configure: optional library sfsexp David Bremner
2021-07-30 12:55 ` [PATCH 02/27] lib: split notmuch_query_create David Bremner
2021-07-30 12:55 ` [PATCH 03/27] lib: define notmuch_query_create_with_syntax David Bremner
2021-07-30 12:55 ` [PATCH 04/27] CLI/search+address: support sexpr queries David Bremner
2021-07-30 12:55 ` [PATCH 05/27] lib: add new status code for query syntax errors David Bremner
2021-07-30 12:55 ` [PATCH 06/27] lib/parse-sexp: parse single terms and the empty list David Bremner
2021-07-30 12:55 ` [PATCH 07/27] lib: leave stemmer object accessible David Bremner
2021-07-30 12:55 ` [PATCH 08/27] lib/parse-sexp: stem unquoted atoms David Bremner
2021-07-30 12:55 ` [PATCH 09/27] lib/parse-sexp: support and, not, and or David Bremner
2021-07-30 12:55 ` [PATCH 10/27] lib/parse-sexp: support subject field David Bremner
2021-07-30 12:55 ` [PATCH 11/27] util/unicode: allow calling from C++ David Bremner
2021-07-30 12:55 ` [PATCH 12/27] lib/parse-sexp: support phrase queries David Bremner
2021-07-30 12:55 ` [PATCH 13/27] lib/parse-sexp: add term prefix backed fields David Bremner
2021-07-30 12:55 ` [PATCH 14/27] lib/parse-sexp: 'starts-with' wildcard searches David Bremner
2021-07-30 12:55 ` [PATCH 15/27] lib/parse-sexp: add '*' as syntactic sugar for '(starts-with "")' David Bremner
2021-07-30 12:55 ` [PATCH 16/27] lib/parse-sexp: handle unprefixed terms David Bremner
2021-07-30 12:55 ` [PATCH 17/27] lib/query: generalize exclude handling to s-expression queries David Bremner
2021-07-30 12:55 ` David Bremner [this message]
2021-07-30 12:55 ` [PATCH 19/27] lib/parse-sexp: support regular expressions David Bremner
2021-07-30 12:56 ` [PATCH 20/27] lib: generate actual Xapian query for "*" and "" David Bremner
2021-07-30 12:56 ` [PATCH 21/27] lib/query: factor out _notmuch_query_string_to_xapian_query David Bremner
2021-07-30 12:56 ` [PATCH 22/27] lib/thread-fp: factor out query expansion, rewrite in Xapian David Bremner
2021-07-30 12:56 ` [PATCH 23/27] lib/parse-sexp: expand queries David Bremner
2021-07-30 12:56 ` [PATCH 24/27] lib/parse-sexp: support infix subqueries David Bremner
2021-07-30 12:56 ` [PATCH 25/27] lib/parse-sexp: parse user headers David Bremner
2021-07-30 12:56 ` [PATCH 26/27] lib: factor out expansion of saved queries David Bremner
2021-07-30 12:56 ` [PATCH 27/27] lib/parse-sexp: handle " David Bremner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://notmuchmail.org/

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210730125607.2165433-19-david@tethera.net \
    --to=david@tethera.net \
    --cc=notmuch@notmuchmail.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line
before the message body.

Code repositories for project(s) associated with this public inbox:

	https://yhetil.org/notmuch.git/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).