From: David Bremner <david@tethera.net>
To: notmuch@notmuchmail.org
Cc: David Bremner <david@tethera.net>
Subject: [PATCH 18/27] lib: factor out query construction from regexp
Date: Fri, 30 Jul 2021 09:55:58 -0300 [thread overview]
Message-ID: <20210730125607.2165433-19-david@tethera.net> (raw)
In-Reply-To: <20210730125607.2165433-1-david@tethera.net>
This will allow re-use of this code outside of the Xapian query parser.
---
lib/database-private.h | 5 +++
lib/regexp-fields.cc | 81 +++++++++++++++++++++++++++++-------------
lib/regexp-fields.h | 6 ++++
3 files changed, 68 insertions(+), 24 deletions(-)
diff --git a/lib/database-private.h b/lib/database-private.h
index 85d55299..cf4eb94b 100644
--- a/lib/database-private.h
+++ b/lib/database-private.h
@@ -306,6 +306,11 @@ _notmuch_database_setup_user_query_fields (notmuch_database_t *notmuch);
notmuch_status_t
_notmuch_sexp_string_to_xapian_query (notmuch_database_t *notmuch, const char *querystr,
Xapian::Query &output);
+
+notmuch_status_t
+_notmuch_regexp_to_query (notmuch_database_t *notmuch, Xapian::valueno slot, std::string field,
+ std::string regexp_str,
+ Xapian::Query &output, std::string &msg);
#endif
#endif
diff --git a/lib/regexp-fields.cc b/lib/regexp-fields.cc
index 0feb50e5..c6d9d94f 100644
--- a/lib/regexp-fields.cc
+++ b/lib/regexp-fields.cc
@@ -26,27 +26,32 @@
#include "notmuch-private.h"
#include "database-private.h"
-static void
-compile_regex (regex_t &regexp, const char *str)
+notmuch_status_t
+compile_regex (regex_t &regexp, const char *str, std::string &msg)
{
int err = regcomp (&regexp, str, REG_EXTENDED | REG_NOSUB);
if (err != 0) {
size_t len = regerror (err, &regexp, NULL, 0);
char *buffer = new char[len];
- std::string msg = "Regexp error: ";
+ msg = "Regexp error: ";
(void) regerror (err, &regexp, buffer, len);
msg.append (buffer, len);
delete[] buffer;
- throw Xapian::QueryParserError (msg);
+ return NOTMUCH_STATUS_ILLEGAL_ARGUMENT;
}
+ return NOTMUCH_STATUS_SUCCESS;
}
RegexpPostingSource::RegexpPostingSource (Xapian::valueno slot, const std::string &regexp)
: slot_ (slot)
{
- compile_regex (regexp_, regexp.c_str ());
+ std::string msg;
+ notmuch_status_t status = compile_regex (regexp_, regexp.c_str (), msg);
+
+ if (status)
+ throw Xapian::QueryParserError (msg);
}
RegexpPostingSource::~RegexpPostingSource ()
@@ -141,18 +146,54 @@ _find_slot (std::string prefix)
return Xapian::BAD_VALUENO;
}
-RegexpFieldProcessor::RegexpFieldProcessor (std::string prefix,
+RegexpFieldProcessor::RegexpFieldProcessor (std::string field_,
notmuch_field_flag_t options_,
Xapian::QueryParser &parser_,
notmuch_database_t *notmuch_)
- : slot (_find_slot (prefix)),
- term_prefix (_find_prefix (prefix.c_str ())),
+ : slot (_find_slot (field_)),
+ field (field_),
+ term_prefix (_find_prefix (field_.c_str ())),
options (options_),
parser (parser_),
notmuch (notmuch_)
{
};
+notmuch_status_t
+_notmuch_regexp_to_query (notmuch_database_t *notmuch, Xapian::valueno slot, std::string field,
+ std::string regexp_str,
+ Xapian::Query &output, std::string &msg)
+{
+ regex_t regexp;
+ notmuch_status_t status;
+
+ status = compile_regex (regexp, regexp_str.c_str (), msg);
+ if (status) {
+ _notmuch_database_log_append (notmuch, "error compiling regex %s", msg.c_str ());
+ return status;
+ }
+
+ if (slot == Xapian::BAD_VALUENO)
+ slot = _find_slot (field);
+
+ if (slot == Xapian::BAD_VALUENO) {
+ std::string term_prefix = _find_prefix (field.c_str ());
+ std::vector<std::string> terms;
+
+ for (Xapian::TermIterator it = notmuch->xapian_db->allterms_begin (term_prefix);
+ it != notmuch->xapian_db->allterms_end (); ++it) {
+ if (regexec (&regexp, (*it).c_str () + term_prefix.size (),
+ 0, NULL, 0) == 0)
+ terms.push_back (*it);
+ }
+ output = Xapian::Query (Xapian::Query::OP_OR, terms.begin (), terms.end ());
+ } else {
+ RegexpPostingSource *postings = new RegexpPostingSource (slot, regexp_str);
+ output = Xapian::Query (postings->release ());
+ }
+ return NOTMUCH_STATUS_SUCCESS;
+}
+
Xapian::Query
RegexpFieldProcessor::operator() (const std::string & str)
{
@@ -168,23 +209,15 @@ RegexpFieldProcessor::operator() (const std::string & str)
if (str.at (0) == '/') {
if (str.length () > 1 && str.at (str.size () - 1) == '/') {
+ Xapian::Query query;
std::string regexp_str = str.substr (1, str.size () - 2);
- if (slot != Xapian::BAD_VALUENO) {
- RegexpPostingSource *postings = new RegexpPostingSource (slot, regexp_str);
- return Xapian::Query (postings->release ());
- } else {
- std::vector<std::string> terms;
- regex_t regexp;
-
- compile_regex (regexp, regexp_str.c_str ());
- for (Xapian::TermIterator it = notmuch->xapian_db->allterms_begin (term_prefix);
- it != notmuch->xapian_db->allterms_end (); ++it) {
- if (regexec (&regexp, (*it).c_str () + term_prefix.size (),
- 0, NULL, 0) == 0)
- terms.push_back (*it);
- }
- return Xapian::Query (Xapian::Query::OP_OR, terms.begin (), terms.end ());
- }
+ std::string msg;
+ notmuch_status_t status;
+
+ status = _notmuch_regexp_to_query (notmuch, slot, field, regexp_str, query, msg);
+ if (status)
+ throw Xapian::QueryParserError (msg);
+ return query;
} else {
throw Xapian::QueryParserError ("unmatched regex delimiter in '" + str + "'");
}
diff --git a/lib/regexp-fields.h b/lib/regexp-fields.h
index a8cca243..9c871de7 100644
--- a/lib/regexp-fields.h
+++ b/lib/regexp-fields.h
@@ -30,6 +30,11 @@
#include "database-private.h"
#include "notmuch-private.h"
+notmuch_status_t
+_notmuch_regex_to_query (notmuch_database_t *notmuch, Xapian::valueno slot, std::string field,
+ std::string regexp_str,
+ Xapian::Query &output, std::string &msg);
+
/* A posting source that returns documents where a value matches a
* regexp.
*/
@@ -64,6 +69,7 @@ public:
class RegexpFieldProcessor : public Xapian::FieldProcessor {
protected:
Xapian::valueno slot;
+ std::string field;
std::string term_prefix;
notmuch_field_flag_t options;
Xapian::QueryParser &parser;
--
2.30.2
next prev parent reply other threads:[~2021-07-30 12:57 UTC|newest]
Thread overview: 28+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-07-30 12:55 v3 sexpr query parser David Bremner
2021-07-30 12:55 ` [PATCH 01/27] configure: optional library sfsexp David Bremner
2021-07-30 12:55 ` [PATCH 02/27] lib: split notmuch_query_create David Bremner
2021-07-30 12:55 ` [PATCH 03/27] lib: define notmuch_query_create_with_syntax David Bremner
2021-07-30 12:55 ` [PATCH 04/27] CLI/search+address: support sexpr queries David Bremner
2021-07-30 12:55 ` [PATCH 05/27] lib: add new status code for query syntax errors David Bremner
2021-07-30 12:55 ` [PATCH 06/27] lib/parse-sexp: parse single terms and the empty list David Bremner
2021-07-30 12:55 ` [PATCH 07/27] lib: leave stemmer object accessible David Bremner
2021-07-30 12:55 ` [PATCH 08/27] lib/parse-sexp: stem unquoted atoms David Bremner
2021-07-30 12:55 ` [PATCH 09/27] lib/parse-sexp: support and, not, and or David Bremner
2021-07-30 12:55 ` [PATCH 10/27] lib/parse-sexp: support subject field David Bremner
2021-07-30 12:55 ` [PATCH 11/27] util/unicode: allow calling from C++ David Bremner
2021-07-30 12:55 ` [PATCH 12/27] lib/parse-sexp: support phrase queries David Bremner
2021-07-30 12:55 ` [PATCH 13/27] lib/parse-sexp: add term prefix backed fields David Bremner
2021-07-30 12:55 ` [PATCH 14/27] lib/parse-sexp: 'starts-with' wildcard searches David Bremner
2021-07-30 12:55 ` [PATCH 15/27] lib/parse-sexp: add '*' as syntactic sugar for '(starts-with "")' David Bremner
2021-07-30 12:55 ` [PATCH 16/27] lib/parse-sexp: handle unprefixed terms David Bremner
2021-07-30 12:55 ` [PATCH 17/27] lib/query: generalize exclude handling to s-expression queries David Bremner
2021-07-30 12:55 ` David Bremner [this message]
2021-07-30 12:55 ` [PATCH 19/27] lib/parse-sexp: support regular expressions David Bremner
2021-07-30 12:56 ` [PATCH 20/27] lib: generate actual Xapian query for "*" and "" David Bremner
2021-07-30 12:56 ` [PATCH 21/27] lib/query: factor out _notmuch_query_string_to_xapian_query David Bremner
2021-07-30 12:56 ` [PATCH 22/27] lib/thread-fp: factor out query expansion, rewrite in Xapian David Bremner
2021-07-30 12:56 ` [PATCH 23/27] lib/parse-sexp: expand queries David Bremner
2021-07-30 12:56 ` [PATCH 24/27] lib/parse-sexp: support infix subqueries David Bremner
2021-07-30 12:56 ` [PATCH 25/27] lib/parse-sexp: parse user headers David Bremner
2021-07-30 12:56 ` [PATCH 26/27] lib: factor out expansion of saved queries David Bremner
2021-07-30 12:56 ` [PATCH 27/27] lib/parse-sexp: handle " David Bremner
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: https://notmuchmail.org/
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210730125607.2165433-19-david@tethera.net \
--to=david@tethera.net \
--cc=notmuch@notmuchmail.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://yhetil.org/notmuch.git/
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).