From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mp0 ([2001:41d0:8:6d80::]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)) by ms0.migadu.com with LMTPS id cMs6G4xVFWG7NwEAgWs5BA (envelope-from ) for ; Thu, 12 Aug 2021 19:08:28 +0200 Received: from aspmx1.migadu.com ([2001:41d0:8:6d80::]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)) by mp0 with LMTPS id EP35FoxVFWGicAAA1q6Kng (envelope-from ) for ; Thu, 12 Aug 2021 17:08:28 +0000 Received: from mail.notmuchmail.org (nmbug.tethera.net [144.217.243.247]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits) server-digest SHA256) (No client certificate requested) by aspmx1.migadu.com (Postfix) with ESMTPS id 05792BB07 for ; Thu, 12 Aug 2021 19:08:28 +0200 (CEST) Received: from nmbug.tethera.net (localhost [127.0.0.1]) by mail.notmuchmail.org (Postfix) with ESMTP id 60C252920D; Thu, 12 Aug 2021 13:08:08 -0400 (EDT) Received: from fethera.tethera.net (fethera.tethera.net [IPv6:2607:5300:60:c5::1]) by mail.notmuchmail.org (Postfix) with ESMTP id 5D7CC291FA for ; Thu, 12 Aug 2021 13:07:56 -0400 (EDT) Received: by fethera.tethera.net (Postfix, from userid 1001) id 511A25FD5C; Thu, 12 Aug 2021 13:07:56 -0400 (EDT) Received: (nullmailer pid 1348759 invoked by uid 1000); Thu, 12 Aug 2021 17:07:43 -0000 From: David Bremner To: notmuch@notmuchmail.org Cc: David Bremner Subject: [PATCH 18/31] lib: factor out query construction from regexp Date: Thu, 12 Aug 2021 10:07:15 -0700 Message-Id: <20210812170728.1348333-19-david@tethera.net> X-Mailer: git-send-email 2.30.2 In-Reply-To: <20210812170728.1348333-1-david@tethera.net> References: <20210812170728.1348333-1-david@tethera.net> MIME-Version: 1.0 Message-ID-Hash: DAHJGRI26HNP5IUZRT6CAFYWKKRVWFED X-Message-ID-Hash: DAHJGRI26HNP5IUZRT6CAFYWKKRVWFED X-MailFrom: bremner@tethera.net X-Mailman-Rule-Misses: dmarc-mitigation; no-senders; approved; emergency; loop; 
banned-address; member-moderation; header-match-notmuch.notmuchmail.org-0; nonmember-moderation; administrivia; implicit-dest; max-recipients; max-size; news-moderation; no-subject; suspicious-header X-Mailman-Version: 3.2.1 Precedence: list List-Id: "Use and development of the notmuch mail system." List-Help: List-Post: List-Subscribe: List-Unsubscribe: Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit X-Migadu-Flow: FLOW_IN ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=yhetil.org; s=key1; t=1628788108; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references:list-id:list-help: list-unsubscribe:list-subscribe:list-post; bh=uIIJ/vkRB0Qw+eFZ+QtwSl0rRxqSK1fWRZHc7/+kn7A=; b=aMkelADGtn4kOyE+9U2ggcy8VC7Q9A9peZSJ2Esuk86gjdU+vA6eT9iJ0AmWLYvvcb2yKS 2Ebx6wWgC7ose860Lz1mfVRdsdlwurHijOTTCZjEssmUz3NotkK/2LyGZdDuFezDpMAM4v fFVwm+mb59glOZ28sGZNCvi3Vlw8DISIH8lPmmSeo2eoPuo20KBzlwA4ritCiO6+HBn6fJ 7ooNtR3l01B3z8/id/dolebkP3EQPvXtM0lUlhWtpVBBmr3Z4bk23F5P6AvQZISvIaeVty qVu2a1MXtahPiN07ul5nVFi1x6RaMED+6TQdxyCrTNsYVdGKk+yrk/saMqqPpA== ARC-Seal: i=1; s=key1; d=yhetil.org; t=1628788108; a=rsa-sha256; cv=none; b=S73AjSihWEQn5CBcZrun/wfLa6r01LYdKNyeK45RHmIbo/VMSCQJjbW3v8acgcEpHACybI zZLWRKgpD/YA8XR2godd4y2SJ079USsp/vgALm/T7Meiz0U3YngFNboM4UV4efMFXj7t+N fqOJcBZm11gcqwKseeHLEcPSziosm+UWOGXrqRYZNVG4cRrCQl+rUDsKNfV8u6vqcP6fag HwTEUor19DkzhCrelZbcYiIwXj1VdgHJtIwtvEMOiFAsyNrWQZNcUhYUK0/CkQCVGV6cUf FrdH1tNveXkuHrMAcN0ltTiHCBo29giPp/c0+5ViNLm5Wgm9+FqAQHz0gnvAtQ== ARC-Authentication-Results: i=1; aspmx1.migadu.com; dkim=none; dmarc=none; spf=pass (aspmx1.migadu.com: domain of notmuch-bounces@notmuchmail.org designates 144.217.243.247 as permitted sender) smtp.mailfrom=notmuch-bounces@notmuchmail.org X-Migadu-Spam-Score: 0.48 Authentication-Results: aspmx1.migadu.com; dkim=none; 
dmarc=none; spf=pass (aspmx1.migadu.com: domain of notmuch-bounces@notmuchmail.org designates 144.217.243.247 as permitted sender) smtp.mailfrom=notmuch-bounces@notmuchmail.org X-Migadu-Queue-Id: 05792BB07 X-Spam-Score: 0.48 X-Migadu-Scanner: scn1.migadu.com X-TUID: JZVTzAv6R8DK This will allow re-use of this code outside of the Xapian query parser. --- lib/database-private.h | 5 +++ lib/regexp-fields.cc | 81 +++++++++++++++++++++++++++++------------- lib/regexp-fields.h | 6 ++++ 3 files changed, 68 insertions(+), 24 deletions(-) diff --git a/lib/database-private.h b/lib/database-private.h index 85d55299..cf4eb94b 100644 --- a/lib/database-private.h +++ b/lib/database-private.h @@ -306,6 +306,11 @@ _notmuch_database_setup_user_query_fields (notmuch_database_t *notmuch); notmuch_status_t _notmuch_sexp_string_to_xapian_query (notmuch_database_t *notmuch, const char *querystr, Xapian::Query &output); + +notmuch_status_t +_notmuch_regexp_to_query (notmuch_database_t *notmuch, Xapian::valueno slot, std::string field, + std::string regexp_str, + Xapian::Query &output, std::string &msg); #endif #endif diff --git a/lib/regexp-fields.cc b/lib/regexp-fields.cc index 0feb50e5..c6d9d94f 100644 --- a/lib/regexp-fields.cc +++ b/lib/regexp-fields.cc @@ -26,27 +26,32 @@ #include "notmuch-private.h" #include "database-private.h" -static void -compile_regex (regex_t &regexp, const char *str) +notmuch_status_t +compile_regex (regex_t &regexp, const char *str, std::string &msg) { int err = regcomp (&regexp, str, REG_EXTENDED | REG_NOSUB); if (err != 0) { size_t len = regerror (err, &regexp, NULL, 0); char *buffer = new char[len]; - std::string msg = "Regexp error: "; + msg = "Regexp error: "; (void) regerror (err, &regexp, buffer, len); msg.append (buffer, len); delete[] buffer; - throw Xapian::QueryParserError (msg); + return NOTMUCH_STATUS_ILLEGAL_ARGUMENT; } + return NOTMUCH_STATUS_SUCCESS; } RegexpPostingSource::RegexpPostingSource (Xapian::valueno slot, const std::string &regexp) : slot_ (slot) { 
- compile_regex (regexp_, regexp.c_str ()); + std::string msg; + notmuch_status_t status = compile_regex (regexp_, regexp.c_str (), msg); + + if (status) + throw Xapian::QueryParserError (msg); } RegexpPostingSource::~RegexpPostingSource () @@ -141,18 +146,54 @@ _find_slot (std::string prefix) return Xapian::BAD_VALUENO; } -RegexpFieldProcessor::RegexpFieldProcessor (std::string prefix, +RegexpFieldProcessor::RegexpFieldProcessor (std::string field_, notmuch_field_flag_t options_, Xapian::QueryParser &parser_, notmuch_database_t *notmuch_) - : slot (_find_slot (prefix)), - term_prefix (_find_prefix (prefix.c_str ())), + : slot (_find_slot (field_)), + field (field_), + term_prefix (_find_prefix (field_.c_str ())), options (options_), parser (parser_), notmuch (notmuch_) { }; +notmuch_status_t +_notmuch_regexp_to_query (notmuch_database_t *notmuch, Xapian::valueno slot, std::string field, + std::string regexp_str, + Xapian::Query &output, std::string &msg) +{ + regex_t regexp; + notmuch_status_t status; + + status = compile_regex (regexp, regexp_str.c_str (), msg); + if (status) { + _notmuch_database_log_append (notmuch, "error compiling regex %s", msg.c_str ()); + return status; + } + + if (slot == Xapian::BAD_VALUENO) + slot = _find_slot (field); + + if (slot == Xapian::BAD_VALUENO) { + std::string term_prefix = _find_prefix (field.c_str ()); + std::vector<std::string> terms; + + for (Xapian::TermIterator it = notmuch->xapian_db->allterms_begin (term_prefix); + it != notmuch->xapian_db->allterms_end (); ++it) { + if (regexec (&regexp, (*it).c_str () + term_prefix.size (), + 0, NULL, 0) == 0) + terms.push_back (*it); + } + output = Xapian::Query (Xapian::Query::OP_OR, terms.begin (), terms.end ()); + } else { + RegexpPostingSource *postings = new RegexpPostingSource (slot, regexp_str); + output = Xapian::Query (postings->release ()); + } + return NOTMUCH_STATUS_SUCCESS; +} + Xapian::Query RegexpFieldProcessor::operator() (const std::string & str) { @@ -168,23 +209,15 @@ 
RegexpFieldProcessor::operator() (const std::string & str) if (str.at (0) == '/') { if (str.length () > 1 && str.at (str.size () - 1) == '/') { + Xapian::Query query; std::string regexp_str = str.substr (1, str.size () - 2); - if (slot != Xapian::BAD_VALUENO) { - RegexpPostingSource *postings = new RegexpPostingSource (slot, regexp_str); - return Xapian::Query (postings->release ()); - } else { - std::vector<std::string> terms; - regex_t regexp; - - compile_regex (regexp, regexp_str.c_str ()); - for (Xapian::TermIterator it = notmuch->xapian_db->allterms_begin (term_prefix); - it != notmuch->xapian_db->allterms_end (); ++it) { - if (regexec (&regexp, (*it).c_str () + term_prefix.size (), - 0, NULL, 0) == 0) - terms.push_back (*it); - } - return Xapian::Query (Xapian::Query::OP_OR, terms.begin (), terms.end ()); - } + std::string msg; + notmuch_status_t status; + + status = _notmuch_regexp_to_query (notmuch, slot, field, regexp_str, query, msg); + if (status) + throw Xapian::QueryParserError (msg); + return query; } else { throw Xapian::QueryParserError ("unmatched regex delimiter in '" + str + "'"); } diff --git a/lib/regexp-fields.h b/lib/regexp-fields.h index a8cca243..9c871de7 100644 --- a/lib/regexp-fields.h +++ b/lib/regexp-fields.h @@ -30,6 +30,11 @@ #include "database-private.h" #include "notmuch-private.h" +notmuch_status_t +_notmuch_regex_to_query (notmuch_database_t *notmuch, Xapian::valueno slot, std::string field, + std::string regexp_str, + Xapian::Query &output, std::string &msg); + /* A posting source that returns documents where a value matches a * regexp. */ @@ -64,6 +69,7 @@ public: class RegexpFieldProcessor : public Xapian::FieldProcessor { protected: Xapian::valueno slot; + std::string field; std::string term_prefix; notmuch_field_flag_t options; Xapian::QueryParser &parser; -- 2.30.2