From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from localhost (localhost [127.0.0.1]) by arlo.cworth.org (Postfix) with ESMTP id ED78A6DE12C3 for ; Wed, 29 Mar 2017 17:46:11 -0700 (PDT) X-Virus-Scanned: Debian amavisd-new at cworth.org X-Spam-Flag: NO X-Spam-Score: -0.005 X-Spam-Level: X-Spam-Status: No, score=-0.005 tagged_above=-999 required=5 tests=[AWL=0.006, SPF_PASS=-0.001, T_RP_MATCHES_RCVD=-0.01] autolearn=disabled Received: from arlo.cworth.org ([127.0.0.1]) by localhost (arlo.cworth.org [127.0.0.1]) (amavisd-new, port 10024) with ESMTP id CBVEMFFCxFR8 for ; Wed, 29 Mar 2017 17:46:11 -0700 (PDT) Received: from fethera.tethera.net (fethera.tethera.net [198.245.60.197]) by arlo.cworth.org (Postfix) with ESMTPS id 418896DE1344 for ; Wed, 29 Mar 2017 17:46:11 -0700 (PDT) Received: from remotemail by fethera.tethera.net with local (Exim 4.84_2) (envelope-from ) id 1ctODO-0004Wz-C8; Wed, 29 Mar 2017 20:45:26 -0400 Received: (nullmailer pid 1583 invoked by uid 1000); Thu, 30 Mar 2017 00:46:08 -0000 From: David Bremner To: David Bremner , notmuch@notmuchmail.org Subject: [PATCH 2/2] lib: Add regexp expansion for tags and paths Date: Wed, 29 Mar 2017 21:46:04 -0300 Message-Id: <20170330004604.1504-3-david@tethera.net> X-Mailer: git-send-email 2.11.0 In-Reply-To: <20170330004604.1504-1-david@tethera.net> References: <20170324121436.28978-1-david@tethera.net> <20170330004604.1504-1-david@tethera.net> X-BeenThere: notmuch@notmuchmail.org X-Mailman-Version: 2.1.22 Precedence: list List-Id: "Use and development of the notmuch mail system." List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 30 Mar 2017 00:46:12 -0000 >From a UI perspective this looks similar to what was already provided for from, subject, and mid, but the implementation is quite different. It uses the database's list of terms to construct a term-based query equivalent to the passed regular expression. 
--- lib/database.cc | 12 ++++++++---- lib/regexp-fields.cc | 31 +++++++++++++++++++++++++------ 2 files changed, 33 insertions(+), 10 deletions(-) diff --git a/lib/database.cc b/lib/database.cc index 49b3849c..5b13f541 100644 --- a/lib/database.cc +++ b/lib/database.cc @@ -259,12 +259,15 @@ prefix_t prefix_table[] = { { "file-direntry", "XFDIRENTRY", NOTMUCH_FIELD_NO_FLAGS }, { "directory-direntry", "XDDIRENTRY", NOTMUCH_FIELD_NO_FLAGS }, { "thread", "G", NOTMUCH_FIELD_EXTERNAL }, - { "tag", "K", NOTMUCH_FIELD_EXTERNAL }, - { "is", "K", NOTMUCH_FIELD_EXTERNAL }, + { "tag", "K", NOTMUCH_FIELD_EXTERNAL | + NOTMUCH_FIELD_PROCESSOR }, + { "is", "K", NOTMUCH_FIELD_EXTERNAL | + NOTMUCH_FIELD_PROCESSOR }, { "id", "Q", NOTMUCH_FIELD_EXTERNAL }, { "mid", "Q", NOTMUCH_FIELD_EXTERNAL | NOTMUCH_FIELD_PROCESSOR }, - { "path", "P", NOTMUCH_FIELD_EXTERNAL }, + { "path", "P", NOTMUCH_FIELD_EXTERNAL| + NOTMUCH_FIELD_PROCESSOR }, { "property", "XPROPERTY", NOTMUCH_FIELD_EXTERNAL }, /* * Unconditionally add ':' to reduce potential ambiguity with @@ -272,7 +275,8 @@ prefix_t prefix_table[] = { * letters. See Xapian document termprefixes.html for related * discussion. 
 */ - { "folder", "XFOLDER:", NOTMUCH_FIELD_EXTERNAL }, + { "folder", "XFOLDER:", NOTMUCH_FIELD_EXTERNAL | + NOTMUCH_FIELD_PROCESSOR }, #if HAVE_XAPIAN_FIELD_PROCESSOR { "date", NULL, NOTMUCH_FIELD_EXTERNAL | NOTMUCH_FIELD_PROCESSOR }, diff --git a/lib/regexp-fields.cc b/lib/regexp-fields.cc index 7ae55e70..1598c17f 100644 --- a/lib/regexp-fields.cc +++ b/lib/regexp-fields.cc @@ -138,7 +138,7 @@ static inline Xapian::valueno _find_slot (std::string prefix) else if (prefix == "mid") return NOTMUCH_VALUE_MESSAGE_ID; else - throw Xapian::QueryParserError ("unsupported regexp field '" + prefix + "'"); + return Xapian::BAD_VALUENO; } RegexpFieldProcessor::RegexpFieldProcessor (std::string prefix, @@ -156,15 +156,34 @@ RegexpFieldProcessor::RegexpFieldProcessor (std::string prefix, Xapian::Query RegexpFieldProcessor::operator() (const std::string & str) { - if (str.size () == 0) - return Xapian::Query(Xapian::Query::OP_AND_NOT, + if (str.empty ()) { + if (options & NOTMUCH_FIELD_PROBABILISTIC) { + return Xapian::Query(Xapian::Query::OP_AND_NOT, Xapian::Query::MatchAll, Xapian::Query (Xapian::Query::OP_WILDCARD, term_prefix)); + } else { + return Xapian::Query (term_prefix); + } + } if (str.at (0) == '/') { - if (str.at (str.size () - 1) == '/'){ - RegexpPostingSource *postings = new RegexpPostingSource (slot, str.substr(1,str.size () - 2)); - return Xapian::Query (postings->release ()); + if (str.length() > 1 && str.at (str.size () - 1) == '/'){ + std::string regexp_str = str.substr(1,str.size () - 2); + if (slot != Xapian::BAD_VALUENO) { + RegexpPostingSource *postings = new RegexpPostingSource (slot, regexp_str); + return Xapian::Query (postings->release ()); + } else { + std::vector<std::string> terms; + regex_t regexp; + + compile_regex(regexp, regexp_str.c_str ()); + for (Xapian::TermIterator it = notmuch->xapian_db->allterms_begin (term_prefix); + it != notmuch->xapian_db->allterms_end (); ++it) { + if (regexec (&regexp, (*it).c_str (), 0, NULL, 0) == 0) + terms.push_back(*it); + } 
+ return Xapian::Query (Xapian::Query::OP_OR, terms.begin(), terms.end()); + } } else { throw Xapian::QueryParserError ("unmatched regex delimiter in '" + str + "'"); } -- 2.11.0