From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from localhost (localhost [127.0.0.1]) by arlo.cworth.org (Postfix) with ESMTP id 2AE6E6DE024A for ; Thu, 19 Jan 2017 06:27:47 -0800 (PST) X-Virus-Scanned: Debian amavisd-new at cworth.org X-Spam-Flag: NO X-Spam-Score: -0.005 X-Spam-Level: X-Spam-Status: No, score=-0.005 tagged_above=-999 required=5 tests=[AWL=0.006, SPF_PASS=-0.001, T_RP_MATCHES_RCVD=-0.01] autolearn=disabled Received: from arlo.cworth.org ([127.0.0.1]) by localhost (arlo.cworth.org [127.0.0.1]) (amavisd-new, port 10024) with ESMTP id 6PO-g50hZy18 for ; Thu, 19 Jan 2017 06:27:46 -0800 (PST) Received: from fethera.tethera.net (fethera.tethera.net [198.245.60.197]) by arlo.cworth.org (Postfix) with ESMTPS id 1A9C26DE00D3 for ; Thu, 19 Jan 2017 06:27:46 -0800 (PST) Received: from remotemail by fethera.tethera.net with local (Exim 4.84_2) (envelope-from ) id 1cUDgJ-0000of-W3; Thu, 19 Jan 2017 09:27:16 -0500 Received: (nullmailer pid 11631 invoked by uid 1000); Thu, 19 Jan 2017 14:27:43 -0000 From: David Bremner To: notmuch@notmuchmail.org Subject: Re: [Patch v2] lib: regexp matching in 'subject' and 'from' In-Reply-To: <20161114214651.19770-1-david@tethera.net> References: <1467034387-16885-1-git-send-email-david@tethera.net> <20161114214651.19770-1-david@tethera.net> Date: Thu, 19 Jan 2017 10:27:43 -0400 Message-ID: <87o9z32kgw.fsf@rocinante.cs.unb.ca> MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: quoted-printable X-BeenThere: notmuch@notmuchmail.org X-Mailman-Version: 2.1.22 Precedence: list List-Id: "Use and development of the notmuch mail system." List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 19 Jan 2017 14:27:47 -0000 David Bremner writes: > the idea is that you can run > > % notmuch search re:subject: > % notmuch search re:from:' > > or > > % notmuch search subject:"your usual phrase search" > % notmuch search from:"usual phrase search" I'm not sure how useful it is, but here's an interdiff. diff --git a/lib/database-private.h b/lib/database-private.h index e7cbed8f..92f4b72f 100644 --- a/lib/database-private.h +++ b/lib/database-private.h @@ -190,7 +190,8 @@ struct _notmuch_database { #if HAVE_XAPIAN_FIELD_PROCESSOR Xapian::FieldProcessor *date_field_processor; Xapian::FieldProcessor *query_field_processor; - Xapian::FieldProcessor *re_field_processor; + Xapian::FieldProcessor *re_from_field_processor; + Xapian::FieldProcessor *re_subject_field_processor; #endif Xapian::ValueRangeProcessor *last_mod_range_processor; }; diff --git a/lib/database.cc b/lib/database.cc index 851a62d1..2b2f8f5e 100644 --- a/lib/database.cc +++ b/lib/database.cc @@ -1043,8 +1043,10 @@ notmuch_database_open_verbose (const char *path, notmuch->query_parser->add_boolean_prefix("date", notmuch->date_field_pro= cessor); notmuch->query_field_processor =3D new QueryFieldProcessor (*notmuch->que= ry_parser, notmuch); notmuch->query_parser->add_boolean_prefix("query", notmuch->query_field_p= rocessor); - notmuch->re_field_processor =3D new RegexpFieldProcessor (*notmuch->query= _parser, notmuch); - notmuch->query_parser->add_boolean_prefix("re", notmuch->re_field_process= or); + notmuch->re_from_field_processor =3D new RegexpFieldProcessor ("from", *n= otmuch->query_parser, notmuch); + notmuch->re_subject_field_processor =3D new RegexpFieldProcessor ("subjec= t", *notmuch->query_parser, notmuch); + notmuch->query_parser->add_boolean_prefix("re_from", notmuch->re_from_fie= ld_processor); + notmuch->query_parser->add_boolean_prefix("re_subject", notmuch->re_subje= ct_field_processor); #endif notmuch->last_mod_range_processor =3D new Xapian::NumberValueRangeProcess= or (NOTMUCH_VALUE_LAST_MOD, "lastmod:"); =20 @@ -1141,8 +1143,10 @@ notmuch_database_close (notmuch_database_t *notmuch) notmuch->date_field_processor =3D NULL; delete notmuch->query_field_processor; notmuch->query_field_processor =3D NULL; - delete notmuch->re_field_processor; - notmuch->re_field_processor =3D NULL; + delete notmuch->re_from_field_processor; + notmuch->re_from_field_processor =3D NULL; + delete notmuch->re_subject_field_processor; + notmuch->re_subject_field_processor =3D NULL; #endif =20 return status; diff --git a/lib/regexp-fields.cc b/lib/regexp-fields.cc index 4d3d9721..211ec02d 100644 --- a/lib/regexp-fields.cc +++ b/lib/regexp-fields.cc @@ -101,25 +101,10 @@ RegexpPostingSource::next (unused (double min_wt)) } } =20 -static Xapian::valueno -_find_slot (std::string prefix) -{ - if (prefix =3D=3D "from") - return NOTMUCH_VALUE_FROM; - else if (prefix =3D=3D "subject") - return NOTMUCH_VALUE_SUBJECT; - else - throw Xapian::QueryParserError ("unsupported regexp field '" + prefix + "= '"); -} - Xapian::Query RegexpFieldProcessor::operator() (const std::string & str) { - size_t pos =3D str.find_first_of (':'); - std::string prefix =3D str.substr (0, pos); - std::string regexp =3D str.substr (pos + 1); - - postings =3D new RegexpPostingSource (_find_slot (prefix), regexp); + postings =3D new RegexpPostingSource (slot, str); return Xapian::Query (postings); } #endif diff --git a/lib/regexp-fields.h b/lib/regexp-fields.h index 2c9c2d7e..c2c44167 100644 --- a/lib/regexp-fields.h +++ b/lib/regexp-fields.h @@ -61,13 +61,26 @@ class RegexpPostingSource : public Xapian::PostingSource =20 class RegexpFieldProcessor : public Xapian::FieldProcessor { protected: + Xapian::valueno slot; Xapian::QueryParser &parser; notmuch_database_t *notmuch; RegexpPostingSource *postings =3D NULL; =20 + + static inline Xapian::valueno _find_slot (std::string prefix) + { + if (prefix =3D=3D "from") + return NOTMUCH_VALUE_FROM; + else if (prefix =3D=3D "subject") + return NOTMUCH_VALUE_SUBJECT; + else + throw Xapian::QueryParserError ("unsupported regexp field '" + prefix= + "'"); + } + + public: - RegexpFieldProcessor (Xapian::QueryParser &parser_, notmuch_database_t= *notmuch_) - : parser(parser_), notmuch(notmuch_) { }; + RegexpFieldProcessor (std::string prefix, Xapian::QueryParser &parser_= , notmuch_database_t *notmuch_) + : slot(_find_slot (prefix)), parser(parser_), notmuch(notmuch_) { }; =20 ~RegexpFieldProcessor () { delete postings; }; =20 diff --git a/test/T630-regexp-query.sh b/test/T630-regexp-query.sh index 3bbe47cf..1b25634d 100755 --- a/test/T630-regexp-query.sh +++ b/test/T630-regexp-query.sh @@ -10,15 +10,15 @@ if [ $NOTMUCH_HAVE_XAPIAN_FIELD_PROCESSOR -eq 1 ]; then notmuch search --output=3Dmessages from:cworth > cworth.msg-ids =20 test_begin_subtest "regexp from search, case sensitive" - notmuch search --output=3Dmessages re:from:carl > OUTPUT + notmuch search --output=3Dmessages re_from:carl > OUTPUT test_expect_equal_file /dev/null OUTPUT =20 test_begin_subtest "empty regexp or query" - notmuch search --output=3Dmessages re:from:carl or from:cworth > OUTPUT + notmuch search --output=3Dmessages re_from:carl or from:cworth > OUTPUT test_expect_equal_file cworth.msg-ids OUTPUT =20 test_begin_subtest "non-empty regexp and query" - notmuch search re:from:cworth and subject:patch > OUTPUT + notmuch search re_from:cworth and subject:patch > OUTPUT cat < EXPECTED thread:0000000000000008 2009-11-18 [1/2] Carl Worth| Alex Botero-Lowry; = [notmuch] [PATCH] Error out if no query is supplied to search instead of go= ing into an infinite loop (attachment inbox unread) thread:0000000000000007 2009-11-18 [1/2] Carl Worth| Ingmar Vanhassel; [= notmuch] [PATCH] Typsos (inbox unread) @@ -30,32 +30,32 @@ EOF test_expect_equal_file EXPECTED OUTPUT =20 test_begin_subtest "regexp from search, duplicate term search" - notmuch search --output=3Dmessages re:from:cworth > OUTPUT + notmuch search --output=3Dmessages re_from:cworth > OUTPUT test_expect_equal_file cworth.msg-ids OUTPUT =20 test_begin_subtest "long enough regexp matches only desired senders" - notmuch search --output=3Dmessages 're:"from:C.* Wo"' > OUTPUT + notmuch search --output=3Dmessages 're_from:"C.* Wo"' > OUTPUT test_expect_equal_file cworth.msg-ids OUTPUT =20 test_begin_subtest "shorter regexp matches one more sender" - notmuch search --output=3Dmessages 're:"from:C.* W"' > OUTPUT + notmuch search --output=3Dmessages 're_from:"C.* W"' > OUTPUT (echo id:1258544095-16616-1-git-send-email-chris@chris-wilson.co.uk ; = cat cworth.msg-ids) > EXPECTED test_expect_equal_file EXPECTED OUTPUT =20 test_begin_subtest "regexp subject search, non-ASCII" - notmuch search --output=3Dmessages re:subject:accentu=C3=A9 > OUTPUT + notmuch search --output=3Dmessages re_subject:accentu=C3=A9 > OUTPUT echo id:877h1wv7mg.fsf@inf-8657.int-evry.fr > EXPECTED test_expect_equal_file EXPECTED OUTPUT =20 test_begin_subtest "regexp subject search, punctuation" - notmuch search re:subject:\'X\' > OUTPUT + notmuch search re_subject:\'X\' > OUTPUT cat < EXPECTED thread:0000000000000017 2009-11-18 [2/2] Keith Packard, Carl Worth; [not= much] [PATCH] Make notmuch-show 'X' (and 'x') commands remove inbox (and un= read) tags (inbox unread) EOF test_expect_equal_file EXPECTED OUTPUT =20 test_begin_subtest "regexp subject search, no punctuation" - notmuch search re:subject:X > OUTPUT + notmuch search re_subject:X > OUTPUT cat < EXPECTED thread:0000000000000017 2009-11-18 [2/2] Keith Packard, Carl Worth; [not= much] [PATCH] Make notmuch-show 'X' (and 'x') commands remove inbox (and un= read) tags (inbox unread) thread:000000000000000f 2009-11-18 [4/4] Jjgod Jiang, Alexander Botero-L= owry; [notmuch] Mac OS X/Darwin compatibility issues (inbox unread) @@ -63,27 +63,18 @@ EOF test_expect_equal_file EXPECTED OUTPUT =20 test_begin_subtest "combine regexp from and subject" - notmuch search re:subject:-C and re:from:.an.k > OUTPUT + notmuch search re_subject:-C and re_from:.an.k > OUTPUT cat < EXPECTED thread:0000000000000018 2009-11-17 [1/2] Jan Janak| Carl Worth; [notmuch= ] [PATCH] Older versions of install do not support -C. (inbox unread) EOF test_expect_equal_file EXPECTED OUTPUT =20 - test_begin_subtest "bad subprefix" - notmuch search 're:unsupported:.*' 1>OUTPUT 2>&1 - cat < EXPECTED -notmuch search: A Xapian exception occurred -A Xapian exception occurred performing query: unsupported regexp field 'un= supported' -Query string was: re:unsupported:.* -EOF - test_expect_equal_file EXPECTED OUTPUT - test_begin_subtest "regexp error reporting" - notmuch search 're:from:unbalanced[' 1>OUTPUT 2>&1 + notmuch search 're_from:unbalanced[' 1>OUTPUT 2>&1 cat < EXPECTED notmuch search: A Xapian exception occurred A Xapian exception occurred performing query: Invalid regular expression -Query string was: re:from:unbalanced[ +Query string was: re_from:unbalanced[ EOF test_expect_equal_file EXPECTED OUTPUT fi