unofficial mirror of notmuch@notmuchmail.org
 help / color / mirror / code / Atom feed
* v6 of regexp searching
@ 2017-02-27  2:34 David Bremner
  2017-02-27  2:34 ` [PATCH 1/4] lib: create field processors from prefix table David Bremner
                   ` (3 more replies)
  0 siblings, 4 replies; 7+ messages in thread
From: David Bremner @ 2017-02-27  2:34 UTC (permalink / raw)
  To: notmuch

This obsoletes the unmerged patches from

     id:20170217030754.32069-1-david@tethera.net

The first two I plan to merge for 0.24 (barring corrections or objections).

[PATCH 1/4] lib: create field processors from prefix table
[PATCH 2/4] lib: regexp matching in 'subject' and 'from'

The second two could go in for 0.24, or wait.

[PATCH 3/4] lib: add mid: as a synonym for id:
[PATCH 4/4] lib: Add regexp searching for mid: prefix

The big change is a fix for the problem Jani noticed in

    id:87innwhhid.fsf@nikula.org

in the case where field processors are not present.

Interdiff follows.

diff --git a/lib/database-private.h b/lib/database-private.h
index 9fd4102c..ab3d9691 100644
--- a/lib/database-private.h
+++ b/lib/database-private.h
@@ -154,7 +154,7 @@ typedef enum notmuch_field_flags {
     NOTMUCH_FIELD_NO_FLAGS = 0,
     NOTMUCH_FIELD_EXTERNAL = 1 << 0,
     NOTMUCH_FIELD_PROBABILISTIC = 1 << 1,
-    NOTMUCH_FIELD_PROCESSOR = 1 << 2
+    NOTMUCH_FIELD_PROCESSOR = 1 << 2,
 } notmuch_field_flag_t;
 
 /*
diff --git a/lib/database.cc b/lib/database.cc
index 6e5ea106..09337602 100644
--- a/lib/database.cc
+++ b/lib/database.cc
@@ -293,19 +293,42 @@ prefix_t prefix_table[] = {
 						NOTMUCH_FIELD_PROCESSOR},
 };
 
+static void
+_setup_query_field_default (const prefix_t *prefix, notmuch_database_t *notmuch)
+{
+    if (prefix->flags & NOTMUCH_FIELD_PROBABILISTIC)
+	notmuch->query_parser->add_prefix (prefix->name, prefix->prefix);
+    else
+	notmuch->query_parser->add_boolean_prefix (prefix->name, prefix->prefix);
+}
+
 #if HAVE_XAPIAN_FIELD_PROCESSOR
-static Xapian::FieldProcessor *
-_make_field_processor (const char *name, notmuch_field_flag_t options,
-		       notmuch_database_t *notmuch) {
-    if (STRNCMP_LITERAL (name, "date") == 0)
-	return (new DateFieldProcessor())->release ();
-    else if (STRNCMP_LITERAL(name, "query") == 0)
-	return (new QueryFieldProcessor (*notmuch->query_parser, notmuch))->release ();
+static void
+_setup_query_field (const prefix_t *prefix, notmuch_database_t *notmuch)
+{
+    if (prefix->flags & NOTMUCH_FIELD_PROCESSOR) {
+	Xapian::FieldProcessor *fp;
+
+	if (STRNCMP_LITERAL (prefix->name, "date") == 0)
+	    fp = (new DateFieldProcessor())->release ();
+	else if (STRNCMP_LITERAL(prefix->name, "query") == 0)
+	    fp = (new QueryFieldProcessor (*notmuch->query_parser, notmuch))->release ();
 	else
-	return (new RegexpFieldProcessor (name, options, *notmuch->query_parser, notmuch))->release ();
+	    fp = (new RegexpFieldProcessor (prefix->name, prefix->flags,
+					    *notmuch->query_parser, notmuch))->release ();
+
+	/* we treat all field-processor fields as boolean in order to get the raw input */
+	notmuch->query_parser->add_boolean_prefix (prefix->name, fp);
+    } else {
+	_setup_query_field_default (prefix, notmuch);
+    }
 }
 #else
-#define _make_field_processor(name, options, db) NULL
+static inline void
+_setup_query_field (const prefix_t *prefix, notmuch_database_t *notmuch)
+{
+    _setup_query_field_default (prefix, notmuch);
+}
 #endif
 
 const char *
@@ -1067,22 +1090,7 @@ notmuch_database_open_verbose (const char *path,
 	for (i = 0; i < ARRAY_SIZE (prefix_table); i++) {
 	    const prefix_t *prefix = &prefix_table[i];
 	    if (prefix->flags & NOTMUCH_FIELD_EXTERNAL) {
-		/* we treat all field-processor fields as boolean in order
-		   to get the raw input */
-		if (HAVE_XAPIAN_FIELD_PROCESSOR &&
-		    (prefix->flags & NOTMUCH_FIELD_PROCESSOR)) {
-		    Xapian::FieldProcessor *fp = _make_field_processor (prefix->name,
-									prefix->flags,
-									notmuch);
-
-		    notmuch->query_parser->add_boolean_prefix (prefix->name, fp);
-		} else if (prefix->flags & NOTMUCH_FIELD_PROBABILISTIC) {
-			notmuch->query_parser->add_prefix (prefix->name,
-							   prefix->prefix);
-		} else {
-		    notmuch->query_parser->add_boolean_prefix (prefix->name,
-							       prefix->prefix);
-		}
+		_setup_query_field (prefix, notmuch);
 	    }
 	}
     } catch (const Xapian::Error &error) {
diff --git a/lib/regexp-fields.h b/lib/regexp-fields.h
index 8a0e72e1..72d12b37 100644
--- a/lib/regexp-fields.h
+++ b/lib/regexp-fields.h
@@ -63,7 +63,7 @@ class RegexpFieldProcessor : public Xapian::FieldProcessor {
  protected:
     Xapian::valueno slot;
     std::string term_prefix;
-    int options;
+    notmuch_field_flag_t options;
     Xapian::QueryParser &parser;
     notmuch_database_t *notmuch;
 
diff --git a/test/T630-regexp-query.sh b/test/T650-regexp-query.sh
similarity index 37%
rename from test/T630-regexp-query.sh
rename to test/T650-regexp-query.sh
index 96bd8746..f0868a15 100755
--- a/test/T630-regexp-query.sh
+++ b/test/T650-regexp-query.sh
@@ -5,7 +5,9 @@ test_description='regular expression searches'
 add_email_corpus
 
 
-if [ $NOTMUCH_HAVE_XAPIAN_FIELD_PROCESSOR -eq 1 ]; then
+if [ $NOTMUCH_HAVE_XAPIAN_FIELD_PROCESSOR -eq 0 ]; then
+    test_done
+fi
 
 notmuch search --output=messages from:cworth > cworth.msg-ids
 
@@ -76,6 +78,21 @@ A Xapian exception occurred performing query: Invalid regular expression
 Query string was: from:/unbalanced[/
 EOF
 test_expect_equal_file EXPECTED OUTPUT
-fi
+
+test_begin_subtest "empty mid search"
+notmuch search --output=messages mid:yoom > OUTPUT
+cp /dev/null EXPECTED
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "non-empty mid regex search"
+notmuch search --output=messages mid:/yoom/ > OUTPUT
+test_expect_equal_file cworth.msg-ids OUTPUT
+
+test_begin_subtest "combine regexp mid and subject"
+notmuch search  subject:/-C/ and mid:/y..m/ | notmuch_search_sanitize > OUTPUT
+cat <<EOF > EXPECTED
+thread:XXX   2009-11-18 [1/2] Carl Worth| Jan Janak; [notmuch] [PATCH] Older versions of install do not support -C. (inbox unread)
+EOF
+test_expect_equal_file EXPECTED OUTPUT
 
 test_done

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 1/4] lib: create field processors from prefix table
  2017-02-27  2:34 v6 of regexp searching David Bremner
@ 2017-02-27  2:34 ` David Bremner
  2017-02-27  2:34 ` [PATCH 2/4] lib: regexp matching in 'subject' and 'from' David Bremner
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 7+ messages in thread
From: David Bremner @ 2017-02-27  2:34 UTC (permalink / raw)
  To: notmuch

This is a bit more code than hardcoding the two existing field
processors, but it should make it easy to add more.
---
 lib/database-private.h |  3 ++-
 lib/database.cc        | 62 +++++++++++++++++++++++++++++++++++---------------
 2 files changed, 46 insertions(+), 19 deletions(-)

diff --git a/lib/database-private.h b/lib/database-private.h
index 06882439..ab3d9691 100644
--- a/lib/database-private.h
+++ b/lib/database-private.h
@@ -153,7 +153,8 @@ operator&=(_notmuch_features &a, _notmuch_features b)
 typedef enum notmuch_field_flags {
     NOTMUCH_FIELD_NO_FLAGS = 0,
     NOTMUCH_FIELD_EXTERNAL = 1 << 0,
-    NOTMUCH_FIELD_PROBABILISTIC = 1 << 1
+    NOTMUCH_FIELD_PROBABILISTIC = 1 << 1,
+    NOTMUCH_FIELD_PROCESSOR = 1 << 2,
 } notmuch_field_flag_t;
 
 /*
diff --git a/lib/database.cc b/lib/database.cc
index ba440d4d..fa4c3116 100644
--- a/lib/database.cc
+++ b/lib/database.cc
@@ -270,6 +270,12 @@ prefix_t prefix_table[] = {
      * discussion.
      */
     { "folder",			"XFOLDER:",	NOTMUCH_FIELD_EXTERNAL },
+#if HAVE_XAPIAN_FIELD_PROCESSOR
+    { "date",			NULL,		NOTMUCH_FIELD_EXTERNAL |
+						NOTMUCH_FIELD_PROCESSOR },
+    { "query",			NULL,		NOTMUCH_FIELD_EXTERNAL |
+						NOTMUCH_FIELD_PROCESSOR },
+#endif
     { "from",			"XFROM",	NOTMUCH_FIELD_EXTERNAL |
 						NOTMUCH_FIELD_PROBABILISTIC },
     { "to",			"XTO",		NOTMUCH_FIELD_EXTERNAL |
@@ -282,6 +288,43 @@ prefix_t prefix_table[] = {
 						NOTMUCH_FIELD_PROBABILISTIC },
 };
 
+static void
+_setup_query_field_default (const prefix_t *prefix, notmuch_database_t *notmuch)
+{
+    if (prefix->flags & NOTMUCH_FIELD_PROBABILISTIC)
+	notmuch->query_parser->add_prefix (prefix->name, prefix->prefix);
+    else
+	notmuch->query_parser->add_boolean_prefix (prefix->name, prefix->prefix);
+}
+
+#if HAVE_XAPIAN_FIELD_PROCESSOR
+static void
+_setup_query_field (const prefix_t *prefix, notmuch_database_t *notmuch)
+{
+    if (prefix->flags & NOTMUCH_FIELD_PROCESSOR) {
+	Xapian::FieldProcessor *fp;
+
+	if (STRNCMP_LITERAL (prefix->name, "date") == 0)
+	    fp = (new DateFieldProcessor())->release ();
+	else if (STRNCMP_LITERAL(prefix->name, "query") == 0)
+	    fp = (new QueryFieldProcessor (*notmuch->query_parser, notmuch))->release ();
+	else
+	    INTERNAL_ERROR("unsupported field processor prefix: %s\n", prefix->name);
+
+	/* we treat all field-processor fields as boolean in order to get the raw input */
+	notmuch->query_parser->add_boolean_prefix (prefix->name, fp);
+    } else {
+	_setup_query_field_default (prefix, notmuch);
+    }
+}
+#else
+static inline void
+_setup_query_field (const prefix_t *prefix, notmuch_database_t *notmuch)
+{
+    _setup_query_field_default (prefix, notmuch);
+}
+#endif
+
 const char *
 _find_prefix (const char *name)
 {
@@ -1028,18 +1071,6 @@ notmuch_database_open_verbose (const char *path,
 	notmuch->term_gen->set_stemmer (Xapian::Stem ("english"));
 	notmuch->value_range_processor = new Xapian::NumberValueRangeProcessor (NOTMUCH_VALUE_TIMESTAMP);
 	notmuch->date_range_processor = new ParseTimeValueRangeProcessor (NOTMUCH_VALUE_TIMESTAMP);
-#if HAVE_XAPIAN_FIELD_PROCESSOR
-	/* This currently relies on the query parser to pass anything
-	 * with a .. to the range processor */
-	{
-	    Xapian::FieldProcessor * date_fp = new DateFieldProcessor();
-	    Xapian::FieldProcessor * query_fp =
-		new QueryFieldProcessor (*notmuch->query_parser, notmuch);
-
-	    notmuch->query_parser->add_boolean_prefix("date", date_fp->release ());
-	    notmuch->query_parser->add_boolean_prefix("query", query_fp->release ());
-	}
-#endif
 	notmuch->last_mod_range_processor = new Xapian::NumberValueRangeProcessor (NOTMUCH_VALUE_LAST_MOD, "lastmod:");
 
 	notmuch->query_parser->set_default_op (Xapian::Query::OP_AND);
@@ -1053,12 +1084,7 @@ notmuch_database_open_verbose (const char *path,
 	for (i = 0; i < ARRAY_SIZE (prefix_table); i++) {
 	    const prefix_t *prefix = &prefix_table[i];
 	    if (prefix->flags & NOTMUCH_FIELD_EXTERNAL) {
-		if (prefix->flags & NOTMUCH_FIELD_PROBABILISTIC) {
-		    notmuch->query_parser->add_prefix (prefix->name, prefix->prefix);
-		} else {
-		    notmuch->query_parser->add_boolean_prefix (prefix->name,
-							       prefix->prefix);
-		}
+		_setup_query_field (prefix, notmuch);
 	    }
 	}
     } catch (const Xapian::Error &error) {
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 2/4] lib: regexp matching in 'subject' and 'from'
  2017-02-27  2:34 v6 of regexp searching David Bremner
  2017-02-27  2:34 ` [PATCH 1/4] lib: create field processors from prefix table David Bremner
@ 2017-02-27  2:34 ` David Bremner
  2017-03-03 13:23   ` [PATCH] fixup! " David Bremner
  2017-02-27  2:34 ` [PATCH 3/4] lib: add mid: as a synonym for id: David Bremner
  2017-02-27  2:34 ` [PATCH 4/4] lib: Add regexp searching for mid: prefix David Bremner
  3 siblings, 1 reply; 7+ messages in thread
From: David Bremner @ 2017-02-27  2:34 UTC (permalink / raw)
  To: notmuch

The idea is that you can run

% notmuch search subject:/<your-favourite-regexp>/
% notmuch search from:/<your-favourite-regexp>/

or

% notmuch search subject:"your usual phrase search"
% notmuch search from:"usual phrase search"

This feature is only available with recent Xapian; specifically,
support for field processors is needed.

It should work with bindings, since it extends the query parser.

This is easy to extend for other value slots, but currently the only
value slots are date, message_id, from, subject, and last_mod. Date is
already searchable;  message_id is left for a followup commit.

This was originally written by Austin Clements, and ported to Xapian
field processors (from Austin's custom query parser) by yours truly.
---
 doc/man7/notmuch-search-terms.rst |  25 ++++++-
 lib/Makefile.local                |   1 +
 lib/database.cc                   |   9 ++-
 lib/regexp-fields.cc              | 144 ++++++++++++++++++++++++++++++++++++++
 lib/regexp-fields.h               |  77 ++++++++++++++++++++
 test/T650-regexp-query.sh         |  82 ++++++++++++++++++++++
 6 files changed, 332 insertions(+), 6 deletions(-)
 create mode 100644 lib/regexp-fields.cc
 create mode 100644 lib/regexp-fields.h
 create mode 100755 test/T650-regexp-query.sh

diff --git a/doc/man7/notmuch-search-terms.rst b/doc/man7/notmuch-search-terms.rst
index de93d733..47cab48d 100644
--- a/doc/man7/notmuch-search-terms.rst
+++ b/doc/man7/notmuch-search-terms.rst
@@ -34,10 +34,14 @@ indicate user-supplied values):
 
 -  from:<name-or-address>
 
+-  from:/<regex>/
+
 -  to:<name-or-address>
 
 -  subject:<word-or-quoted-phrase>
 
+-  subject:/<regex>/
+
 -  attachment:<word>
 
 -  mimetype:<word>
@@ -71,6 +75,15 @@ subject of an email. Searching for a phrase in the subject is supported
 by including quotation marks around the phrase, immediately following
 **subject:**.
 
+If notmuch is built with **Xapian Field Processors** (see below) the
+**from:** and **subject:** prefixes can also be used to restrict the
+results to those whose from/subject value matches a regular expression
+(see **regex(7)**) delimited with //.
+
+::
+
+   notmuch search 'from:/bob@.*[.]example[.]com/'
+
 The **attachment:** prefix can be used to search for specific filenames
 (or extensions) of attachments to email messages.
 
@@ -220,13 +233,18 @@ Boolean and Probabilistic Prefixes
 ----------------------------------
 
 Xapian (and hence notmuch) prefixes are either **boolean**, supporting
-exact matches like "tag:inbox"  or **probabilistic**, supporting a more flexible **term** based searching. The prefixes currently supported by notmuch are as follows.
-
+exact matches like "tag:inbox" or **probabilistic**, supporting a more
+flexible **term** based searching. Certain **special** prefixes are
+processed by notmuch in a way not strictly fitting either of Xapian's
+built-in styles. The prefixes currently supported by notmuch are as
+follows.
 
 Boolean
    **tag:**, **id:**, **thread:**, **folder:**, **path:**, **property:**
 Probabilistic
-   **from:**, **to:**, **subject:**, **attachment:**, **mimetype:**
+   **to:**, **attachment:**, **mimetype:**
+Special
+   **from:**, **query:**, **subject:**
 
 Terms and phrases
 -----------------
@@ -396,6 +414,7 @@ Currently the following features require field processor support:
 
 - non-range date queries, e.g. "date:today"
 - named queries e.g. "query:my_special_query"
+- regular expression searches, e.g. "subject:/^\\[SPAM\\]/"
 
 SEE ALSO
 ========
diff --git a/lib/Makefile.local b/lib/Makefile.local
index b77e5780..cd92fc79 100644
--- a/lib/Makefile.local
+++ b/lib/Makefile.local
@@ -52,6 +52,7 @@ libnotmuch_cxx_srcs =		\
 	$(dir)/query.cc		\
 	$(dir)/query-fp.cc      \
 	$(dir)/config.cc	\
+	$(dir)/regexp-fields.cc	\
 	$(dir)/thread.cc
 
 libnotmuch_modules := $(libnotmuch_c_srcs:.c=.o) $(libnotmuch_cxx_srcs:.cc=.o)
diff --git a/lib/database.cc b/lib/database.cc
index fa4c3116..573c9fe0 100644
--- a/lib/database.cc
+++ b/lib/database.cc
@@ -21,6 +21,7 @@
 #include "database-private.h"
 #include "parse-time-vrp.h"
 #include "query-fp.h"
+#include "regexp-fields.h"
 #include "string-util.h"
 
 #include <iostream>
@@ -277,7 +278,8 @@ prefix_t prefix_table[] = {
 						NOTMUCH_FIELD_PROCESSOR },
 #endif
     { "from",			"XFROM",	NOTMUCH_FIELD_EXTERNAL |
-						NOTMUCH_FIELD_PROBABILISTIC },
+						NOTMUCH_FIELD_PROBABILISTIC |
+						NOTMUCH_FIELD_PROCESSOR },
     { "to",			"XTO",		NOTMUCH_FIELD_EXTERNAL |
 						NOTMUCH_FIELD_PROBABILISTIC },
     { "attachment",		"XATTACHMENT",	NOTMUCH_FIELD_EXTERNAL |
@@ -285,7 +287,8 @@ prefix_t prefix_table[] = {
     { "mimetype",		"XMIMETYPE",	NOTMUCH_FIELD_EXTERNAL |
 						NOTMUCH_FIELD_PROBABILISTIC },
     { "subject",		"XSUBJECT",	NOTMUCH_FIELD_EXTERNAL |
-						NOTMUCH_FIELD_PROBABILISTIC },
+						NOTMUCH_FIELD_PROBABILISTIC |
+						NOTMUCH_FIELD_PROCESSOR},
 };
 
 static void
@@ -309,7 +312,7 @@ _setup_query_field (const prefix_t *prefix, notmuch_database_t *notmuch)
 	else if (STRNCMP_LITERAL(prefix->name, "query") == 0)
 	    fp = (new QueryFieldProcessor (*notmuch->query_parser, notmuch))->release ();
 	else
-	    INTERNAL_ERROR("unsupported field processor prefix: %s\n", prefix->name);
+	    fp = (new RegexpFieldProcessor (prefix->name, *notmuch->query_parser, notmuch))->release ();
 
 	/* we treat all field-processor fields as boolean in order to get the raw input */
 	notmuch->query_parser->add_boolean_prefix (prefix->name, fp);
diff --git a/lib/regexp-fields.cc b/lib/regexp-fields.cc
new file mode 100644
index 00000000..b2b39504
--- /dev/null
+++ b/lib/regexp-fields.cc
@@ -0,0 +1,144 @@
+/* regexp-fields.cc - field processor glue for regex supporting fields
+ *
+ * This file is part of notmuch.
+ *
+ * Copyright © 2015 Austin Clements
+ * Copyright © 2016 David Bremner
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see https://www.gnu.org/licenses/ .
+ *
+ * Author: Austin Clements <aclements@csail.mit.edu>
+ *                David Bremner <david@tethera.net>
+ */
+
+#include "regexp-fields.h"
+#include "notmuch-private.h"
+#include "database-private.h"
+
+#if HAVE_XAPIAN_FIELD_PROCESSOR
+static void
+compile_regex (regex_t &regexp, const char *str)
+{
+    int err = regcomp (&regexp, str, REG_EXTENDED | REG_NOSUB);
+
+    if (err != 0) {
+	size_t len = regerror (err, &regexp, NULL, 0);
+	char *buffer = new char[len];
+	std::string msg;
+	(void) regerror (err, &regexp, buffer, len);
+	msg.assign (buffer, len);
+	delete buffer;
+
+	throw Xapian::QueryParserError (msg);
+    }
+}
+
+RegexpPostingSource::RegexpPostingSource (Xapian::valueno slot, const std::string &regexp)
+    : slot_ (slot)
+{
+    compile_regex (regexp_, regexp.c_str ());
+}
+
+RegexpPostingSource::~RegexpPostingSource ()
+{
+    regfree (&regexp_);
+}
+
+void
+RegexpPostingSource::init (const Xapian::Database &db)
+{
+    db_ = db;
+    it_ = db_.valuestream_begin (slot_);
+    end_ = db.valuestream_end (slot_);
+    started_ = false;
+}
+
+Xapian::doccount
+RegexpPostingSource::get_termfreq_min () const
+{
+    return 0;
+}
+
+Xapian::doccount
+RegexpPostingSource::get_termfreq_est () const
+{
+    return get_termfreq_max () / 2;
+}
+
+Xapian::doccount
+RegexpPostingSource::get_termfreq_max () const
+{
+    return db_.get_value_freq (slot_);
+}
+
+Xapian::docid
+RegexpPostingSource::get_docid () const
+{
+    return it_.get_docid ();
+}
+
+bool
+RegexpPostingSource::at_end () const
+{
+    return it_ == end_;
+}
+
+void
+RegexpPostingSource::next (unused (double min_wt))
+{
+    if (started_ && ! at_end ())
+	++it_;
+    started_ = true;
+
+    for (; ! at_end (); ++it_) {
+	std::string value = *it_;
+	if (regexec (&regexp_, value.c_str (), 0, NULL, 0) == 0)
+	    break;
+    }
+}
+
+static inline Xapian::valueno _find_slot (std::string prefix)
+{
+    if (prefix == "from")
+	return NOTMUCH_VALUE_FROM;
+    else if (prefix == "subject")
+	return NOTMUCH_VALUE_SUBJECT;
+    else
+	throw Xapian::QueryParserError ("unsupported regexp field '" + prefix + "'");
+}
+
+RegexpFieldProcessor::RegexpFieldProcessor (std::string prefix, Xapian::QueryParser &parser_, notmuch_database_t *notmuch_)
+	: slot (_find_slot (prefix)), term_prefix (_find_prefix (prefix.c_str ())),
+	  parser (parser_), notmuch (notmuch_)
+{
+};
+
+Xapian::Query
+RegexpFieldProcessor::operator() (const std::string & str)
+{
+    if (str.at (0) == '/') {
+	if (str.at (str.size () - 1) == '/'){
+	    RegexpPostingSource *postings = new RegexpPostingSource (slot, str.substr(1,str.size () - 2));
+	    return Xapian::Query (postings->release ());
+	} else {
+	    throw Xapian::QueryParserError ("unmatch regex delimiter in '" + str + "'");
+	}
+    } else {
+	/* TODO replace this with a nicer API level triggering of
+	 * phrase parsing, when possible */
+	std::string quoted='"' + str + '"';
+	return parser.parse_query (quoted, NOTMUCH_QUERY_PARSER_FLAGS, term_prefix);
+    }
+}
+#endif
diff --git a/lib/regexp-fields.h b/lib/regexp-fields.h
new file mode 100644
index 00000000..bac11999
--- /dev/null
+++ b/lib/regexp-fields.h
@@ -0,0 +1,77 @@
+/* regexp-fields.h - xapian glue for semi-bruteforce regexp search
+ *
+ * This file is part of notmuch.
+ *
+ * Copyright © 2015 Austin Clements
+ * Copyright © 2016 David Bremner
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see https://www.gnu.org/licenses/ .
+ *
+ * Author: Austin Clements <aclements@csail.mit.edu>
+ *                David Bremner <david@tethera.net>
+ */
+
+#ifndef NOTMUCH_REGEXP_FIELDS_H
+#define NOTMUCH_REGEXP_FIELDS_H
+#if HAVE_XAPIAN_FIELD_PROCESSOR
+#include <sys/types.h>
+#include <regex.h>
+#include "database-private.h"
+#include "notmuch-private.h"
+
+/* A posting source that returns documents where a value matches a
+ * regexp.
+ */
+class RegexpPostingSource : public Xapian::PostingSource
+{
+ protected:
+    const Xapian::valueno slot_;
+    regex_t regexp_;
+    Xapian::Database db_;
+    bool started_;
+    Xapian::ValueIterator it_, end_;
+
+/* No copying */
+    RegexpPostingSource (const RegexpPostingSource &);
+    RegexpPostingSource &operator= (const RegexpPostingSource &);
+
+ public:
+    RegexpPostingSource (Xapian::valueno slot, const std::string &regexp);
+    ~RegexpPostingSource ();
+    void init (const Xapian::Database &db);
+    Xapian::doccount get_termfreq_min () const;
+    Xapian::doccount get_termfreq_est () const;
+    Xapian::doccount get_termfreq_max () const;
+    Xapian::docid get_docid () const;
+    bool at_end () const;
+    void next (unused (double min_wt));
+};
+
+
+class RegexpFieldProcessor : public Xapian::FieldProcessor {
+ protected:
+    Xapian::valueno slot;
+    std::string term_prefix;
+    Xapian::QueryParser &parser;
+    notmuch_database_t *notmuch;
+
+ public:
+    RegexpFieldProcessor (std::string prefix, Xapian::QueryParser &parser_, notmuch_database_t *notmuch_);
+
+    ~RegexpFieldProcessor () { };
+
+    Xapian::Query operator()(const std::string & str);
+};
+#endif
+#endif /* NOTMUCH_REGEXP_FIELDS_H */
diff --git a/test/T650-regexp-query.sh b/test/T650-regexp-query.sh
new file mode 100755
index 00000000..a8039610
--- /dev/null
+++ b/test/T650-regexp-query.sh
@@ -0,0 +1,82 @@
+#!/usr/bin/env bash
+test_description='regular expression searches'
+. ./test-lib.sh || exit 1
+
+add_email_corpus
+
+
+if [ $NOTMUCH_HAVE_XAPIAN_FIELD_PROCESSOR -eq 0 ]; then
+    test_done
+fi
+
+notmuch search --output=messages from:cworth > cworth.msg-ids
+
+test_begin_subtest "regexp from search, case sensitive"
+notmuch search --output=messages from:/carl/ > OUTPUT
+test_expect_equal_file /dev/null OUTPUT
+
+test_begin_subtest "empty regexp or query"
+notmuch search --output=messages from:/carl/ or from:/cworth/ > OUTPUT
+test_expect_equal_file cworth.msg-ids OUTPUT
+
+test_begin_subtest "non-empty regexp and query"
+notmuch search  from:/cworth@cworth.org/ and subject:patch | notmuch_search_sanitize > OUTPUT
+cat <<EOF > EXPECTED
+thread:XXX   2009-11-18 [1/2] Carl Worth| Alex Botero-Lowry; [notmuch] [PATCH] Error out if no query is supplied to search instead of going into an infinite loop (attachment inbox unread)
+thread:XXX   2009-11-18 [1/2] Carl Worth| Ingmar Vanhassel; [notmuch] [PATCH] Typsos (inbox unread)
+thread:XXX   2009-11-18 [1/2] Carl Worth| Jan Janak; [notmuch] [PATCH] Older versions of install do not support -C. (inbox unread)
+thread:XXX   2009-11-18 [1/2] Carl Worth| Keith Packard; [notmuch] [PATCH] Make notmuch-show 'X' (and 'x') commands remove inbox (and unread) tags (inbox unread)
+thread:XXX   2009-11-18 [2/5] Carl Worth| Mikhail Gusarov, Keith Packard; [notmuch] [PATCH 1/2] Close message file after parsing message headers (inbox unread)
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "regexp from search, duplicate term search"
+notmuch search --output=messages from:/cworth/ > OUTPUT
+test_expect_equal_file cworth.msg-ids OUTPUT
+
+test_begin_subtest "long enough regexp matches only desired senders"
+notmuch search --output=messages 'from:"/C.* Wo/"' > OUTPUT
+test_expect_equal_file cworth.msg-ids OUTPUT
+
+test_begin_subtest "shorter regexp matches one more sender"
+notmuch search --output=messages 'from:"/C.* W/"' > OUTPUT
+{ echo id:1258544095-16616-1-git-send-email-chris@chris-wilson.co.uk; cat cworth.msg-ids; } > EXPECTED
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "regexp subject search, non-ASCII"
+notmuch search --output=messages subject:/accentué/ > OUTPUT
+echo id:877h1wv7mg.fsf@inf-8657.int-evry.fr > EXPECTED
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "regexp subject search, punctuation"
+notmuch search subject:/\'X\'/ | notmuch_search_sanitize > OUTPUT
+cat <<EOF > EXPECTED
+thread:XXX   2009-11-18 [2/2] Keith Packard, Carl Worth; [notmuch] [PATCH] Make notmuch-show 'X' (and 'x') commands remove inbox (and unread) tags (inbox unread)
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "regexp subject search, no punctuation"
+notmuch search  subject:/X/ | notmuch_search_sanitize > OUTPUT
+cat <<EOF > EXPECTED
+thread:XXX   2009-11-18 [2/2] Keith Packard, Carl Worth; [notmuch] [PATCH] Make notmuch-show 'X' (and 'x') commands remove inbox (and unread) tags (inbox unread)
+thread:XXX   2009-11-18 [4/4] Jjgod Jiang, Alexander Botero-Lowry; [notmuch] Mac OS X/Darwin compatibility issues (inbox unread)
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "combine regexp from and subject"
+notmuch search  subject:/-C/ and from:/.an.k/ | notmuch_search_sanitize > OUTPUT
+cat <<EOF > EXPECTED
+thread:XXX   2009-11-17 [1/2] Jan Janak| Carl Worth; [notmuch] [PATCH] Older versions of install do not support -C. (inbox unread)
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "regexp error reporting"
+notmuch search 'from:/unbalanced[/' 1>OUTPUT 2>&1
+cat <<EOF > EXPECTED
+notmuch search: A Xapian exception occurred
+A Xapian exception occurred performing query: Invalid regular expression
+Query string was: from:/unbalanced[/
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_done
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 3/4] lib: add mid: as a synonym for id:
  2017-02-27  2:34 v6 of regexp searching David Bremner
  2017-02-27  2:34 ` [PATCH 1/4] lib: create field processors from prefix table David Bremner
  2017-02-27  2:34 ` [PATCH 2/4] lib: regexp matching in 'subject' and 'from' David Bremner
@ 2017-02-27  2:34 ` David Bremner
  2017-03-03 21:54   ` David Bremner
  2017-02-27  2:34 ` [PATCH 4/4] lib: Add regexp searching for mid: prefix David Bremner
  3 siblings, 1 reply; 7+ messages in thread
From: David Bremner @ 2017-02-27  2:34 UTC (permalink / raw)
  To: notmuch

mid: is the URL scheme suggested by RFC 2392. We also plan to
introduce more flexible searches for mid: than are possible with
id: (in order not to break assumptions about the special behaviour of
id:, e.g. identifying at most one message).
---
 lib/database.cc     | 1 +
 test/T080-search.sh | 6 ++++++
 2 files changed, 7 insertions(+)

diff --git a/lib/database.cc b/lib/database.cc
index 573c9fe0..b7fc53ee 100644
--- a/lib/database.cc
+++ b/lib/database.cc
@@ -262,6 +262,7 @@ prefix_t prefix_table[] = {
     { "tag",			"K",		NOTMUCH_FIELD_EXTERNAL },
     { "is",			"K",		NOTMUCH_FIELD_EXTERNAL },
     { "id",			"Q",		NOTMUCH_FIELD_EXTERNAL },
+    { "mid",			"Q",		NOTMUCH_FIELD_EXTERNAL },
     { "path",			"P",		NOTMUCH_FIELD_EXTERNAL },
     { "property",		"XPROPERTY",	NOTMUCH_FIELD_EXTERNAL },
     /*
diff --git a/test/T080-search.sh b/test/T080-search.sh
index 5e8b20ce..6149da93 100755
--- a/test/T080-search.sh
+++ b/test/T080-search.sh
@@ -34,6 +34,11 @@ add_message '[subject]="search by id"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"
 output=$(notmuch search id:${gen_msg_id} | notmuch_search_sanitize)
 test_expect_equal "$output" "thread:XXX   2000-01-01 [1/1] Notmuch Test Suite; search by id (inbox unread)"
 
+test_begin_subtest "Search by mid:"
+add_message '[subject]="search by mid"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"'
+output=$(notmuch search mid:${gen_msg_id} | notmuch_search_sanitize)
+test_expect_equal "$output" "thread:XXX   2000-01-01 [1/1] Notmuch Test Suite; search by mid (inbox unread)"
+
 test_begin_subtest "Search by tag:"
 add_message '[subject]="search by tag"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"'
 notmuch tag +searchbytag id:${gen_msg_id}
@@ -127,6 +132,7 @@ thread:XXX   2000-01-01 [1/1] Notmuch Test Suite; search by to (inbox unread)
 thread:XXX   2000-01-01 [1/1] Notmuch Test Suite; subjectsearchtest (inbox unread)
 thread:XXX   2000-01-01 [1/1] Notmuch Test Suite; utf8-sübjéct (inbox unread)
 thread:XXX   2000-01-01 [1/1] Notmuch Test Suite; search by id (inbox unread)
+thread:XXX   2000-01-01 [1/1] Notmuch Test Suite; search by mid (inbox unread)
 thread:XXX   2000-01-01 [1/1] Notmuch Test Suite; search by tag (inbox searchbytag unread)
 thread:XXX   2000-01-01 [1/1] Notmuch Test Suite; search by thread (inbox unread)
 thread:XXX   2000-01-01 [1/1] Notmuch Test Suite; body search (phrase) (inbox unread)
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 4/4] lib: Add regexp searching for mid: prefix
  2017-02-27  2:34 v6 of regexp searching David Bremner
                   ` (2 preceding siblings ...)
  2017-02-27  2:34 ` [PATCH 3/4] lib: add mid: as a synonym for id: David Bremner
@ 2017-02-27  2:34 ` David Bremner
  3 siblings, 0 replies; 7+ messages in thread
From: David Bremner @ 2017-02-27  2:34 UTC (permalink / raw)
  To: notmuch

The bulk of the change is passing in the field options to the regexp
field processor, so that we can properly handle the
fallback (non-regexp case).
---
 lib/database.cc           |  6 ++++--
 lib/regexp-fields.cc      | 28 +++++++++++++++++++++-------
 lib/regexp-fields.h       |  4 +++-
 test/T650-regexp-query.sh | 16 ++++++++++++++++
 4 files changed, 44 insertions(+), 10 deletions(-)

diff --git a/lib/database.cc b/lib/database.cc
index b7fc53ee..09337602 100644
--- a/lib/database.cc
+++ b/lib/database.cc
@@ -262,7 +262,8 @@ prefix_t prefix_table[] = {
     { "tag",			"K",		NOTMUCH_FIELD_EXTERNAL },
     { "is",			"K",		NOTMUCH_FIELD_EXTERNAL },
     { "id",			"Q",		NOTMUCH_FIELD_EXTERNAL },
-    { "mid",			"Q",		NOTMUCH_FIELD_EXTERNAL },
+    { "mid",			"Q",		NOTMUCH_FIELD_EXTERNAL |
+						NOTMUCH_FIELD_PROCESSOR },
     { "path",			"P",		NOTMUCH_FIELD_EXTERNAL },
     { "property",		"XPROPERTY",	NOTMUCH_FIELD_EXTERNAL },
     /*
@@ -313,7 +314,8 @@ _setup_query_field (const prefix_t *prefix, notmuch_database_t *notmuch)
 	else if (STRNCMP_LITERAL(prefix->name, "query") == 0)
 	    fp = (new QueryFieldProcessor (*notmuch->query_parser, notmuch))->release ();
 	else
-	    fp = (new RegexpFieldProcessor (prefix->name, *notmuch->query_parser, notmuch))->release ();
+	    fp = (new RegexpFieldProcessor (prefix->name, prefix->flags,
+					    *notmuch->query_parser, notmuch))->release ();
 
 	/* we treat all field-processor fields as boolean in order to get the raw input */
 	notmuch->query_parser->add_boolean_prefix (prefix->name, fp);
diff --git a/lib/regexp-fields.cc b/lib/regexp-fields.cc
index b2b39504..a32b965e 100644
--- a/lib/regexp-fields.cc
+++ b/lib/regexp-fields.cc
@@ -114,13 +114,21 @@ static inline Xapian::valueno _find_slot (std::string prefix)
 	return NOTMUCH_VALUE_FROM;
     else if (prefix == "subject")
 	return NOTMUCH_VALUE_SUBJECT;
+    else if (prefix == "mid")
+	return NOTMUCH_VALUE_MESSAGE_ID;
     else
 	throw Xapian::QueryParserError ("unsupported regexp field '" + prefix + "'");
 }
 
-RegexpFieldProcessor::RegexpFieldProcessor (std::string prefix, Xapian::QueryParser &parser_, notmuch_database_t *notmuch_)
-	: slot (_find_slot (prefix)), term_prefix (_find_prefix (prefix.c_str ())),
-	  parser (parser_), notmuch (notmuch_)
+RegexpFieldProcessor::RegexpFieldProcessor (std::string prefix,
+					    notmuch_field_flag_t options_,
+					    Xapian::QueryParser &parser_,
+					    notmuch_database_t *notmuch_)
+	: slot (_find_slot (prefix)),
+	  term_prefix (_find_prefix (prefix.c_str ())),
+	  options (options_),
+	  parser (parser_),
+	  notmuch (notmuch_)
 {
 };
 
@@ -135,10 +143,16 @@ RegexpFieldProcessor::operator() (const std::string & str)
 	    throw Xapian::QueryParserError ("unmatch regex delimiter in '" + str + "'");
 	}
     } else {
-	/* TODO replace this with a nicer API level triggering of
-	 * phrase parsing, when possible */
-	std::string quoted='"' + str + '"';
-	return parser.parse_query (quoted, NOTMUCH_QUERY_PARSER_FLAGS, term_prefix);
+	if (options & NOTMUCH_FIELD_PROBABILISTIC) {
+	    /* TODO replace this with a nicer API level triggering of
+	     * phrase parsing, when possible */
+	    std::string quoted='"' + str + '"';
+	    return parser.parse_query (quoted, NOTMUCH_QUERY_PARSER_FLAGS, term_prefix);
+	} else {
+	    /* Boolean prefix */
+	    std::string term = term_prefix + str;
+	    return Xapian::Query (term);
+	}
     }
 }
 #endif
diff --git a/lib/regexp-fields.h b/lib/regexp-fields.h
index bac11999..72d12b37 100644
--- a/lib/regexp-fields.h
+++ b/lib/regexp-fields.h
@@ -63,11 +63,13 @@ class RegexpFieldProcessor : public Xapian::FieldProcessor {
  protected:
     Xapian::valueno slot;
     std::string term_prefix;
+    notmuch_field_flag_t options;
     Xapian::QueryParser &parser;
     notmuch_database_t *notmuch;
 
  public:
-    RegexpFieldProcessor (std::string prefix, Xapian::QueryParser &parser_, notmuch_database_t *notmuch_);
+    RegexpFieldProcessor (std::string prefix, notmuch_field_flag_t options,
+			  Xapian::QueryParser &parser_, notmuch_database_t *notmuch_);
 
     ~RegexpFieldProcessor () { };
 
diff --git a/test/T650-regexp-query.sh b/test/T650-regexp-query.sh
index a8039610..f0868a15 100755
--- a/test/T650-regexp-query.sh
+++ b/test/T650-regexp-query.sh
@@ -79,4 +79,20 @@ Query string was: from:/unbalanced[/
 EOF
 test_expect_equal_file EXPECTED OUTPUT
 
+test_begin_subtest "empty mid search"
+notmuch search --output=messages mid:yoom > OUTPUT
+cp /dev/null EXPECTED
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "non-empty mid regex search"
+notmuch search --output=messages mid:/yoom/ > OUTPUT
+test_expect_equal_file cworth.msg-ids OUTPUT
+
+test_begin_subtest "combine regexp mid and subject"
+notmuch search  subject:/-C/ and mid:/y..m/ | notmuch_search_sanitize > OUTPUT
+cat <<EOF > EXPECTED
+thread:XXX   2009-11-18 [1/2] Carl Worth| Jan Janak; [notmuch] [PATCH] Older versions of install do not support -C. (inbox unread)
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
 test_done
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH] fixup! lib: regexp matching in 'subject' and 'from'
  2017-02-27  2:34 ` [PATCH 2/4] lib: regexp matching in 'subject' and 'from' David Bremner
@ 2017-03-03 13:23   ` David Bremner
  0 siblings, 0 replies; 7+ messages in thread
From: David Bremner @ 2017-03-03 13:23 UTC (permalink / raw)
  To: David Bremner, notmuch

---
 lib/regexp-fields.cc | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/lib/regexp-fields.cc b/lib/regexp-fields.cc
index b2b39504..65108e38 100644
--- a/lib/regexp-fields.cc
+++ b/lib/regexp-fields.cc
@@ -62,6 +62,11 @@ RegexpPostingSource::init (const Xapian::Database &db)
     it_ = db_.valuestream_begin (slot_);
     end_ = db.valuestream_end (slot_);
     started_ = false;
+
+    /* make sure we start on a matching value */
+    while (!at_end() && regexec (&regexp_, (*it_).c_str (), 0, NULL, 0) != 0) {
+	++it_;
+    }
 }
 
 Xapian::doccount
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH 3/4] lib: add mid: as a synonym for id:
  2017-02-27  2:34 ` [PATCH 3/4] lib: add mid: as a synonym for id: David Bremner
@ 2017-03-03 21:54   ` David Bremner
  0 siblings, 0 replies; 7+ messages in thread
From: David Bremner @ 2017-03-03 21:54 UTC (permalink / raw)
  To: notmuch

David Bremner <david@tethera.net> writes:

> mid: is the URL scheme suggested by RFC 2392. We also plan to
> introduce more flexible searches for mid: than are possible with
> id: (in order not to break assumptions about the special behaviour of
> id:, e.g. identifying at most one message).

I have pushed the first three patches in this series (along with the
fixup for 2/4).

d

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2017-03-03 21:54 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-02-27  2:34 v6 of regexp searching David Bremner
2017-02-27  2:34 ` [PATCH 1/4] lib: create field processors from prefix table David Bremner
2017-02-27  2:34 ` [PATCH 2/4] lib: regexp matching in 'subject' and 'from' David Bremner
2017-03-03 13:23   ` [PATCH] fixup! " David Bremner
2017-02-27  2:34 ` [PATCH 3/4] lib: add mid: as a synonym for id: David Bremner
2017-03-03 21:54   ` David Bremner
2017-02-27  2:34 ` [PATCH 4/4] lib: Add regexp searching for mid: prefix David Bremner

Code repositories for project(s) associated with this public inbox

	https://yhetil.org/notmuch.git/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).