* [PATCH 01/27] configure: optional library sfsexp
2021-07-30 12:55 v3 sexpr query parser David Bremner
@ 2021-07-30 12:55 ` David Bremner
2021-07-30 12:55 ` [PATCH 02/27] lib: split notmuch_query_create David Bremner
` (25 subsequent siblings)
26 siblings, 0 replies; 28+ messages in thread
From: David Bremner @ 2021-07-30 12:55 UTC (permalink / raw)
To: notmuch; +Cc: David Bremner
The configure part is essentially the same as the other checks using
pkg-config. Since the optional inclusion of this feature changes what
options are available to the user, include it in the "built_with"
pseudo-configuration keys.
---
configure | 26 +++++++++++++++++++++++++-
lib/built-with.c | 2 ++
notmuch-config.c | 3 +++
test/T030-config.sh | 1 +
test/T055-path-config.sh | 9 +++++----
5 files changed, 36 insertions(+), 5 deletions(-)
diff --git a/configure b/configure
index cfa9c09b..4262d122 100755
--- a/configure
+++ b/configure
@@ -820,6 +820,19 @@ else
WITH_BASH=0
fi
+printf "Checking for sfsexp... "
+if pkg-config --exists sfsexp; then
+ printf "Yes.\n"
+ have_sfsexp=1
+ sfsexp_cflags=$(pkg-config --cflags sfsexp)
+ sfsexp_ldflags=$(pkg-config --libs sfsexp)
+else
+ printf "No (will not enable s-expression queries).\n"
+ have_sfsexp=0
+ sfsexp_cflags=
+ sfsexp_ldflags=
+fi
+
if [ -z "${EMACSLISPDIR-}" ]; then
EMACSLISPDIR="\$(prefix)/share/emacs/site-lisp"
fi
@@ -1443,6 +1456,13 @@ HAVE_VALGRIND = ${have_valgrind}
# And if so, flags needed at compile time for valgrind macros
VALGRIND_CFLAGS = ${valgrind_cflags}
+# Whether the sfsexp library is available
+HAVE_SFSEXP = ${have_sfsexp}
+
+# And if so, flags needed at compile/link time for sfsexp
+SFSEXP_CFLAGS = ${sfsexp_cflags}
+SFSEXP_LDFLAGS = ${sfsexp_ldflags}
+
# Support for emacs
WITH_EMACS = ${WITH_EMACS}
@@ -1459,6 +1479,7 @@ WITH_ZSH = ${WITH_ZSH}
COMMON_CONFIGURE_CFLAGS = \\
\$(GMIME_CFLAGS) \$(TALLOC_CFLAGS) \$(ZLIB_CFLAGS) \\
-DHAVE_VALGRIND=\$(HAVE_VALGRIND) \$(VALGRIND_CFLAGS) \\
+ -DHAVE_SFSEXP=\$(HAVE_SFSEXP) \$(SFSEXP_CFLAGS) \\
-DHAVE_GETLINE=\$(HAVE_GETLINE) \\
-DWITH_EMACS=\$(WITH_EMACS) \\
-DHAVE_CANONICALIZE_FILE_NAME=\$(HAVE_CANONICALIZE_FILE_NAME) \\
@@ -1475,7 +1496,7 @@ CONFIGURE_CFLAGS = \$(COMMON_CONFIGURE_CFLAGS)
CONFIGURE_CXXFLAGS = \$(COMMON_CONFIGURE_CFLAGS) \$(XAPIAN_CXXFLAGS)
-CONFIGURE_LDFLAGS = \$(GMIME_LDFLAGS) \$(TALLOC_LDFLAGS) \$(ZLIB_LDFLAGS) \$(XAPIAN_LDFLAGS)
+CONFIGURE_LDFLAGS = \$(GMIME_LDFLAGS) \$(TALLOC_LDFLAGS) \$(ZLIB_LDFLAGS) \$(XAPIAN_LDFLAGS) \$(SFSEXP_LDFLAGS)
EOF
# construct the sh.config
@@ -1524,6 +1545,9 @@ NOTMUCH_HAVE_PYTHON3_CFFI=${have_python3_cffi}
# Is the python pytest package available?
NOTMUCH_HAVE_PYTHON3_PYTEST=${have_python3_pytest}
+# Is the sfsexp library available?
+NOTMUCH_HAVE_SFSEXP=${have_sfsexp}
+
# Platform we are run on
PLATFORM=${platform}
EOF
diff --git a/lib/built-with.c b/lib/built-with.c
index 0c70010b..89958e12 100644
--- a/lib/built-with.c
+++ b/lib/built-with.c
@@ -32,6 +32,8 @@ notmuch_built_with (const char *name)
return HAVE_XAPIAN_DB_RETRY_LOCK;
} else if (STRNCMP_LITERAL (name, "session_key") == 0) {
return true;
+ } else if (STRNCMP_LITERAL (name, "sexpr_query") == 0) {
+ return HAVE_SFSEXP;
} else {
return false;
}
diff --git a/notmuch-config.c b/notmuch-config.c
index 3430a3d3..328446ae 100644
--- a/notmuch-config.c
+++ b/notmuch-config.c
@@ -672,6 +672,9 @@ _notmuch_config_list_built_with ()
printf ("%sretry_lock=%s\n",
BUILT_WITH_PREFIX,
notmuch_built_with ("retry_lock") ? "true" : "false");
+ printf ("%ssexpr_query=%s\n",
+ BUILT_WITH_PREFIX,
+ notmuch_built_with ("sexpr_query") ? "true" : "false");
}
static int
diff --git a/test/T030-config.sh b/test/T030-config.sh
index 751feaf3..05c68653 100755
--- a/test/T030-config.sh
+++ b/test/T030-config.sh
@@ -51,6 +51,7 @@ cat <<EOF > EXPECTED
built_with.compact=something
built_with.field_processor=something
built_with.retry_lock=something
+built_with.sexpr_query=something
database.autocommit=8000
database.mail_root=MAIL_DIR
database.path=MAIL_DIR
diff --git a/test/T055-path-config.sh b/test/T055-path-config.sh
index bb3bf665..9e9b9df8 100755
--- a/test/T055-path-config.sh
+++ b/test/T055-path-config.sh
@@ -250,15 +250,16 @@ EOF
test_expect_equal "${output}+${output2}" "${value}+"
test_begin_subtest "Config list ($config)"
- notmuch config list | notmuch_dir_sanitize | sed -e "s/^database.backup_dir=.*$/database.backup_dir/" \
+ notmuch config list | notmuch_config_sanitize | sed -e "s/^database.backup_dir=.*$/database.backup_dir/" \
-e "s/^database.hook_dir=.*$/database.hook_dir/" \
-e "s/^database.path=.*$/database.path/" \
-e "s,^database.mail_root=CWD/home/mail,database.mail_root=MAIL_DIR," \
> OUTPUT
cat <<EOF > EXPECTED
-built_with.compact=true
-built_with.field_processor=true
-built_with.retry_lock=true
+built_with.compact=something
+built_with.field_processor=something
+built_with.retry_lock=something
+built_with.sexpr_query=something
database.autocommit=8000
database.backup_dir
database.hook_dir
--
2.30.2
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 02/27] lib: split notmuch_query_create
2021-07-30 12:55 v3 sexpr query parser David Bremner
2021-07-30 12:55 ` [PATCH 01/27] configure: optional library sfsexp David Bremner
@ 2021-07-30 12:55 ` David Bremner
2021-07-30 12:55 ` [PATCH 03/27] lib: define notmuch_query_create_with_syntax David Bremner
` (24 subsequent siblings)
26 siblings, 0 replies; 28+ messages in thread
From: David Bremner @ 2021-07-30 12:55 UTC (permalink / raw)
To: notmuch; +Cc: David Bremner
Most of the function will be re-usable when creating a query from an
s-expression.
---
lib/query.cc | 19 ++++++++++++++++---
1 file changed, 16 insertions(+), 3 deletions(-)
diff --git a/lib/query.cc b/lib/query.cc
index 792aba21..39b85e91 100644
--- a/lib/query.cc
+++ b/lib/query.cc
@@ -84,9 +84,9 @@ _notmuch_query_destructor (notmuch_query_t *query)
return 0;
}
-notmuch_query_t *
-notmuch_query_create (notmuch_database_t *notmuch,
- const char *query_string)
+static notmuch_query_t *
+_notmuch_query_constructor (notmuch_database_t *notmuch,
+ const char *query_string)
{
notmuch_query_t *query;
@@ -116,6 +116,19 @@ notmuch_query_create (notmuch_database_t *notmuch,
return query;
}
+notmuch_query_t *
+notmuch_query_create (notmuch_database_t *notmuch,
+ const char *query_string)
+{
+
+ notmuch_query_t *query = _notmuch_query_constructor (notmuch, query_string);
+
+ if (! query)
+ return NULL;
+
+ return query;
+}
+
static notmuch_status_t
_notmuch_query_ensure_parsed (notmuch_query_t *query)
{
--
2.30.2
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 03/27] lib: define notmuch_query_create_with_syntax
2021-07-30 12:55 v3 sexpr query parser David Bremner
2021-07-30 12:55 ` [PATCH 01/27] configure: optional library sfsexp David Bremner
2021-07-30 12:55 ` [PATCH 02/27] lib: split notmuch_query_create David Bremner
@ 2021-07-30 12:55 ` David Bremner
2021-07-30 12:55 ` [PATCH 04/27] CLI/search+address: support sexpr queries David Bremner
` (23 subsequent siblings)
26 siblings, 0 replies; 28+ messages in thread
From: David Bremner @ 2021-07-30 12:55 UTC (permalink / raw)
To: notmuch; +Cc: David Bremner
Set the parsing syntax when the (notmuch) query object is
created. Initially the library always returns a trivial query that
matches all messages when using s-expression syntax.
It seems better to select the syntax at query creation time because
the lazy parsing is an implementation detail.
---
lib/notmuch.h | 10 +++++++
lib/query.cc | 73 ++++++++++++++++++++++++++++++++++++++++++++++-----
2 files changed, 76 insertions(+), 7 deletions(-)
diff --git a/lib/notmuch.h b/lib/notmuch.h
index 3b28bea3..6f33c045 100644
--- a/lib/notmuch.h
+++ b/lib/notmuch.h
@@ -961,6 +961,16 @@ notmuch_query_t *
notmuch_query_create (notmuch_database_t *database,
const char *query_string);
+typedef enum {
+ NOTMUCH_QUERY_SYNTAX_XAPIAN,
+ NOTMUCH_QUERY_SYNTAX_SEXP
+} notmuch_query_syntax_t;
+
+notmuch_status_t
+notmuch_query_create_with_syntax (notmuch_database_t *database,
+ const char *query_string,
+ notmuch_query_syntax_t syntax,
+ notmuch_query_t **output);
/**
* Sort values for notmuch_query_set_sort.
*/
diff --git a/lib/query.cc b/lib/query.cc
index 39b85e91..12fd9482 100644
--- a/lib/query.cc
+++ b/lib/query.cc
@@ -23,6 +23,8 @@
#include <glib.h> /* GHashTable, GPtrArray */
+#include "sexp.h"
+
struct _notmuch_query {
notmuch_database_t *notmuch;
const char *query_string;
@@ -30,6 +32,7 @@ struct _notmuch_query {
notmuch_string_list_t *exclude_terms;
notmuch_exclude_t omit_excluded;
bool parsed;
+ notmuch_query_syntax_t syntax;
Xapian::Query xapian_query;
std::set<std::string> terms;
};
@@ -105,7 +108,10 @@ _notmuch_query_constructor (notmuch_database_t *notmuch,
query->notmuch = notmuch;
- query->query_string = talloc_strdup (query, query_string);
+ if (query_string)
+ query->query_string = talloc_strdup (query, query_string);
+ else
+ query->query_string = NULL;
query->sort = NOTMUCH_SORT_NEWEST_FIRST;
@@ -121,20 +127,49 @@ notmuch_query_create (notmuch_database_t *notmuch,
const char *query_string)
{
- notmuch_query_t *query = _notmuch_query_constructor (notmuch, query_string);
+ notmuch_query_t *query;
+ notmuch_status_t status;
- if (! query)
+ status = notmuch_query_create_with_syntax (notmuch, query_string,
+ NOTMUCH_QUERY_SYNTAX_XAPIAN,
+ &query);
+ if (status)
return NULL;
return query;
}
-static notmuch_status_t
-_notmuch_query_ensure_parsed (notmuch_query_t *query)
+/*
+ * Create a query object for 'query_string', to be parsed lazily using
+ * 'syntax'.
+ *
+ * On success the new query is stored in '*output' and
+ * NOTMUCH_STATUS_SUCCESS is returned. On failure no query is handed
+ * back ('*output' is set to NULL whenever 'output' itself is usable)
+ * and the status is one of:
+ *
+ * NOTMUCH_STATUS_NULL_POINTER: 'output' is NULL.
+ * NOTMUCH_STATUS_ILLEGAL_ARGUMENT: s-expression syntax was requested
+ * but HAVE_SFSEXP is false; a message is logged on 'notmuch'.
+ * NOTMUCH_STATUS_OUT_OF_MEMORY: the query object could not be
+ * constructed.
+ */
+notmuch_status_t
+notmuch_query_create_with_syntax (notmuch_database_t *notmuch,
+ const char *query_string,
+ notmuch_query_syntax_t syntax,
+ notmuch_query_t **output)
{
- if (query->parsed)
- return NOTMUCH_STATUS_SUCCESS;
+ notmuch_query_t *query;
+
+ if (! output)
+ return NOTMUCH_STATUS_NULL_POINTER;
+
+ /* Never leave the caller's pointer indeterminate on an error path. */
+ *output = NULL;
+
+ /* Validate the requested syntax before constructing anything, so a
+ * rejected request does not leave an unreferenced query object
+ * behind. */
+ if (syntax == NOTMUCH_QUERY_SYNTAX_SEXP && ! HAVE_SFSEXP) {
+ _notmuch_database_log (notmuch, "sexp query parser not available");
+ return NOTMUCH_STATUS_ILLEGAL_ARGUMENT;
+ }
+
+ query = _notmuch_query_constructor (notmuch, query_string);
+ if (! query)
+ return NOTMUCH_STATUS_OUT_OF_MEMORY;
+
+ query->syntax = syntax;
+
+ *output = query;
+
+ return NOTMUCH_STATUS_SUCCESS;
+}
+
+static notmuch_status_t
+_notmuch_query_ensure_parsed_xapian (notmuch_query_t *query)
+{
try {
query->xapian_query =
query->notmuch->query_parser->
@@ -167,6 +202,30 @@ _notmuch_query_ensure_parsed (notmuch_query_t *query)
return NOTMUCH_STATUS_SUCCESS;
}
+static notmuch_status_t
+_notmuch_query_ensure_parsed_sexpr (notmuch_query_t *query)
+{
+ if (query->parsed)
+ return NOTMUCH_STATUS_SUCCESS;
+
+ query->xapian_query = Xapian::Query::MatchAll;
+ return NOTMUCH_STATUS_SUCCESS;
+}
+
+static notmuch_status_t
+_notmuch_query_ensure_parsed (notmuch_query_t *query)
+{
+ if (query->parsed)
+ return NOTMUCH_STATUS_SUCCESS;
+
+#if HAVE_SFSEXP
+ if (query->syntax == NOTMUCH_QUERY_SYNTAX_SEXP)
+ return _notmuch_query_ensure_parsed_sexpr (query);
+#endif
+
+ return _notmuch_query_ensure_parsed_xapian (query);
+}
+
const char *
notmuch_query_get_query_string (const notmuch_query_t *query)
{
--
2.30.2
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 04/27] CLI/search+address: support sexpr queries
2021-07-30 12:55 v3 sexpr query parser David Bremner
` (2 preceding siblings ...)
2021-07-30 12:55 ` [PATCH 03/27] lib: define notmuch_query_create_with_syntax David Bremner
@ 2021-07-30 12:55 ` David Bremner
2021-07-30 12:55 ` [PATCH 05/27] lib: add new status code for query syntax errors David Bremner
` (22 subsequent siblings)
26 siblings, 0 replies; 28+ messages in thread
From: David Bremner @ 2021-07-30 12:55 UTC (permalink / raw)
To: notmuch; +Cc: David Bremner
Initially support selection of query syntax in two subcommands to
enable testing.
---
notmuch-search.c | 13 +++++++++----
test/T080-search.sh | 5 +++++
test/T095-address.sh | 5 +++++
3 files changed, 19 insertions(+), 4 deletions(-)
diff --git a/notmuch-search.c b/notmuch-search.c
index 244817a9..962e2e5c 100644
--- a/notmuch-search.c
+++ b/notmuch-search.c
@@ -56,6 +56,7 @@ typedef struct {
int format_sel;
sprinter_t *format;
int exclude;
+ int query_syntax;
notmuch_query_t *query;
int sort;
int output;
@@ -721,11 +722,10 @@ _notmuch_search_prepare (search_context_t *ctx, int argc, char *argv[])
return EXIT_FAILURE;
}
- ctx->query = notmuch_query_create (ctx->notmuch, query_str);
- if (ctx->query == NULL) {
- fprintf (stderr, "Out of memory\n");
+ if (print_status_database ("notmuch search", ctx->notmuch,
+ notmuch_query_create_with_syntax (ctx->notmuch, query_str,
+ ctx->query_syntax, &ctx->query)))
return EXIT_FAILURE;
- }
notmuch_query_set_sort (ctx->query, ctx->sort);
@@ -771,6 +771,7 @@ static search_context_t search_context = {
.format_sel = NOTMUCH_FORMAT_TEXT,
.exclude = NOTMUCH_EXCLUDE_TRUE,
.sort = NOTMUCH_SORT_NEWEST_FIRST,
+ .query_syntax = NOTMUCH_QUERY_SYNTAX_XAPIAN,
.output = 0,
.offset = 0,
.limit = -1, /* unlimited */
@@ -789,6 +790,10 @@ static const notmuch_opt_desc_t common_options[] = {
{ "text", NOTMUCH_FORMAT_TEXT },
{ "text0", NOTMUCH_FORMAT_TEXT0 },
{ 0, 0 } } },
+ { .opt_keyword = &search_context.query_syntax, .name = "query-syntax", .keywords =
+ (notmuch_keyword_t []){ { "xapian", NOTMUCH_QUERY_SYNTAX_XAPIAN },
+ { "sexp", NOTMUCH_QUERY_SYNTAX_SEXP },
+ { 0, 0 } } },
{ .opt_int = &notmuch_format_version, .name = "format-version" },
{ }
};
diff --git a/test/T080-search.sh b/test/T080-search.sh
index a3f0dead..966e772a 100755
--- a/test/T080-search.sh
+++ b/test/T080-search.sh
@@ -189,4 +189,9 @@ test_begin_subtest "parts do not have adjacent term positions"
output=$(notmuch search id:termpos and '"c x"')
test_expect_equal "$output" ""
+test_begin_subtest "sexpr query: all messages"
+notmuch search '*' > EXPECTED
+notmuch search --query-syntax=sexp '()' > OUTPUT
+test_expect_equal_file EXPECTED OUTPUT
+
test_done
diff --git a/test/T095-address.sh b/test/T095-address.sh
index 817be538..adf0b307 100755
--- a/test/T095-address.sh
+++ b/test/T095-address.sh
@@ -325,4 +325,9 @@ cat <<EOF >EXPECTED
EOF
test_expect_equal_file EXPECTED OUTPUT
+test_begin_subtest "sexpr query: all messages"
+notmuch address '*' > EXPECTED
+notmuch address --query-syntax=sexp '()' > OUTPUT
+test_expect_equal_file EXPECTED OUTPUT
+
test_done
--
2.30.2
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 05/27] lib: add new status code for query syntax errors.
2021-07-30 12:55 v3 sexpr query parser David Bremner
` (3 preceding siblings ...)
2021-07-30 12:55 ` [PATCH 04/27] CLI/search+address: support sexpr queries David Bremner
@ 2021-07-30 12:55 ` David Bremner
2021-07-30 12:55 ` [PATCH 06/27] lib/parse-sexp: parse single terms and the empty list David Bremner
` (21 subsequent siblings)
26 siblings, 0 replies; 28+ messages in thread
From: David Bremner @ 2021-07-30 12:55 UTC (permalink / raw)
To: notmuch; +Cc: David Bremner
This will help provide more meaningful error messages without special
casing on the client side.
---
bindings/python-cffi/notmuch2/_build.py | 1 +
bindings/python-cffi/notmuch2/_errors.py | 3 +++
lib/database.cc | 2 ++
lib/notmuch.h | 4 ++++
4 files changed, 10 insertions(+)
diff --git a/bindings/python-cffi/notmuch2/_build.py b/bindings/python-cffi/notmuch2/_build.py
index f712b6c5..24df939e 100644
--- a/bindings/python-cffi/notmuch2/_build.py
+++ b/bindings/python-cffi/notmuch2/_build.py
@@ -53,6 +53,7 @@ ffibuilder.cdef(
NOTMUCH_STATUS_NO_CONFIG,
NOTMUCH_STATUS_NO_DATABASE,
NOTMUCH_STATUS_DATABASE_EXISTS,
+ NOTMUCH_STATUS_BAD_QUERY_SYNTAX,
NOTMUCH_STATUS_LAST_STATUS
} notmuch_status_t;
typedef enum {
diff --git a/bindings/python-cffi/notmuch2/_errors.py b/bindings/python-cffi/notmuch2/_errors.py
index 9301073e..f55cc96b 100644
--- a/bindings/python-cffi/notmuch2/_errors.py
+++ b/bindings/python-cffi/notmuch2/_errors.py
@@ -56,6 +56,8 @@ class NotmuchError(Exception):
NoDatabaseError,
capi.lib.NOTMUCH_STATUS_DATABASE_EXISTS:
DatabaseExistsError,
+ capi.lib.NOTMUCH_STATUS_BAD_QUERY_SYNTAX:
+ QuerySyntaxError,
}
return types[status]
@@ -103,6 +105,7 @@ class IllegalArgumentError(NotmuchError): pass
class NoConfigError(NotmuchError): pass
class NoDatabaseError(NotmuchError): pass
class DatabaseExistsError(NotmuchError): pass
+class QuerySyntaxError(NotmuchError): pass
class ObjectDestroyedError(NotmuchError):
"""The object has already been destroyed and it's memory freed.
diff --git a/lib/database.cc b/lib/database.cc
index 31794900..7eb0de79 100644
--- a/lib/database.cc
+++ b/lib/database.cc
@@ -309,6 +309,8 @@ notmuch_status_to_string (notmuch_status_t status)
return "No database found";
case NOTMUCH_STATUS_DATABASE_EXISTS:
return "Database exists, not recreated";
+ case NOTMUCH_STATUS_BAD_QUERY_SYNTAX:
+ return "Syntax error in query";
default:
case NOTMUCH_STATUS_LAST_STATUS:
return "Unknown error status value";
diff --git a/lib/notmuch.h b/lib/notmuch.h
index 6f33c045..3f7ba132 100644
--- a/lib/notmuch.h
+++ b/lib/notmuch.h
@@ -220,6 +220,10 @@ typedef enum _notmuch_status {
* Database exists, so not (re)-created
*/
NOTMUCH_STATUS_DATABASE_EXISTS,
+ /**
+ * Syntax error in query
+ */
+ NOTMUCH_STATUS_BAD_QUERY_SYNTAX,
/**
* Not an actual status value. Just a way to find out how many
* valid status values there are.
--
2.30.2
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 06/27] lib/parse-sexp: parse single terms and the empty list.
2021-07-30 12:55 v3 sexpr query parser David Bremner
` (4 preceding siblings ...)
2021-07-30 12:55 ` [PATCH 05/27] lib: add new status code for query syntax errors David Bremner
@ 2021-07-30 12:55 ` David Bremner
2021-07-30 12:55 ` [PATCH 07/27] lib: leave stemmer object accessible David Bremner
` (20 subsequent siblings)
26 siblings, 0 replies; 28+ messages in thread
From: David Bremner @ 2021-07-30 12:55 UTC (permalink / raw)
To: notmuch; +Cc: David Bremner
There is not much of a parser here yet, but it already does some
useful error reporting. Most functionality sketched in the
documentation is not implemented yet; detailed documentation will
follow with the implementation.
---
doc/conf.py | 4 ++
doc/index.rst | 1 +
doc/man7/notmuch-sexp-queries.rst | 81 +++++++++++++++++++++++++++++++
lib/Makefile.local | 3 +-
lib/database-private.h | 7 +++
lib/parse-sexp.cc | 54 +++++++++++++++++++++
lib/query.cc | 6 +--
test/T080-search.sh | 5 --
test/T081-sexpr-search.sh | 65 +++++++++++++++++++++++++
9 files changed, 216 insertions(+), 10 deletions(-)
create mode 100644 doc/man7/notmuch-sexp-queries.rst
create mode 100644 lib/parse-sexp.cc
create mode 100755 test/T081-sexpr-search.sh
diff --git a/doc/conf.py b/doc/conf.py
index 4a4a3421..53becb00 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -159,6 +159,10 @@ man_pages = [
u'syntax for notmuch queries',
[notmuch_authors], 7),
+ ('man7/notmuch-sexp-queries', 'notmuch-sexp-queries',
+ u's-expression syntax for notmuch queries',
+ [notmuch_authors], 7),
+
('man1/notmuch-show', 'notmuch-show',
u'show messages matching the given search terms',
[notmuch_authors], 1),
diff --git a/doc/index.rst b/doc/index.rst
index a3bf3480..fbdcf779 100644
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -24,6 +24,7 @@ Contents:
man1/notmuch-restore
man1/notmuch-search
man7/notmuch-search-terms
+ man7/notmuch-sexp-queries
man1/notmuch-show
man1/notmuch-tag
python-bindings
diff --git a/doc/man7/notmuch-sexp-queries.rst b/doc/man7/notmuch-sexp-queries.rst
new file mode 100644
index 00000000..e530912c
--- /dev/null
+++ b/doc/man7/notmuch-sexp-queries.rst
@@ -0,0 +1,81 @@
+.. _notmuch-sexp-queries(7):
+
+====================
+notmuch-sexp-queries
+====================
+
+SYNOPSIS
+========
+
+**notmuch** **search** ``--query-syntax=sexp`` '(and (to santa) (date december))'
+
+DESCRIPTION
+===========
+
+
+S-EXPRESSIONS
+-------------
+
+An *s-expression* is either an atom, or a list of whitespace-delimited
+s-expressions inside parentheses. Atoms are either
+
+*basic value*
+ A basic value is an unquoted string containing no whitespace, double quotes, or
+ parentheses.
+
+*quoted string*
+ Double quotes (") delimit strings possibly containing whitespace
+ or parentheses. These can contain double quote characters by
+ escaping with backslash. E.g. ``"this is a quote \""``.
+
+S-EXPRESSION QUERIES
+--------------------
+
+An s-expression query is either an atom, the empty list, or a
+*compound query* consisting of a prefix atom (first element) defining
+a *field*, *logical operation*, or *modifier*, and 0 or more
+subqueries.
+
+``*``
+``()``
+ The empty list matches all messages
+
+*term*
+ Match all messages containing *term*, possibly after stemming
+ or phase splitting.
+
+``(`` *field* |q1| |q2| ... |qn| ``)``
+ Restrict the queries |q1| to |qn| to *field*, and combine with *and*
+ (for most fields) or *or*. See :any:`fields` for more information.
+
+``(`` *operator* |q1| |q2| ... |qn| ``)``
+ Combine queries |q1| to |qn|. See :any:`operators` for more information.
+
+``(`` *modifier* |q1| |q2| ... |qn| ``)``
+ Combine queries |q1| to |qn|, and reinterpret the result (e.g. as a regular expression).
+ See :any:`modifiers` for more information.
+
+.. _fields:
+
+FIELDS
+``````
+
+.. _operators:
+
+OPERATORS
+`````````
+
+.. _modifiers:
+
+MODIFIERS
+`````````
+
+EXAMPLES
+========
+
+``Wizard``
+ Match all messages containing the word "wizard", ignoring case.
+
+.. |q1| replace:: :math:`q_1`
+.. |q2| replace:: :math:`q_2`
+.. |qn| replace:: :math:`q_n`
diff --git a/lib/Makefile.local b/lib/Makefile.local
index e2d4b91d..1378a74b 100644
--- a/lib/Makefile.local
+++ b/lib/Makefile.local
@@ -63,7 +63,8 @@ libnotmuch_cxx_srcs = \
$(dir)/features.cc \
$(dir)/prefix.cc \
$(dir)/open.cc \
- $(dir)/init.cc
+ $(dir)/init.cc \
+ $(dir)/parse-sexp.cc
libnotmuch_modules := $(libnotmuch_c_srcs:.c=.o) $(libnotmuch_cxx_srcs:.cc=.o)
diff --git a/lib/database-private.h b/lib/database-private.h
index 9706c17e..f206efaf 100644
--- a/lib/database-private.h
+++ b/lib/database-private.h
@@ -300,4 +300,11 @@ _notmuch_database_setup_standard_query_fields (notmuch_database_t *notmuch);
notmuch_status_t
_notmuch_database_setup_user_query_fields (notmuch_database_t *notmuch);
+#if __cplusplus
+/* parse-sexp.cc */
+notmuch_status_t
+_notmuch_sexp_string_to_xapian_query (notmuch_database_t *notmuch, const char *querystr,
+ Xapian::Query &output);
+#endif
+
#endif
diff --git a/lib/parse-sexp.cc b/lib/parse-sexp.cc
new file mode 100644
index 00000000..1ce3c9d4
--- /dev/null
+++ b/lib/parse-sexp.cc
@@ -0,0 +1,54 @@
+#include <xapian.h>
+#include "notmuch-private.h"
+#include "sexp.h"
+
+#if HAVE_SFSEXP
+
+/* _sexp is used for file scope symbols to avoid clashing with
+ * definitions from sexp.h */
+
+/* Convert the parsed s-expression 'sx' into a Xapian query stored in
+ * 'output'.
+ *
+ * Here we expect the s-expression to be an atom, the empty list, or a
+ * proper list whose first element defines an operation. No operations
+ * are recognized yet, so every non-empty list is logged on 'notmuch'
+ * and rejected with NOTMUCH_STATUS_BAD_QUERY_SYNTAX; atoms and the
+ * empty list return NOTMUCH_STATUS_SUCCESS. */
+
+static notmuch_status_t
+_sexp_to_xapian_query (notmuch_database_t *notmuch, const sexp_t *sx,
+ Xapian::Query &output)
+{
+
+ if (sx->ty == SEXP_VALUE) {
+ output = Xapian::Query (Xapian::Unicode::tolower (sx->val));
+ return NOTMUCH_STATUS_SUCCESS;
+ }
+
+ /* Empty list */
+ if (! sx->list) {
+ output = Xapian::Query::MatchAll;
+ return NOTMUCH_STATUS_SUCCESS;
+ }
+
+ if (sx->list->ty == SEXP_VALUE)
+ _notmuch_database_log (notmuch, "unknown prefix '%s'\n", sx->list->val);
+ else
+ /* The format has no conversion, so no argument is passed. */
+ _notmuch_database_log (notmuch, "unexpected list in field/operation position\n");
+
+ return NOTMUCH_STATUS_BAD_QUERY_SYNTAX;
+}
+
+/* Parse 'querystr' as an s-expression and convert it to a Xapian
+ * query stored in 'output'.
+ *
+ * Returns NOTMUCH_STATUS_NULL_POINTER if 'querystr' is NULL,
+ * NOTMUCH_STATUS_OUT_OF_MEMORY if the working copy cannot be
+ * allocated, NOTMUCH_STATUS_BAD_QUERY_SYNTAX (with a message logged on
+ * 'notmuch') if the string is not a valid s-expression, and otherwise
+ * the status of the conversion. The working copy and the parsed tree
+ * are released before returning. */
+notmuch_status_t
+_notmuch_sexp_string_to_xapian_query (notmuch_database_t *notmuch, const char *querystr,
+ Xapian::Query &output)
+{
+ notmuch_status_t status;
+ sexp_t *sx = NULL;
+ char *buf;
+
+ /* A query may be created without a string; never hand NULL to strlen. */
+ if (! querystr)
+ return NOTMUCH_STATUS_NULL_POINTER;
+
+ /* parse_sexp takes a non-const buffer, so work on a private copy. */
+ buf = talloc_strdup (notmuch, querystr);
+ if (! buf)
+ return NOTMUCH_STATUS_OUT_OF_MEMORY;
+
+ sx = parse_sexp (buf, strlen (querystr));
+ if (! sx) {
+ _notmuch_database_log (notmuch, "invalid s-expression: '%s'\n", querystr);
+ status = NOTMUCH_STATUS_BAD_QUERY_SYNTAX;
+ goto DONE;
+ }
+
+ status = _sexp_to_xapian_query (notmuch, sx, output);
+
+ DONE:
+ /* Release the tree and the copy only after the conversion has
+ * finished with them. */
+ if (sx)
+ destroy_sexp (sx);
+ talloc_free (buf);
+
+ return status;
+}
+#endif
diff --git a/lib/query.cc b/lib/query.cc
index 12fd9482..435f7229 100644
--- a/lib/query.cc
+++ b/lib/query.cc
@@ -23,8 +23,6 @@
#include <glib.h> /* GHashTable, GPtrArray */
-#include "sexp.h"
-
struct _notmuch_query {
notmuch_database_t *notmuch;
const char *query_string;
@@ -208,8 +206,8 @@ _notmuch_query_ensure_parsed_sexpr (notmuch_query_t *query)
if (query->parsed)
return NOTMUCH_STATUS_SUCCESS;
- query->xapian_query = Xapian::Query::MatchAll;
- return NOTMUCH_STATUS_SUCCESS;
+ return _notmuch_sexp_string_to_xapian_query (query->notmuch, query->query_string,
+ query->xapian_query);
}
static notmuch_status_t
diff --git a/test/T080-search.sh b/test/T080-search.sh
index 966e772a..a3f0dead 100755
--- a/test/T080-search.sh
+++ b/test/T080-search.sh
@@ -189,9 +189,4 @@ test_begin_subtest "parts do not have adjacent term positions"
output=$(notmuch search id:termpos and '"c x"')
test_expect_equal "$output" ""
-test_begin_subtest "sexpr query: all messages"
-notmuch search '*' > EXPECTED
-notmuch search --query-syntax=sexp '()' > OUTPUT
-test_expect_equal_file EXPECTED OUTPUT
-
test_done
diff --git a/test/T081-sexpr-search.sh b/test/T081-sexpr-search.sh
new file mode 100755
index 00000000..3ee9f71d
--- /dev/null
+++ b/test/T081-sexpr-search.sh
@@ -0,0 +1,65 @@
+#!/usr/bin/env bash
+test_description='"notmuch search" in several variations'
+. $(dirname "$0")/test-lib.sh || exit 1
+
+if [ $NOTMUCH_HAVE_SFSEXP -ne 1 ]; then
+ printf "Skipping due to missing sfsexp library\n"
+ test_done
+fi
+
+add_email_corpus
+
+test_begin_subtest "all messages: ()"
+notmuch search '*' > EXPECTED
+notmuch search --query-syntax=sexp "()" > OUTPUT
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "single term in body"
+notmuch search --query-syntax=sexp 'wizard' | notmuch_search_sanitize>OUTPUT
+cat <<EOF > EXPECTED
+thread:XXX 2009-11-18 [1/3] Carl Worth| Jan Janak; [notmuch] What a great idea! (inbox unread)
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "single term in body (case insensitive)"
+notmuch search --query-syntax=sexp 'Wizard' | notmuch_search_sanitize>OUTPUT
+cat <<EOF > EXPECTED
+thread:XXX 2009-11-18 [1/3] Carl Worth| Jan Janak; [notmuch] What a great idea! (inbox unread)
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "single term in body, stemmed version"
+test_subtest_known_broken
+notmuch search arriv > EXPECTED
+notmuch search --query-syntax=sexp arriv > OUTPUT
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "Unbalanced parens"
+# A code 1 indicates the error was handled (a crash will return e.g. 139).
+test_expect_code 1 "notmuch search --query-syntax=sexp '('"
+
+test_begin_subtest "Unbalanced parens, error message"
+notmuch search --query-syntax=sexp '(' >OUTPUT 2>&1
+cat <<EOF > EXPECTED
+notmuch search: Syntax error in query
+invalid s-expression: '('
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "unknown prefix"
+notmuch search --query-syntax=sexp '(foo)' >OUTPUT 2>&1
+cat <<EOF > EXPECTED
+notmuch search: Syntax error in query
+unknown prefix 'foo'
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "list as prefix"
+notmuch search --query-syntax=sexp '((foo))' >OUTPUT 2>&1
+cat <<EOF > EXPECTED
+notmuch search: Syntax error in query
+unexpected list in field/operation position
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_done
--
2.30.2
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 07/27] lib: leave stemmer object accessible
2021-07-30 12:55 v3 sexpr query parser David Bremner
` (5 preceding siblings ...)
2021-07-30 12:55 ` [PATCH 06/27] lib/parse-sexp: parse single terms and the empty list David Bremner
@ 2021-07-30 12:55 ` David Bremner
2021-07-30 12:55 ` [PATCH 08/27] lib/parse-sexp: stem unquoted atoms David Bremner
` (19 subsequent siblings)
26 siblings, 0 replies; 28+ messages in thread
From: David Bremner @ 2021-07-30 12:55 UTC (permalink / raw)
To: notmuch; +Cc: David Bremner
This enables using the same stemmer in both query parsers.
---
lib/database-private.h | 1 +
lib/open.cc | 3 ++-
2 files changed, 3 insertions(+), 1 deletion(-)
diff --git a/lib/database-private.h b/lib/database-private.h
index f206efaf..85d55299 100644
--- a/lib/database-private.h
+++ b/lib/database-private.h
@@ -232,6 +232,7 @@ struct _notmuch_database {
*/
unsigned long view;
Xapian::QueryParser *query_parser;
+ Xapian::Stem *stemmer;
Xapian::TermGenerator *term_gen;
Xapian::RangeProcessor *value_range_processor;
Xapian::RangeProcessor *date_range_processor;
diff --git a/lib/open.cc b/lib/open.cc
index 7b95c5b1..21ab3984 100644
--- a/lib/open.cc
+++ b/lib/open.cc
@@ -428,7 +428,8 @@ _finish_open (notmuch_database_t *notmuch,
"lastmod:");
notmuch->query_parser->set_default_op (Xapian::Query::OP_AND);
notmuch->query_parser->set_database (*notmuch->xapian_db);
- notmuch->query_parser->set_stemmer (Xapian::Stem ("english"));
+ notmuch->stemmer = new Xapian::Stem ("english");
+ notmuch->query_parser->set_stemmer (*notmuch->stemmer);
notmuch->query_parser->set_stemming_strategy (Xapian::QueryParser::STEM_SOME);
notmuch->query_parser->add_rangeprocessor (notmuch->value_range_processor);
notmuch->query_parser->add_rangeprocessor (notmuch->date_range_processor);
--
2.30.2
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 08/27] lib/parse-sexp: stem unquoted atoms
2021-07-30 12:55 v3 sexpr query parser David Bremner
` (6 preceding siblings ...)
2021-07-30 12:55 ` [PATCH 07/27] lib: leave stemmer object accessible David Bremner
@ 2021-07-30 12:55 ` David Bremner
2021-07-30 12:55 ` [PATCH 09/27] lib/parse-sexp: support and, not, and or David Bremner
` (18 subsequent siblings)
26 siblings, 0 replies; 28+ messages in thread
From: David Bremner @ 2021-07-30 12:55 UTC (permalink / raw)
To: notmuch; +Cc: David Bremner
This is somewhat less DWIM than the Xapian query parser, but it has
the advantage of simplicity.
---
doc/man7/notmuch-sexp-queries.rst | 10 ++++++++--
lib/parse-sexp.cc | 10 +++++++---
test/T081-sexpr-search.sh | 7 +++++--
3 files changed, 20 insertions(+), 7 deletions(-)
diff --git a/doc/man7/notmuch-sexp-queries.rst b/doc/man7/notmuch-sexp-queries.rst
index e530912c..8a3bcd8b 100644
--- a/doc/man7/notmuch-sexp-queries.rst
+++ b/doc/man7/notmuch-sexp-queries.rst
@@ -41,8 +41,10 @@ subqueries.
The empty list matches all messages
*term*
- Match all messages containing *term*, possibly after stemming
- or phase splitting.
+ Match all messages containing *term*, possibly after
+ stemming or phrase splitting. For discussion of stemming in
+ notmuch see :any:`notmuch-search-terms(7)`. Stemming only applies
+ to unquoted terms (basic values) in s-expression queries.
``(`` *field* |q1| |q2| ... |qn| ``)``
Restrict the queries |q1| to |qn| to *field*, and combine with *and*
@@ -76,6 +78,10 @@ EXAMPLES
``Wizard``
Match all messages containing the word "wizard", ignoring case.
+``added``
+ Match all messages containing "added", but also those containing "add", "additional",
+ "Additional", "adds", etc... via stemming.
+
.. |q1| replace:: :math:`q_1`
.. |q2| replace:: :math:`q_2`
.. |qn| replace:: :math:`q_n`
diff --git a/lib/parse-sexp.cc b/lib/parse-sexp.cc
index 1ce3c9d4..1be5e209 100644
--- a/lib/parse-sexp.cc
+++ b/lib/parse-sexp.cc
@@ -1,5 +1,4 @@
-#include <xapian.h>
-#include "notmuch-private.h"
+#include "database-private.h"
#include "sexp.h"
#if HAVE_SFSEXP
@@ -17,7 +16,12 @@ _sexp_to_xapian_query (notmuch_database_t *notmuch, const sexp_t *sx,
{
if (sx->ty == SEXP_VALUE) {
- output = Xapian::Query (Xapian::Unicode::tolower (sx->val));
+ std::string term = Xapian::Unicode::tolower (sx->val);
+ Xapian::Stem stem = *(notmuch->stemmer);
+ if (sx->aty == SEXP_BASIC)
+ term = "Z" + stem (term);
+
+ output = Xapian::Query (term);
return NOTMUCH_STATUS_SUCCESS;
}
diff --git a/test/T081-sexpr-search.sh b/test/T081-sexpr-search.sh
index 3ee9f71d..c5c3cf6b 100755
--- a/test/T081-sexpr-search.sh
+++ b/test/T081-sexpr-search.sh
@@ -22,18 +22,21 @@ EOF
test_expect_equal_file EXPECTED OUTPUT
test_begin_subtest "single term in body (case insensitive)"
-notmuch search --query-syntax=sexp 'Wizard' | notmuch_search_sanitize>OUTPUT
+notmuch search --query-syntax=sexp '"Wizard"' | notmuch_search_sanitize>OUTPUT
cat <<EOF > EXPECTED
thread:XXX 2009-11-18 [1/3] Carl Worth| Jan Janak; [notmuch] What a great idea! (inbox unread)
EOF
test_expect_equal_file EXPECTED OUTPUT
test_begin_subtest "single term in body, stemmed version"
-test_subtest_known_broken
notmuch search arriv > EXPECTED
notmuch search --query-syntax=sexp arriv > OUTPUT
test_expect_equal_file EXPECTED OUTPUT
+test_begin_subtest "single term in body, unstemmed version"
+notmuch search --query-syntax=sexp '"arriv"' > OUTPUT
+test_expect_equal_file /dev/null OUTPUT
+
test_begin_subtest "Unbalanced parens"
# A code 1 indicates the error was handled (a crash will return e.g. 139).
test_expect_code 1 "notmuch search --query-syntax=sexp '('"
--
2.30.2
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 09/27] lib/parse-sexp: support and, not, and or.
2021-07-30 12:55 v3 sexpr query parser David Bremner
` (7 preceding siblings ...)
2021-07-30 12:55 ` [PATCH 08/27] lib/parse-sexp: stem unquoted atoms David Bremner
@ 2021-07-30 12:55 ` David Bremner
2021-07-30 12:55 ` [PATCH 10/27] lib/parse-sexp: support subject field David Bremner
` (17 subsequent siblings)
26 siblings, 0 replies; 28+ messages in thread
From: David Bremner @ 2021-07-30 12:55 UTC (permalink / raw)
To: notmuch; +Cc: David Bremner
All operations and (Xapian) fields will eventually have an entry in
the prefixes table. The flags field is just a placeholder for now, but
will eventually distinguish between various kinds of prefixes.
---
doc/man7/notmuch-sexp-queries.rst | 16 ++++---
lib/parse-sexp.cc | 76 +++++++++++++++++++++++++++++--
test/T081-sexpr-search.sh | 32 +++++++++++--
3 files changed, 110 insertions(+), 14 deletions(-)
diff --git a/doc/man7/notmuch-sexp-queries.rst b/doc/man7/notmuch-sexp-queries.rst
index 8a3bcd8b..1594d147 100644
--- a/doc/man7/notmuch-sexp-queries.rst
+++ b/doc/man7/notmuch-sexp-queries.rst
@@ -51,7 +51,9 @@ subqueries.
(for most fields) or *or*. See :any:`fields` for more information.
``(`` *operator* |q1| |q2| ... |qn| ``)``
- Combine queries |q1| to |qn|. See :any:`operators` for more information.
+ Combine queries |q1| to |qn|. Currently supported operators are
+ ``and``, ``or``, and ``not``. ``(not`` |q1| ... |qn| ``)`` is equivalent
+ to ``(and (not`` |q1| ``) ... (not`` |qn| ``))``.
``(`` *modifier* |q1| |q2| ... |qn| ``)``
Combine queries |q1| to |qn|, and reinterpret the result (e.g. as a regular expression).
@@ -62,11 +64,6 @@ subqueries.
FIELDS
``````
-.. _operators:
-
-OPERATORS
-`````````
-
.. _modifiers:
MODIFIERS
@@ -82,6 +79,13 @@ EXAMPLES
Match all messages containing "added", but also those containing "add", "additional",
"Additional", "adds", etc... via stemming.
+``(and Bob Marley)``
+ Match messages containing words "Bob" and "Marley", or their stems.
+ The words need not be adjacent.
+
+``(not Bob Marley)``
+ Match messages containing neither "Bob" nor "Marley", nor their stems,
+
.. |q1| replace:: :math:`q_1`
.. |q2| replace:: :math:`q_2`
.. |qn| replace:: :math:`q_n`
diff --git a/lib/parse-sexp.cc b/lib/parse-sexp.cc
index 1be5e209..97bfecbd 100644
--- a/lib/parse-sexp.cc
+++ b/lib/parse-sexp.cc
@@ -6,12 +6,69 @@
/* _sexp is used for file scope symbols to avoid clashing with
* definitions from sexp.h */
+typedef enum {
+ SEXP_FLAG_NONE = 0,
+} _sexp_flag_t;
+
+typedef struct {
+ const char *name;
+ Xapian::Query::op xapian_op;
+ Xapian::Query initial;
+ _sexp_flag_t flags;
+} _sexp_prefix_t;
+
+static _sexp_prefix_t prefixes[] =
+{
+ { "and", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
+ SEXP_FLAG_NONE },
+ { "not", Xapian::Query::OP_AND_NOT, Xapian::Query::MatchAll,
+ SEXP_FLAG_NONE },
+ { "or", Xapian::Query::OP_OR, Xapian::Query::MatchNothing,
+ SEXP_FLAG_NONE },
+ { }
+};
+
+static notmuch_status_t _sexp_to_xapian_query (notmuch_database_t *notmuch,
+ const _sexp_prefix_t *parent,
+ const sexp_t *sx,
+ Xapian::Query &output);
+
+static notmuch_status_t
+_sexp_combine_query (notmuch_database_t *notmuch,
+ const _sexp_prefix_t *parent,
+ Xapian::Query::op operation,
+ Xapian::Query left,
+ const sexp_t *sx,
+ Xapian::Query &output)
+{
+ Xapian::Query subquery;
+
+ notmuch_status_t status;
+
+ /* if we run out elements, return accumulator */
+
+ if (! sx) {
+ output = left;
+ return NOTMUCH_STATUS_SUCCESS;
+ }
+
+ status = _sexp_to_xapian_query (notmuch, parent, sx, subquery);
+ if (status)
+ return status;
+
+ return _sexp_combine_query (notmuch,
+ parent,
+ operation,
+ Xapian::Query (operation, left, subquery),
+ sx->next, output);
+}
+
/* Here we expect the s-expression to be a proper list, with first
* element defining and operation, or as a special case the empty
* list */
static notmuch_status_t
-_sexp_to_xapian_query (notmuch_database_t *notmuch, const sexp_t *sx,
+_sexp_to_xapian_query (notmuch_database_t *notmuch, const _sexp_prefix_t *parent, const sexp_t *sx,
Xapian::Query &output)
{
@@ -31,11 +88,20 @@ _sexp_to_xapian_query (notmuch_database_t *notmuch, const sexp_t *sx,
return NOTMUCH_STATUS_SUCCESS;
}
- if (sx->list->ty == SEXP_VALUE)
- _notmuch_database_log (notmuch, "unknown prefix '%s'\n", sx->list->val);
- else
+ if (sx->list->ty == SEXP_LIST) {
_notmuch_database_log (notmuch, "unexpected list in field/operation position\n",
sx->list->val);
+ return NOTMUCH_STATUS_BAD_QUERY_SYNTAX;
+ }
+
+ for (_sexp_prefix_t *prefix = prefixes; prefix && prefix->name; prefix++) {
+ if (strcmp (prefix->name, sx->list->val) == 0) {
+ return _sexp_combine_query (notmuch, parent, prefix->xapian_op, prefix->initial,
+ sx->list->next, output);
+ }
+ }
+
+ _notmuch_database_log (notmuch, "unknown prefix '%s'\n", sx->list->val);
return NOTMUCH_STATUS_BAD_QUERY_SYNTAX;
}
@@ -53,6 +119,6 @@ _notmuch_sexp_string_to_xapian_query (notmuch_database_t *notmuch, const char *q
return NOTMUCH_STATUS_BAD_QUERY_SYNTAX;
}
- return _sexp_to_xapian_query (notmuch, sx, output);
+ return _sexp_to_xapian_query (notmuch, NULL, sx, output);
}
#endif
diff --git a/test/T081-sexpr-search.sh b/test/T081-sexpr-search.sh
index c5c3cf6b..c82f4a43 100755
--- a/test/T081-sexpr-search.sh
+++ b/test/T081-sexpr-search.sh
@@ -9,9 +9,34 @@ fi
add_email_corpus
-test_begin_subtest "all messages: ()"
-notmuch search '*' > EXPECTED
-notmuch search --query-syntax=sexp "()" > OUTPUT
+for query in '()' '(not)' '(and)' '(or ())' '(or (not))' '(or (and))' \
+ '(or (and) (or) (not (and)))'; do
+ test_begin_subtest "all messages: $query"
+ notmuch search '*' > EXPECTED
+ notmuch search --query-syntax=sexp "$query" > OUTPUT
+ test_expect_equal_file EXPECTED OUTPUT
+done
+
+for query in '(or)' '(not ())' '(not (not))' '(not (and))' \
+ '(not (or (and) (or) (not (and))))'; do
+ test_begin_subtest "no messages: $query"
+ notmuch search --query-syntax=sexp "$query" > OUTPUT
+ test_expect_equal_file /dev/null OUTPUT
+done
+
+test_begin_subtest "and of exact terms"
+notmuch search --query-syntax=sexp '(and "wonderful" "wizard")' | notmuch_search_sanitize > OUTPUT
+cat <<EOF > EXPECTED
+thread:XXX 2009-11-18 [1/3] Carl Worth| Jan Janak; [notmuch] What a great idea! (inbox unread)
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "or of exact terms"
+notmuch search --query-syntax=sexp '(or "php" "wizard")' | notmuch_search_sanitize > OUTPUT
+cat <<EOF > EXPECTED
+thread:XXX 2010-12-29 [1/1] François Boulogne; [aur-general] Guidelines: cp, mkdir vs install (inbox unread)
+thread:XXX 2009-11-18 [1/3] Carl Worth| Jan Janak; [notmuch] What a great idea! (inbox unread)
+EOF
test_expect_equal_file EXPECTED OUTPUT
test_begin_subtest "single term in body"
@@ -37,6 +62,7 @@ test_begin_subtest "single term in body, unstemmed version"
notmuch search --query-syntax=sexp '"arriv"' > OUTPUT
test_expect_equal_file /dev/null OUTPUT
+
test_begin_subtest "Unbalanced parens"
# A code 1 indicates the error was handled (a crash will return e.g. 139).
test_expect_code 1 "notmuch search --query-syntax=sexp '('"
--
2.30.2
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 10/27] lib/parse-sexp: support subject field
2021-07-30 12:55 v3 sexpr query parser David Bremner
` (8 preceding siblings ...)
2021-07-30 12:55 ` [PATCH 09/27] lib/parse-sexp: support and, not, and or David Bremner
@ 2021-07-30 12:55 ` David Bremner
2021-07-30 12:55 ` [PATCH 11/27] util/unicode: allow calling from C++ David Bremner
` (16 subsequent siblings)
26 siblings, 0 replies; 28+ messages in thread
From: David Bremner @ 2021-07-30 12:55 UTC (permalink / raw)
To: notmuch; +Cc: David Bremner
The tests marked as known broken fail because we do not yet handle phrase searches.
---
doc/man7/notmuch-sexp-queries.rst | 57 +++++++++++++++++++++++++++++++
lib/parse-sexp.cc | 19 +++++++++--
test/T081-sexpr-search.sh | 56 ++++++++++++++++++++++++++++++
3 files changed, 130 insertions(+), 2 deletions(-)
diff --git a/doc/man7/notmuch-sexp-queries.rst b/doc/man7/notmuch-sexp-queries.rst
index 1594d147..5427bb0a 100644
--- a/doc/man7/notmuch-sexp-queries.rst
+++ b/doc/man7/notmuch-sexp-queries.rst
@@ -64,6 +64,59 @@ subqueries.
FIELDS
``````
+*Fields* (also called *prefixes* in notmuch documentation)
+correspond to attributes of mail messages. Some are inherent (and
+immutable) like ``subject``, while others ``tag`` and ``property`` are
+settable by the user. Each concrete field in
+:any:`the table below <field-table>`
+is discussed further under "Search prefixes" in
+:any:`notmuch-search-terms(7)`. The row *user* refers to user defined
+fields, described in :any:`notmuch-config(1)`.
+
+.. _field-table:
+
+.. table:: Fields with supported modifiers
+
+ +------------+-----------+-----------+-----------+-----------+----------+
+ | field | combine | type | expand | wildcard | regex |
+ +============+===========+===========+===========+===========+==========+
+ | *none* | and | | no | yes | no |
+ +------------+-----------+-----------+-----------+-----------+----------+
+ | *user* | and | phrase | no | yes | no |
+ +------------+-----------+-----------+-----------+-----------+----------+
+ | attachment | and | phrase | yes | yes | no |
+ +------------+-----------+-----------+-----------+-----------+----------+
+ | body | and | phrase | no | no | no |
+ +------------+-----------+-----------+-----------+-----------+----------+
+ | date | | range | no | no | no |
+ +------------+-----------+-----------+-----------+-----------+----------+
+ | folder | or | phrase | yes | yes | yes |
+ +------------+-----------+-----------+-----------+-----------+----------+
+ | from | and | phrase | yes | yes | yes |
+ +------------+-----------+-----------+-----------+-----------+----------+
+ | id | or | term | no | yes | yes |
+ +------------+-----------+-----------+-----------+-----------+----------+
+ | is | and | term | yes | yes | yes |
+ +------------+-----------+-----------+-----------+-----------+----------+
+ | lastmod | | range | no | no | no |
+ +------------+-----------+-----------+-----------+-----------+----------+
+ | mid | or | term | no | yes | yes |
+ +------------+-----------+-----------+-----------+-----------+----------+
+ | mimetype | or | phrase | yes | yes | no |
+ +------------+-----------+-----------+-----------+-----------+----------+
+ | path | or | term | yes | yes | yes |
+ +------------+-----------+-----------+-----------+-----------+----------+
+ | property | and | term | yes | yes | yes |
+ +------------+-----------+-----------+-----------+-----------+----------+
+ | subject | and | phrase | yes | yes | yes |
+ +------------+-----------+-----------+-----------+-----------+----------+
+ | tag | and | term | yes | yes | yes |
+ +------------+-----------+-----------+-----------+-----------+----------+
+ | thread | or | term | yes | yes | yes |
+ +------------+-----------+-----------+-----------+-----------+----------+
+ | to | and | phrase | yes | yes | no |
+ +------------+-----------+-----------+-----------+-----------+----------+
+
.. _modifiers:
MODIFIERS
@@ -86,6 +139,10 @@ EXAMPLES
``(not Bob Marley)``
Match messages containing neither "Bob" nor "Marley", nor their stems,
+``(subject quick "brown fox")``
+ Match messages whose subject contains "quick" (anywhere, stemmed) and
+ the phrase "brown fox".
+
.. |q1| replace:: :math:`q_1`
.. |q2| replace:: :math:`q_2`
.. |qn| replace:: :math:`q_n`
diff --git a/lib/parse-sexp.cc b/lib/parse-sexp.cc
index 97bfecbd..a5d41f30 100644
--- a/lib/parse-sexp.cc
+++ b/lib/parse-sexp.cc
@@ -7,7 +7,8 @@
* definitions from sexp.h */
typedef enum {
- SEXP_FLAG_NONE = 0,
+ SEXP_FLAG_NONE = 0,
+ SEXP_FLAG_FIELD = 1 << 0,
} _sexp_flag_t;
typedef struct {
@@ -25,6 +26,8 @@ static _sexp_prefix_t prefixes[] =
SEXP_FLAG_NONE },
{ "or", Xapian::Query::OP_OR, Xapian::Query::MatchNothing,
SEXP_FLAG_NONE },
+ { "subject", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
+ SEXP_FLAG_FIELD },
{ }
};
@@ -75,8 +78,11 @@ _sexp_to_xapian_query (notmuch_database_t *notmuch, const _sexp_prefix_t *parent
if (sx->ty == SEXP_VALUE) {
std::string term = Xapian::Unicode::tolower (sx->val);
Xapian::Stem stem = *(notmuch->stemmer);
+ std::string term_prefix = parent ? _find_prefix (parent->name) : "";
if (sx->aty == SEXP_BASIC)
- term = "Z" + stem (term);
+ term = "Z" + term_prefix + stem (term);
+ else
+ term = term_prefix + term;
output = Xapian::Query (term);
return NOTMUCH_STATUS_SUCCESS;
@@ -96,6 +102,15 @@ _sexp_to_xapian_query (notmuch_database_t *notmuch, const _sexp_prefix_t *parent
for (_sexp_prefix_t *prefix = prefixes; prefix && prefix->name; prefix++) {
if (strcmp (prefix->name, sx->list->val) == 0) {
+ if (prefix->flags & SEXP_FLAG_FIELD) {
+ if (parent) {
+ _notmuch_database_log (notmuch, "nested field: '%s' inside '%s'\n",
+ prefix->name, parent->name);
+ return NOTMUCH_STATUS_BAD_QUERY_SYNTAX;
+ }
+ parent = prefix;
+ }
+
return _sexp_combine_query (notmuch, parent, prefix->xapian_op, prefix->initial,
sx->list->next, output);
}
diff --git a/test/T081-sexpr-search.sh b/test/T081-sexpr-search.sh
index c82f4a43..5dd45121 100755
--- a/test/T081-sexpr-search.sh
+++ b/test/T081-sexpr-search.sh
@@ -62,6 +62,54 @@ test_begin_subtest "single term in body, unstemmed version"
notmuch search --query-syntax=sexp '"arriv"' > OUTPUT
test_expect_equal_file /dev/null OUTPUT
+test_begin_subtest "Search by 'subject'"
+add_message [subject]=subjectsearchtest '[date]="Sat, 01 Jan 2000 12:00:00 -0000"'
+output=$(notmuch search --query-syntax=sexp '(subject subjectsearchtest)' | notmuch_search_sanitize)
+test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; subjectsearchtest (inbox unread)"
+
+test_begin_subtest "Search by 'subject' (case insensitive)"
+notmuch search tag:inbox and subject:maildir | notmuch_search_sanitize > EXPECTED
+notmuch search --query-syntax=sexp '(subject "Maildir")' | notmuch_search_sanitize > OUTPUT
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "Search by 'subject' (utf-8):"
+add_message [subject]=utf8-sübjéct '[date]="Sat, 01 Jan 2000 12:00:00 -0000"'
+output=$(notmuch search --query-syntax=sexp '(subject utf8 sübjéct)' | notmuch_search_sanitize)
+test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; utf8-sübjéct (inbox unread)"
+
+test_begin_subtest "Search by 'subject' (utf-8, and):"
+output=$(notmuch search --query-syntax=sexp '(subject (and utf8 sübjéct))' | notmuch_search_sanitize)
+test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; utf8-sübjéct (inbox unread)"
+
+test_begin_subtest "Search by 'subject' (utf-8, and outside):"
+output=$(notmuch search --query-syntax=sexp '(and (subject utf8) (subject sübjéct))' | notmuch_search_sanitize)
+test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; utf8-sübjéct (inbox unread)"
+
+test_begin_subtest "Search by 'subject' (utf-8, or):"
+notmuch search --query-syntax=sexp '(subject (or utf8 subjectsearchtest))' | notmuch_search_sanitize > OUTPUT
+cat <<EOF > EXPECTED
+thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; subjectsearchtest (inbox unread)
+thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; utf8-sübjéct (inbox unread)
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "Search by 'subject' (utf-8, or outside):"
+notmuch search --query-syntax=sexp '(or (subject utf8) (subject subjectsearchtest))' | notmuch_search_sanitize > OUTPUT
+cat <<EOF > EXPECTED
+thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; subjectsearchtest (inbox unread)
+thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; utf8-sübjéct (inbox unread)
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "Search by 'subject' (utf-8, phrase-token):"
+test_subtest_known_broken
+output=$(notmuch search --query-syntax=sexp '(subject utf8-sübjéct)' | notmuch_search_sanitize)
+test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; utf8-sübjéct (inbox unread)"
+
+test_begin_subtest "Search by 'subject' (utf-8, quoted string):"
+test_subtest_known_broken
+output=$(notmuch search --query-syntax=sexp '(subject "utf8 sübjéct")' | notmuch_search_sanitize)
+test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; utf8-sübjéct (inbox unread)"
test_begin_subtest "Unbalanced parens"
# A code 1 indicates the error was handled (a crash will return e.g. 139).
@@ -91,4 +139,12 @@ unexpected list in field/operation position
EOF
test_expect_equal_file EXPECTED OUTPUT
+test_begin_subtest "illegal nesting"
+notmuch search --query-syntax=sexp '(subject (subject foo))' >OUTPUT 2>&1
+cat <<EOF > EXPECTED
+notmuch search: Syntax error in query
+nested field: 'subject' inside 'subject'
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
test_done
--
2.30.2
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 11/27] util/unicode: allow calling from C++
2021-07-30 12:55 v3 sexpr query parser David Bremner
` (9 preceding siblings ...)
2021-07-30 12:55 ` [PATCH 10/27] lib/parse-sexp: support subject field David Bremner
@ 2021-07-30 12:55 ` David Bremner
2021-07-30 12:55 ` [PATCH 12/27] lib/parse-sexp: support phrase queries David Bremner
` (15 subsequent siblings)
26 siblings, 0 replies; 28+ messages in thread
From: David Bremner @ 2021-07-30 12:55 UTC (permalink / raw)
To: notmuch; +Cc: David Bremner
The omission of the 'extern "C"' machinery seems like an oversight.
---
util/unicode-util.h | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/util/unicode-util.h b/util/unicode-util.h
index 32d1e6ef..1bb9336a 100644
--- a/util/unicode-util.h
+++ b/util/unicode-util.h
@@ -4,9 +4,16 @@
#include <stdbool.h>
#include <gmodule.h>
+#ifdef __cplusplus
+extern "C" {
+#endif
+
/* The utf8 encoded string would tokenize as a single word, according
* to xapian. */
bool unicode_word_utf8 (const char *str);
typedef gunichar notmuch_unichar;
+#ifdef __cplusplus
+}
+#endif
#endif
--
2.30.2
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 12/27] lib/parse-sexp: support phrase queries.
2021-07-30 12:55 v3 sexpr query parser David Bremner
` (10 preceding siblings ...)
2021-07-30 12:55 ` [PATCH 11/27] util/unicode: allow calling from C++ David Bremner
@ 2021-07-30 12:55 ` David Bremner
2021-07-30 12:55 ` [PATCH 13/27] lib/parse-sexp: add term prefix backed fields David Bremner
` (14 subsequent siblings)
26 siblings, 0 replies; 28+ messages in thread
From: David Bremner @ 2021-07-30 12:55 UTC (permalink / raw)
To: notmuch; +Cc: David Bremner
Anything that is quoted or not purely word characters is considered a
phrase. Phrases are not stemmed, because the stems do not have
positional information in the database. It is less efficient to scan
the term twice, but it avoids a second pass to add prefixes, so maybe
it balances out. In any case, it seems unlikely query parsing is very
often a bottleneck.
---
doc/man7/notmuch-sexp-queries.rst | 32 ++++++++++++++++++----
lib/parse-sexp.cc | 44 ++++++++++++++++++++++++++-----
test/T081-sexpr-search.sh | 21 +++++++++++++--
3 files changed, 83 insertions(+), 14 deletions(-)
diff --git a/doc/man7/notmuch-sexp-queries.rst b/doc/man7/notmuch-sexp-queries.rst
index 5427bb0a..570ed6ae 100644
--- a/doc/man7/notmuch-sexp-queries.rst
+++ b/doc/man7/notmuch-sexp-queries.rst
@@ -41,10 +41,12 @@ subqueries.
The empty list matches all messages
*term*
- Match all messages containing *term*, possibly after
- stemming or phase splitting. For discussion of stemming in
- notmuch see :any:`notmuch-search-terms(7)`. Stemming only applies
- to unquoted terms (basic values) in s-expression queries.
+
+ Match all messages containing *term*, possibly after stemming or
+ phrase splitting. For discussion of stemming in notmuch see
+ :any:`notmuch-search-terms(7)`. Stemming only applies to unquoted
+ terms (basic values) in s-expression queries. For information on
+ phrase splitting see :any:`fields`.
``(`` *field* |q1| |q2| ... |qn| ``)``
Restrict the queries |q1| to |qn| to *field*, and combine with *and*
@@ -64,7 +66,7 @@ subqueries.
FIELDS
``````
-*Fields* (also called *prefixes* in notmuch documentation)
+*Fields* [#aka-pref]_
correspond to attributes of mail messages. Some are inherent (and
immutable) like ``subject``, while others ``tag`` and ``property`` are
settable by the user. Each concrete field in
@@ -73,6 +75,13 @@ is discussed further under "Search prefixes" in
:any:`notmuch-search-terms(7)`. The row *user* refers to user defined
fields, described in :any:`notmuch-config(1)`.
+Most fields are either *phrase fields* [#aka-prob]_ (which match
+sequences of words), or *term fields* [#aka-bool]_ (which match exact
+strings). *Phrase splitting* breaks the term (basic value or quoted
+string) into words, ignoring punctuation. Phrase splitting is applied to
+terms in phrase (probabilistic) fields. Both phrase splitting and
+stemming apply only in phrase fields.
+
.. _field-table:
.. table:: Fields with supported modifiers
@@ -139,10 +148,23 @@ EXAMPLES
``(not Bob Marley)``
Match messages containing neither "Bob" nor "Marley", nor their stems,
+``"quick fox"`` ``quick-fox`` ``quick@fox``
+ Match the *phrase* "quick" followed by "fox" in phrase fields (or
+ outside a field). Match the literal string in a term field.
+
``(subject quick "brown fox")``
Match messages whose subject contains "quick" (anywhere, stemmed) and
the phrase "brown fox".
+NOTES
+=====
+
+.. [#aka-pref] a.k.a. prefixes
+
+.. [#aka-prob] a.k.a. probabilistic prefixes
+
+.. [#aka-bool] a.k.a. boolean prefixes
+
.. |q1| replace:: :math:`q_1`
.. |q2| replace:: :math:`q_2`
.. |qn| replace:: :math:`q_n`
diff --git a/lib/parse-sexp.cc b/lib/parse-sexp.cc
index a5d41f30..4adfc4c5 100644
--- a/lib/parse-sexp.cc
+++ b/lib/parse-sexp.cc
@@ -1,5 +1,6 @@
#include "database-private.h"
#include "sexp.h"
+#include "unicode-util.h"
#if HAVE_SFSEXP
@@ -66,6 +67,36 @@ _sexp_combine_query (notmuch_database_t *notmuch,
sx->next, output);
}
+static notmuch_status_t
+_sexp_parse_phrase (std::string term_prefix, const char *phrase, Xapian::Query &output)
+{
+ Xapian::Utf8Iterator p (phrase);
+ Xapian::Utf8Iterator end;
+ std::vector<std::string> terms;
+
+ while (p != end) {
+ Xapian::Utf8Iterator start;
+ while (p != end && ! Xapian::Unicode::is_wordchar (*p))
+ p++;
+
+ if (p == end)
+ break;
+
+ start = p;
+
+ while (p != end && Xapian::Unicode::is_wordchar (*p))
+ p++;
+
+ if (p != start) {
+ std::string word (start, p);
+ word = Xapian::Unicode::tolower (word);
+ terms.push_back (term_prefix + word);
+ }
+ }
+ output = Xapian::Query (Xapian::Query::OP_PHRASE, terms.begin (), terms.end ());
+ return NOTMUCH_STATUS_SUCCESS;
+}
+
/* Here we expect the s-expression to be a proper list, with first
* element defining and operation, or as a special case the empty
* list */
@@ -79,13 +110,12 @@ _sexp_to_xapian_query (notmuch_database_t *notmuch, const _sexp_prefix_t *parent
std::string term = Xapian::Unicode::tolower (sx->val);
Xapian::Stem stem = *(notmuch->stemmer);
std::string term_prefix = parent ? _find_prefix (parent->name) : "";
- if (sx->aty == SEXP_BASIC)
- term = "Z" + term_prefix + stem (term);
- else
- term = term_prefix + term;
-
- output = Xapian::Query (term);
- return NOTMUCH_STATUS_SUCCESS;
+ if (sx->aty == SEXP_BASIC && unicode_word_utf8 (sx->val)) {
+ output = Xapian::Query ("Z" + term_prefix + stem (term));
+ return NOTMUCH_STATUS_SUCCESS;
+ } else {
+ return _sexp_parse_phrase (term_prefix, sx->val, output);
+ }
}
/* Empty list */
diff --git a/test/T081-sexpr-search.sh b/test/T081-sexpr-search.sh
index 5dd45121..6c1e2536 100755
--- a/test/T081-sexpr-search.sh
+++ b/test/T081-sexpr-search.sh
@@ -102,15 +102,32 @@ EOF
test_expect_equal_file EXPECTED OUTPUT
test_begin_subtest "Search by 'subject' (utf-8, phrase-token):"
-test_subtest_known_broken
output=$(notmuch search --query-syntax=sexp '(subject utf8-sübjéct)' | notmuch_search_sanitize)
test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; utf8-sübjéct (inbox unread)"
test_begin_subtest "Search by 'subject' (utf-8, quoted string):"
-test_subtest_known_broken
output=$(notmuch search --query-syntax=sexp '(subject "utf8 sübjéct")' | notmuch_search_sanitize)
test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; utf8-sübjéct (inbox unread)"
+test_begin_subtest "Search by 'subject' (combine phrase, term):"
+output=$(notmuch search --query-syntax=sexp '(subject Mac "compatibility issues")' | notmuch_search_sanitize)
+test_expect_equal "$output" "thread:XXX 2009-11-18 [4/4] Jjgod Jiang, Alexander Botero-Lowry; [notmuch] Mac OS X/Darwin compatibility issues (inbox unread)"
+
+test_begin_subtest "Search by 'subject' (combine phrase, term 2):"
+notmuch search --query-syntax=sexp '(subject (or utf8 "compatibility issues"))' | notmuch_search_sanitize > OUTPUT
+cat <<EOF > EXPECTED
+thread:XXX 2009-11-18 [4/4] Jjgod Jiang, Alexander Botero-Lowry; [notmuch] Mac OS X/Darwin compatibility issues (inbox unread)
+thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; utf8-sübjéct (inbox unread)
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "Search by 'subject' (combine phrase, term 3):"
+notmuch search --query-syntax=sexp '(subject issues X/Darwin)' | notmuch_search_sanitize > OUTPUT
+cat <<EOF > EXPECTED
+thread:XXX 2009-11-18 [4/4] Jjgod Jiang, Alexander Botero-Lowry; [notmuch] Mac OS X/Darwin compatibility issues (inbox unread)
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
test_begin_subtest "Unbalanced parens"
# A code 1 indicates the error was handled (a crash will return e.g. 139).
test_expect_code 1 "notmuch search --query-syntax=sexp '('"
--
2.30.2
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 13/27] lib/parse-sexp: add term prefix backed fields
2021-07-30 12:55 v3 sexpr query parser David Bremner
` (11 preceding siblings ...)
2021-07-30 12:55 ` [PATCH 12/27] lib/parse-sexp: support phrase queries David Bremner
@ 2021-07-30 12:55 ` David Bremner
2021-07-30 12:55 ` [PATCH 14/27] lib/parse-sexp: 'starts-with' wildcard searches David Bremner
` (13 subsequent siblings)
26 siblings, 0 replies; 28+ messages in thread
From: David Bremner @ 2021-07-30 12:55 UTC (permalink / raw)
To: notmuch; +Cc: David Bremner
We use "boolean" to describe fields that should generate terms
literally without stemming or phrase splitting. This terminology
might not be ideal but it is already enshrined in
notmuch-search-terms(7).
---
doc/man7/notmuch-sexp-queries.rst | 18 +++++-
lib/parse-sexp.cc | 49 ++++++++++++++++
test/T081-sexpr-search.sh | 94 +++++++++++++++++++++++++++++++
3 files changed, 160 insertions(+), 1 deletion(-)
diff --git a/doc/man7/notmuch-sexp-queries.rst b/doc/man7/notmuch-sexp-queries.rst
index 570ed6ae..83017b38 100644
--- a/doc/man7/notmuch-sexp-queries.rst
+++ b/doc/man7/notmuch-sexp-queries.rst
@@ -82,6 +82,14 @@ string) into words, ignore punctuation. Phrase splitting is applied to
terms in phrase (probabilistic) fields. Both phrase splitting and
stemming apply only in phrase fields.
+Each term or phrase field has an associated combining operator
+(``and`` or ``or``) used to combine the queries from each element of
+the tail of the list. This is generally ``or`` for those fields where
+a message has one such attribute, and ``and`` otherwise.
+
+Term or phrase fields can contain arbitrarily complex queries made up
+from terms, operators, and modifiers, but not other fields.
+
.. _field-table:
.. table:: Fields with supported modifiers
@@ -113,7 +121,7 @@ stemming apply only in phrase fields.
+------------+-----------+-----------+-----------+-----------+----------+
| mimetype | or | phrase | yes | yes | no |
+------------+-----------+-----------+-----------+-----------+----------+
- | path | or | term | yes | yes | yes |
+ | path | or | term | no | yes | yes |
+------------+-----------+-----------+-----------+-----------+----------+
| property | and | term | yes | yes | yes |
+------------+-----------+-----------+-----------+-----------+----------+
@@ -152,10 +160,18 @@ EXAMPLES
Match the *phrase* "quick" followed by "fox" in phrase fields (or
outside a field). Match the literal string in a term field.
+``(id 1234@invalid blah@test)``
+ Matches Message-Id "1234@invalid" *or* Message-Id "blah@test"
+
``(subject quick "brown fox")``
Match messages whose subject contains "quick" (anywhere, stemmed) and
the phrase "brown fox".
+``(to (or bob@example.com mallory@example.org))`` ``(or (to bob@example.com) (to mallory@example.org))``
+ Match in the "To" or "Cc" headers, "bob@example.com",
+ "mallory@example.org", and also "bob@example.com.au" since it
+ contains the adjacent triple "bob", "example", "com".
+
NOTES
=====
diff --git a/lib/parse-sexp.cc b/lib/parse-sexp.cc
index 4adfc4c5..9727c57d 100644
--- a/lib/parse-sexp.cc
+++ b/lib/parse-sexp.cc
@@ -10,8 +10,26 @@
typedef enum {
SEXP_FLAG_NONE = 0,
SEXP_FLAG_FIELD = 1 << 0,
+ SEXP_FLAG_BOOLEAN = 1 << 1,
} _sexp_flag_t;
+/*
+ * define bitwise operators to hide casts */
+
+inline _sexp_flag_t
+operator| (_sexp_flag_t a, _sexp_flag_t b)
+{
+ return static_cast<_sexp_flag_t>(
+ static_cast<unsigned>(a) | static_cast<unsigned>(b));
+}
+
+inline _sexp_flag_t
+operator& (_sexp_flag_t a, _sexp_flag_t b)
+{
+ return static_cast<_sexp_flag_t>(
+ static_cast<unsigned>(a) & static_cast<unsigned>(b));
+}
+
typedef struct {
const char *name;
Xapian::Query::op xapian_op;
@@ -23,12 +41,39 @@ static _sexp_prefix_t prefixes[] =
{
{ "and", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
SEXP_FLAG_NONE },
+ { "attachment", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
+ SEXP_FLAG_FIELD },
+ { "body", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
+ SEXP_FLAG_FIELD },
+ { "from", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
+ SEXP_FLAG_FIELD },
+ { "folder", Xapian::Query::OP_OR, Xapian::Query::MatchNothing,
+ SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN },
+ { "id", Xapian::Query::OP_OR, Xapian::Query::MatchNothing,
+ SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN },
+ { "is", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
+ SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN },
+ { "mid", Xapian::Query::OP_OR, Xapian::Query::MatchNothing,
+ SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN },
+ { "mimetype", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
+ SEXP_FLAG_FIELD },
{ "not", Xapian::Query::OP_AND_NOT, Xapian::Query::MatchAll,
SEXP_FLAG_NONE },
{ "or", Xapian::Query::OP_OR, Xapian::Query::MatchNothing,
SEXP_FLAG_NONE },
+ { "path", Xapian::Query::OP_OR, Xapian::Query::MatchNothing,
+ SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN },
+ { "property", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
+ SEXP_FLAG_FIELD
+ | SEXP_FLAG_BOOLEAN },
{ "subject", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
SEXP_FLAG_FIELD },
+ { "tag", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
+ SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN },
+ { "thread", Xapian::Query::OP_OR, Xapian::Query::MatchNothing,
+ SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN },
+ { "to", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
+ SEXP_FLAG_FIELD },
{ }
};
@@ -110,6 +155,10 @@ _sexp_to_xapian_query (notmuch_database_t *notmuch, const _sexp_prefix_t *parent
std::string term = Xapian::Unicode::tolower (sx->val);
Xapian::Stem stem = *(notmuch->stemmer);
std::string term_prefix = parent ? _find_prefix (parent->name) : "";
+ if (parent && (parent->flags & SEXP_FLAG_BOOLEAN)) {
+ output = Xapian::Query (term_prefix + sx->val);
+ return NOTMUCH_STATUS_SUCCESS;
+ }
if (sx->aty == SEXP_BASIC && unicode_word_utf8 (sx->val)) {
output = Xapian::Query ("Z" + term_prefix + stem (term));
return NOTMUCH_STATUS_SUCCESS;
diff --git a/test/T081-sexpr-search.sh b/test/T081-sexpr-search.sh
index 6c1e2536..84f67d11 100755
--- a/test/T081-sexpr-search.sh
+++ b/test/T081-sexpr-search.sh
@@ -101,6 +101,99 @@ thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; utf8-sübjéct (inbox unread)
EOF
test_expect_equal_file EXPECTED OUTPUT
+test_begin_subtest "Search by 'attachment'"
+notmuch search attachment:notmuch-help.patch > EXPECTED
+notmuch search --query-syntax=sexp '(attachment notmuch-help.patch)' > OUTPUT
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "Search by 'body'"
+add_message '[subject]="body search"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"' [body]=bodysearchtest
+output=$(notmuch search --query-syntax=sexp '(body bodysearchtest)' | notmuch_search_sanitize)
+test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; body search (inbox unread)"
+
+test_begin_subtest "Search by 'body' (phrase)"
+add_message '[subject]="body search (phrase)"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"' '[body]="body search (phrase)"'
+add_message '[subject]="negative result"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"' '[body]="This phrase should not match the body search"'
+output=$(notmuch search --query-syntax=sexp '(body "body search phrase")' | notmuch_search_sanitize)
+test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; body search (phrase) (inbox unread)"
+
+test_begin_subtest "Search by 'body' (utf-8):"
+add_message '[subject]="utf8-message-body-subject"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"' '[body]="message body utf8: bödý"'
+output=$(notmuch search --query-syntax=sexp '(body bödý)' | notmuch_search_sanitize)
+test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; utf8-message-body-subject (inbox unread)"
+
+test_begin_subtest "Search by 'from'"
+add_message '[subject]="search by from"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"' [from]=searchbyfrom
+output=$(notmuch search --query-syntax=sexp '(from searchbyfrom)' | notmuch_search_sanitize)
+test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] searchbyfrom; search by from (inbox unread)"
+
+test_begin_subtest "Search by 'from' (address)"
+add_message '[subject]="search by from (address)"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"' [from]=searchbyfrom@example.com
+output=$(notmuch search --query-syntax=sexp '(from searchbyfrom@example.com)' | notmuch_search_sanitize)
+test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] searchbyfrom@example.com; search by from (address) (inbox unread)"
+
+test_begin_subtest "Search by 'from' (name)"
+add_message '[subject]="search by from (name)"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"' '[from]="Search By From Name <test@example.com>"'
+output=$(notmuch search --query-syntax=sexp '(from "Search By From Name")' | notmuch_search_sanitize)
+test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Search By From Name; search by from (name) (inbox unread)"
+
+test_begin_subtest "Search by 'from' (name and address)"
+output=$(notmuch search --query-syntax=sexp '(from "Search By From Name <test@example.com>")' | notmuch_search_sanitize)
+test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Search By From Name; search by from (name) (inbox unread)"
+
+add_message '[dir]=bad' '[subject]="To the bone"'
+add_message '[dir]=.' '[subject]="Top level"'
+add_message '[dir]=bad/news' '[subject]="Bears"'
+mkdir -p "${MAIL_DIR}/duplicate/bad/news"
+cp "$gen_msg_filename" "${MAIL_DIR}/duplicate/bad/news"
+
+add_message '[dir]=things' '[subject]="These are a few"'
+add_message '[dir]=things/favorite' '[subject]="Raindrops, whiskers, kettles"'
+add_message '[dir]=things/bad' '[subject]="Bites, stings, sad feelings"'
+
+test_begin_subtest "Search by 'folder' (multiple)"
+output=$(notmuch search --query-syntax=sexp '(folder bad bad/news things/bad)' | notmuch_search_sanitize)
+test_expect_equal "$output" "thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; To the bone (inbox unread)
+thread:XXX 2001-01-05 [1/1(2)] Notmuch Test Suite; Bears (inbox unread)
+thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; Bites, stings, sad feelings (inbox unread)"
+
+test_begin_subtest "Search by 'folder': top level."
+notmuch search folder:'""' > EXPECTED
+notmuch search --query-syntax=sexp '(folder "")' > OUTPUT
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "Search by 'id'"
+add_message '[subject]="search by id"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"'
+output=$(notmuch search --query-syntax=sexp "(id ${gen_msg_id})" | notmuch_search_sanitize)
+test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; search by id (inbox unread)"
+
+test_begin_subtest "Search by 'id' (or)"
+add_message '[subject]="search by id"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"'
+output=$(notmuch search --query-syntax=sexp "(id non-existent-mid ${gen_msg_id})" | notmuch_search_sanitize)
+test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; search by id (inbox unread)"
+
+test_begin_subtest "Search by 'is' (multiple)"
+notmuch tag -inbox tag:searchbytag
+notmuch search is:inbox AND is:unread | notmuch_search_sanitize > EXPECTED
+notmuch search --query-syntax=sexp '(is inbox unread)' | notmuch_search_sanitize > OUTPUT
+notmuch tag +inbox tag:searchbytag
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "Search by 'mid'"
+add_message '[subject]="search by mid"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"'
+output=$(notmuch search --query-syntax=sexp "(mid ${gen_msg_id})" | notmuch_search_sanitize)
+test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; search by mid (inbox unread)"
+
+test_begin_subtest "Search by 'mid' (or)"
+add_message '[subject]="search by mid"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"'
+output=$(notmuch search --query-syntax=sexp "(mid non-existent-mid ${gen_msg_id})" | notmuch_search_sanitize)
+test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; search by mid (inbox unread)"
+
+test_begin_subtest "Search by 'mimetype'"
+notmuch search mimetype:text/html > EXPECTED
+notmuch search --query-syntax=sexp '(mimetype text html)' > OUTPUT
+test_expect_equal_file EXPECTED OUTPUT
+
test_begin_subtest "Search by 'subject' (utf-8, phrase-token):"
output=$(notmuch search --query-syntax=sexp '(subject utf8-sübjéct)' | notmuch_search_sanitize)
test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; utf8-sübjéct (inbox unread)"
@@ -118,6 +211,7 @@ notmuch search --query-syntax=sexp '(subject (or utf8 "compatibility issues"))'
cat <<EOF > EXPECTED
thread:XXX 2009-11-18 [4/4] Jjgod Jiang, Alexander Botero-Lowry; [notmuch] Mac OS X/Darwin compatibility issues (inbox unread)
thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; utf8-sübjéct (inbox unread)
+thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; utf8-message-body-subject (inbox unread)
EOF
test_expect_equal_file EXPECTED OUTPUT
--
2.30.2
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 14/27] lib/parse-sexp: 'starts-with' wildcard searches
2021-07-30 12:55 v3 sexpr query parser David Bremner
` (12 preceding siblings ...)
2021-07-30 12:55 ` [PATCH 13/27] lib/parse-sexp: add term prefix backed fields David Bremner
@ 2021-07-30 12:55 ` David Bremner
2021-07-30 12:55 ` [PATCH 15/27] lib/parse-sexp: add '*' as syntactic sugar for '(starts-with "")' David Bremner
` (12 subsequent siblings)
26 siblings, 0 replies; 28+ messages in thread
From: David Bremner @ 2021-07-30 12:55 UTC (permalink / raw)
To: notmuch; +Cc: David Bremner
The many tests are potentially overkill, but they could catch typos in the
prefixes table. As a simplifying assumption, for now we assume a
single argument to the wildcard operator, as this matches the Xapian
semantics. The name 'starts-with' is chosen to emphasize the supported
case of wildcards in current (1.4.x) Xapian.
---
doc/man7/notmuch-sexp-queries.rst | 13 ++
lib/parse-sexp.cc | 61 +++++++---
test/T081-sexpr-search.sh | 196 ++++++++++++++++++++++++++++++
3 files changed, 255 insertions(+), 15 deletions(-)
diff --git a/doc/man7/notmuch-sexp-queries.rst b/doc/man7/notmuch-sexp-queries.rst
index 83017b38..41db6fd3 100644
--- a/doc/man7/notmuch-sexp-queries.rst
+++ b/doc/man7/notmuch-sexp-queries.rst
@@ -163,10 +163,20 @@ EXAMPLES
``(id 1234@invalid blah@test)``
Matches Message-Id "1234@invalid" *or* Message-Id "blah@test"
+``(starts-with prelim)``
+ Match any words starting with "prelim".
+
``(subject quick "brown fox")``
Match messages whose subject contains "quick" (anywhere, stemmed) and
the phrase "brown fox".
+``(subject (starts-with prelim))``
+ Matches any word starting with "prelim", inside a message subject.
+
+``(subject (starts-with quick) "brown fox")``
+ Match messages whose subject contains "quick brown fox", but also
+ "brown fox quicksand".
+
``(to (or bob@example.com mallory@example.org))`` ``(or (to bob@example.com) (to mallory@example.org))``
Match in the "To" or "Cc" headers, "bob@example.com",
"mallory@example.org", and also "bob@example.com.au" since it
@@ -181,6 +191,9 @@ NOTES
.. [#aka-bool] a.k.a. boolean prefixes
+.. [#not-body] Due to the way ``body`` is implemented in notmuch,
+ this modifier is not supported in the ``body`` field.
+
.. |q1| replace:: :math:`q_1`
.. |q2| replace:: :math:`q_2`
.. |qn| replace:: :math:`q_n`
diff --git a/lib/parse-sexp.cc b/lib/parse-sexp.cc
index 9727c57d..d717efc5 100644
--- a/lib/parse-sexp.cc
+++ b/lib/parse-sexp.cc
@@ -11,6 +11,8 @@ typedef enum {
SEXP_FLAG_NONE = 0,
SEXP_FLAG_FIELD = 1 << 0,
SEXP_FLAG_BOOLEAN = 1 << 1,
+ SEXP_FLAG_SINGLE = 1 << 2,
+ SEXP_FLAG_WILDCARD = 1 << 3,
} _sexp_flag_t;
/*
@@ -42,38 +44,39 @@ static _sexp_prefix_t prefixes[] =
{ "and", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
SEXP_FLAG_NONE },
{ "attachment", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
- SEXP_FLAG_FIELD },
+ SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD },
{ "body", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
SEXP_FLAG_FIELD },
{ "from", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
- SEXP_FLAG_FIELD },
+ SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD },
{ "folder", Xapian::Query::OP_OR, Xapian::Query::MatchNothing,
- SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN },
+ SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD },
{ "id", Xapian::Query::OP_OR, Xapian::Query::MatchNothing,
- SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN },
+ SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD },
{ "is", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
- SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN },
+ SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD },
{ "mid", Xapian::Query::OP_OR, Xapian::Query::MatchNothing,
- SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN },
+ SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD },
{ "mimetype", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
- SEXP_FLAG_FIELD },
+ SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD },
{ "not", Xapian::Query::OP_AND_NOT, Xapian::Query::MatchAll,
SEXP_FLAG_NONE },
{ "or", Xapian::Query::OP_OR, Xapian::Query::MatchNothing,
SEXP_FLAG_NONE },
{ "path", Xapian::Query::OP_OR, Xapian::Query::MatchNothing,
- SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN },
+ SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD },
{ "property", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
- SEXP_FLAG_FIELD
- | SEXP_FLAG_BOOLEAN },
+ SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD },
+ { "starts-with", Xapian::Query::OP_WILDCARD, Xapian::Query::MatchAll,
+ SEXP_FLAG_SINGLE },
{ "subject", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
- SEXP_FLAG_FIELD },
+ SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD },
{ "tag", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
- SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN },
+ SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD },
{ "thread", Xapian::Query::OP_OR, Xapian::Query::MatchNothing,
- SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN },
+ SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD },
{ "to", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
- SEXP_FLAG_FIELD },
+ SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD },
{ }
};
@@ -142,6 +145,25 @@ _sexp_parse_phrase (std::string term_prefix, const char *phrase, Xapian::Query &
return NOTMUCH_STATUS_SUCCESS;
}
+static notmuch_status_t
+_sexp_parse_wildcard (notmuch_database_t *notmuch,
+ const _sexp_prefix_t *parent,
+ std::string match,
+ Xapian::Query &output)
+{
+
+ std::string term_prefix = parent ? _find_prefix (parent->name) : "";
+
+ if (parent && ! (parent->flags & SEXP_FLAG_WILDCARD)) {
+ _notmuch_database_log (notmuch, "'%s' does not support wildcard queries\n", parent->name);
+ return NOTMUCH_STATUS_BAD_QUERY_SYNTAX;
+ }
+
+ output = Xapian::Query (Xapian::Query::OP_WILDCARD,
+ term_prefix + Xapian::Unicode::tolower (match));
+ return NOTMUCH_STATUS_SUCCESS;
+}
+
/* Here we expect the s-expression to be a proper list, with first
* element defining and operation, or as a special case the empty
* list */
@@ -150,7 +172,6 @@ static notmuch_status_t
_sexp_to_xapian_query (notmuch_database_t *notmuch, const _sexp_prefix_t *parent, const sexp_t *sx,
Xapian::Query &output)
{
-
if (sx->ty == SEXP_VALUE) {
std::string term = Xapian::Unicode::tolower (sx->val);
Xapian::Stem stem = *(notmuch->stemmer);
@@ -190,6 +211,16 @@ _sexp_to_xapian_query (notmuch_database_t *notmuch, const _sexp_prefix_t *parent
parent = prefix;
}
+ if ((prefix->flags & SEXP_FLAG_SINGLE) &&
+ (! sx->list->next || sx->list->next->next || sx->list->next->ty != SEXP_VALUE)) {
+ _notmuch_database_log (notmuch, "'%s' expects single atom as argument\n",
+ prefix->name);
+ return NOTMUCH_STATUS_BAD_QUERY_SYNTAX;
+ }
+
+ if (prefix->xapian_op == Xapian::Query::OP_WILDCARD)
+ return _sexp_parse_wildcard (notmuch, parent, sx->list->next->val, output);
+
return _sexp_combine_query (notmuch, parent, prefix->xapian_op, prefix->initial,
sx->list->next, output);
}
diff --git a/test/T081-sexpr-search.sh b/test/T081-sexpr-search.sh
index 84f67d11..fe9663bd 100755
--- a/test/T081-sexpr-search.sh
+++ b/test/T081-sexpr-search.sh
@@ -222,6 +222,170 @@ thread:XXX 2009-11-18 [4/4] Jjgod Jiang, Alexander Botero-Lowry; [notmuch] Mac
EOF
test_expect_equal_file EXPECTED OUTPUT
+test_begin_subtest "Search by 'tag'"
+add_message '[subject]="search by tag"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"'
+notmuch tag +searchbytag id:${gen_msg_id}
+output=$(notmuch search --query-syntax=sexp '(tag searchbytag)' | notmuch_search_sanitize)
+test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; search by tag (inbox searchbytag unread)"
+
+test_begin_subtest "Search by 'tag' (multiple)"
+notmuch tag -inbox tag:searchbytag
+notmuch search tag:inbox AND tag:unread | notmuch_search_sanitize > EXPECTED
+notmuch search --query-syntax=sexp '(tag inbox unread)' | notmuch_search_sanitize > OUTPUT
+notmuch tag +inbox tag:searchbytag
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "Search by 'tag' and 'subject'"
+notmuch search tag:inbox and subject:maildir | notmuch_search_sanitize > EXPECTED
+notmuch search --query-syntax=sexp '(and (tag inbox) (subject maildir))' | notmuch_search_sanitize > OUTPUT
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "Search by 'thread'"
+add_message '[subject]="search by thread"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"'
+thread_id=$(notmuch search id:${gen_msg_id} | sed -e "s/thread:\([a-f0-9]*\).*/\1/")
+output=$(notmuch search --query-syntax=sexp "(thread ${thread_id})" | notmuch_search_sanitize)
+test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; search by thread (inbox unread)"
+
+test_begin_subtest "Search by 'to'"
+add_message '[subject]="search by to"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"' [to]=searchbyto
+output=$(notmuch search --query-syntax=sexp '(to searchbyto)' | notmuch_search_sanitize)
+test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; search by to (inbox unread)"
+
+test_begin_subtest "Search by 'to' (address)"
+add_message '[subject]="search by to (address)"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"' [to]=searchbyto@example.com
+output=$(notmuch search --query-syntax=sexp '(to searchbyto@example.com)' | notmuch_search_sanitize)
+test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; search by to (address) (inbox unread)"
+
+test_begin_subtest "Search by 'to' (name)"
+add_message '[subject]="search by to (name)"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"' '[to]="Search By To Name <test@example.com>"'
+output=$(notmuch search --query-syntax=sexp '(to "Search By To Name")' | notmuch_search_sanitize)
+test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; search by to (name) (inbox unread)"
+
+test_begin_subtest "Search by 'to' (name and address)"
+output=$(notmuch search --query-syntax=sexp '(to "Search By To Name <test@example.com>")' | notmuch_search_sanitize)
+test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; search by to (name) (inbox unread)"
+
+test_begin_subtest "starts-with, no prefix"
+output=$(notmuch search --query-syntax=sexp '(starts-with prelim)' | notmuch_search_sanitize)
+test_expect_equal "$output" "thread:XXX 2009-11-17 [2/2] Alex Botero-Lowry, Carl Worth; [notmuch] preliminary FreeBSD support (attachment inbox unread)"
+
+test_begin_subtest "starts-with, case-insensitive"
+notmuch search --query-syntax=sexp '(starts-with FreeB)' | notmuch_search_sanitize > OUTPUT
+cat <<EOF > EXPECTED
+thread:XXX 2009-11-18 [3/4] Alexander Botero-Lowry, Jjgod Jiang; [notmuch] Mac OS X/Darwin compatibility issues (inbox unread)
+thread:XXX 2009-11-17 [2/2] Alex Botero-Lowry, Carl Worth; [notmuch] preliminary FreeBSD support (attachment inbox unread)
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "starts-with, no prefix, all messages"
+notmuch search --query-syntax=sexp '(starts-with "")' | notmuch_search_sanitize > OUTPUT
+notmuch search '*' | notmuch_search_sanitize > EXPECTED
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "starts-with, attachment"
+output=$(notmuch search --query-syntax=sexp '(attachment (starts-with not))' | notmuch_search_sanitize)
+test_expect_equal "$output" 'thread:XXX 2009-11-18 [2/2] Lars Kellogg-Stedman; [notmuch] "notmuch help" outputs to stderr? (attachment inbox signed unread)'
+
+test_begin_subtest "starts-with, folder"
+notmuch search --output=files --query-syntax=sexp '(folder (starts-with bad))' | notmuch_dir_sanitize > OUTPUT
+cat <<EOF > EXPECTED
+MAIL_DIR/bad/msg-010
+MAIL_DIR/bad/news/msg-012
+MAIL_DIR/duplicate/bad/news/msg-012
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "starts-with, from"
+notmuch search --query-syntax=sexp '(from (starts-with Mik))' | notmuch_search_sanitize > OUTPUT
+cat <<EOF > EXPECTED
+thread:XXX 2009-11-17 [1/1] Mikhail Gusarov; [notmuch] [PATCH] Handle rename of message file (inbox unread)
+thread:XXX 2009-11-17 [2/7] Mikhail Gusarov| Lars Kellogg-Stedman, Keith Packard, Carl Worth; [notmuch] Working with Maildir storage? (inbox signed unread)
+thread:XXX 2009-11-17 [2/5] Mikhail Gusarov| Carl Worth, Keith Packard; [notmuch] [PATCH 2/2] Include <stdint.h> to get uint32_t in C++ file with gcc 4.4 (inbox unread)
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "starts-with, id"
+notmuch search --query-syntax=sexp --output=messages '(id (starts-with 877))' > OUTPUT
+cat <<EOF > EXPECTED
+id:877h1wv7mg.fsf@inf-8657.int-evry.fr
+id:877htoqdbo.fsf@yoom.home.cworth.org
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "starts-with, is"
+output=$(notmuch search --query-syntax=sexp '(is (starts-with searchby))' | notmuch_search_sanitize)
+test_expect_equal "$output" 'thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; search by tag (inbox searchbytag unread)'
+
+test_begin_subtest "starts-with, mid"
+notmuch search --query-syntax=sexp --output=messages '(mid (starts-with 877))' > OUTPUT
+cat <<EOF > EXPECTED
+id:877h1wv7mg.fsf@inf-8657.int-evry.fr
+id:877htoqdbo.fsf@yoom.home.cworth.org
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "starts-with, mimetype"
+notmuch search --query-syntax=sexp '(mimetype (starts-with sig))' | notmuch_search_sanitize > OUTPUT
+cat <<EOF > EXPECTED
+thread:XXX 2009-11-18 [2/2] Lars Kellogg-Stedman; [notmuch] "notmuch help" outputs to stderr? (attachment inbox signed unread)
+thread:XXX 2009-11-18 [4/7] Lars Kellogg-Stedman, Mikhail Gusarov| Keith Packard, Carl Worth; [notmuch] Working with Maildir storage? (inbox signed unread)
+thread:XXX 2009-11-17 [1/3] Adrian Perez de Castro| Keith Packard, Carl Worth; [notmuch] Introducing myself (inbox signed unread)
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+add_message '[subject]="message with properties"'
+notmuch restore <<EOF
+#= ${gen_msg_id} foo=bar
+EOF
+
+test_begin_subtest "starts-with, property"
+notmuch search --query-syntax=sexp '(property (starts-with foo=))' | notmuch_search_sanitize > OUTPUT
+cat <<EOF > EXPECTED
+thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; message with properties (inbox unread)
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "starts-with, subject"
+notmuch search --query-syntax=sexp '(subject (starts-with FreeB))' | notmuch_search_sanitize > OUTPUT
+cat <<EOF > EXPECTED
+thread:XXX 2009-11-17 [2/2] Alex Botero-Lowry, Carl Worth; [notmuch] preliminary FreeBSD support (attachment inbox unread)
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "starts-with, tag"
+output=$(notmuch search --query-syntax=sexp '(tag (starts-with searchby))' | notmuch_search_sanitize)
+test_expect_equal "$output" 'thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; search by tag (inbox searchbytag unread)'
+
+add_message '[subject]="no tags"'
+notag_mid=${gen_msg_id}
+notmuch tag -unread -inbox id:${notag_mid}
+
+test_begin_subtest "negated starts-with, tag"
+output=$(notmuch search --query-syntax=sexp '(tag (not (starts-with in)))' | notmuch_search_sanitize)
+test_expect_equal "$output" 'thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; no tags ()'
+
+test_begin_subtest "negated starts-with, tag 2"
+output=$(notmuch search --query-syntax=sexp '(not (tag (starts-with in)))' | notmuch_search_sanitize)
+test_expect_equal "$output" 'thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; no tags ()'
+
+test_begin_subtest "negated starts-with, tag 3"
+output=$(notmuch search --query-syntax=sexp '(not (tag (starts-with "")))' | notmuch_search_sanitize)
+test_expect_equal "$output" 'thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; no tags ()'
+
+test_begin_subtest "starts-with, thread"
+notmuch search --query-syntax=sexp '(thread (starts-with "00"))' > OUTPUT
+notmuch search '*' > EXPECTED
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "starts-with, to"
+notmuch search --query-syntax=sexp '(to (starts-with "search"))' | notmuch_search_sanitize > OUTPUT
+cat <<EOF > EXPECTED
+thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; search by to (inbox unread)
+thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; search by to (address) (inbox unread)
+thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; search by to (name) (inbox unread)
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
test_begin_subtest "Unbalanced parens"
# A code 1 indicates the error was handled (a crash will return e.g. 139).
test_expect_code 1 "notmuch search --query-syntax=sexp '('"
@@ -258,4 +422,36 @@ nested field: 'subject' inside 'subject'
EOF
test_expect_equal_file EXPECTED OUTPUT
+test_begin_subtest "starts-with, no argument"
+notmuch search --query-syntax=sexp '(starts-with)' >OUTPUT 2>&1
+cat <<EOF > EXPECTED
+notmuch search: Syntax error in query
+'starts-with' expects single atom as argument
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "starts-with, list argument"
+notmuch search --query-syntax=sexp '(starts-with (stuff))' >OUTPUT 2>&1
+cat <<EOF > EXPECTED
+notmuch search: Syntax error in query
+'starts-with' expects single atom as argument
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "starts-with, too many arguments"
+notmuch search --query-syntax=sexp '(starts-with a b c)' >OUTPUT 2>&1
+cat <<EOF > EXPECTED
+notmuch search: Syntax error in query
+'starts-with' expects single atom as argument
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "starts-with, illegal field"
+notmuch search --query-syntax=sexp '(body (starts-with foo))' >OUTPUT 2>&1
+cat <<EOF > EXPECTED
+notmuch search: Syntax error in query
+'body' does not support wildcard queries
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
test_done
--
2.30.2
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 15/27] lib/parse-sexp: add '*' as syntactic sugar for '(starts-with "")'
2021-07-30 12:55 v3 sexpr query parser David Bremner
` (13 preceding siblings ...)
2021-07-30 12:55 ` [PATCH 14/27] lib/parse-sexp: 'starts-with' wildcard searches David Bremner
@ 2021-07-30 12:55 ` David Bremner
2021-07-30 12:55 ` [PATCH 16/27] lib/parse-sexp: handle unprefixed terms David Bremner
` (11 subsequent siblings)
26 siblings, 0 replies; 28+ messages in thread
From: David Bremner @ 2021-07-30 12:55 UTC (permalink / raw)
To: notmuch; +Cc: David Bremner
Users who insist on using a literal '*' as a tag can continue to do
so by quoting it when searching.
---
doc/man7/notmuch-sexp-queries.rst | 14 +++++++++
lib/parse-sexp.cc | 5 ++++
test/T081-sexpr-search.sh | 48 +++++++++++++++++++++++++++++++
3 files changed, 67 insertions(+)
diff --git a/doc/man7/notmuch-sexp-queries.rst b/doc/man7/notmuch-sexp-queries.rst
index 41db6fd3..3f4299de 100644
--- a/doc/man7/notmuch-sexp-queries.rst
+++ b/doc/man7/notmuch-sexp-queries.rst
@@ -37,6 +37,8 @@ a *field*, *logical operation*, or *modifier*, and 0 or more
subqueries.
``*``
+ "*" matches any non-empty string in the current field.
+
``()``
The empty list matches all messages
@@ -139,6 +141,15 @@ from terms, operators, and modifiers, but not other fields.
MODIFIERS
`````````
+*Modifiers* refer to any prefixes (first elements of compound queries)
+that are neither operators nor fields.
+
+``(starts-with`` *subword* ``)``
+ Matches any term starting with *subword*. This applies in either
+ phrase or term :any:`fields <fields>`, or outside of fields [#not-body]_. Note that
+ a ``starts-with`` query cannot be part of a phrase. The
+ atom ``*`` is a synonym for ``(starts-with "")``.
+
EXAMPLES
========
@@ -182,6 +193,9 @@ EXAMPLES
"mallory@example.org", and also "bob@example.com.au" since it
contains the adjacent triple "bob", "example", "com".
+``(not (to *))``
+ Match messages with an empty or invalid 'To' and 'Cc' field.
+
NOTES
=====
diff --git a/lib/parse-sexp.cc b/lib/parse-sexp.cc
index d717efc5..caffbe59 100644
--- a/lib/parse-sexp.cc
+++ b/lib/parse-sexp.cc
@@ -176,6 +176,11 @@ _sexp_to_xapian_query (notmuch_database_t *notmuch, const _sexp_prefix_t *parent
std::string term = Xapian::Unicode::tolower (sx->val);
Xapian::Stem stem = *(notmuch->stemmer);
std::string term_prefix = parent ? _find_prefix (parent->name) : "";
+
+ if (sx->aty == SEXP_BASIC && strcmp (sx->val, "*") == 0) {
+ return _sexp_parse_wildcard (notmuch, parent, "", output);
+ }
+
if (parent && (parent->flags & SEXP_FLAG_BOOLEAN)) {
output = Xapian::Query (term_prefix + sx->val);
return NOTMUCH_STATUS_SUCCESS;
diff --git a/test/T081-sexpr-search.sh b/test/T081-sexpr-search.sh
index fe9663bd..8ec585bb 100755
--- a/test/T081-sexpr-search.sh
+++ b/test/T081-sexpr-search.sh
@@ -386,6 +386,46 @@ thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; search by to (name) (inbox unr
EOF
test_expect_equal_file EXPECTED OUTPUT
+test_begin_subtest "wildcard search for 'is'"
+notmuch search not id:${notag_mid} > EXPECTED
+notmuch search --query-syntax=sexp '(is *)' > OUTPUT
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "negated wildcard search for 'is'"
+notmuch search id:${notag_mid} > EXPECTED
+notmuch search --query-syntax=sexp '(not (is *))' > OUTPUT
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "wildcard search for 'property'"
+notmuch search property:foo=bar > EXPECTED
+notmuch search --query-syntax=sexp '(property *)' > OUTPUT
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "wildcard search for 'tag'"
+notmuch search not id:${notag_mid} > EXPECTED
+notmuch search --query-syntax=sexp '(tag *)' > OUTPUT
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "negated wildcard search for 'tag'"
+notmuch search id:${notag_mid} > EXPECTED
+notmuch search --query-syntax=sexp '(not (tag *))' > OUTPUT
+test_expect_equal_file EXPECTED OUTPUT
+
+add_message '[subject]="message with tag \"*\""'
+notmuch tag '+*' id:${gen_msg_id}
+
+test_begin_subtest "search for 'tag' \"*\""
+output=$(notmuch search --query-syntax=sexp --output=messages '(tag "*")')
+test_expect_equal "$output" "id:$gen_msg_id"
+
+test_begin_subtest "search for missing / empty to"
+add_message [to]="undisclosed-recipients:"
+notmuch search --query-syntax=sexp '(not (to *))' | notmuch_search_sanitize > OUTPUT
+cat <<EOF > EXPECTED
+thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; search for missing / empty to (inbox unread)
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
test_begin_subtest "Unbalanced parens"
# A code 1 indicates the error was handled (a crash will return e.g. 139).
test_expect_code 1 "notmuch search --query-syntax=sexp '('"
@@ -454,4 +494,12 @@ notmuch search: Syntax error in query
EOF
test_expect_equal_file EXPECTED OUTPUT
+test_begin_subtest "wildcard, illegal field"
+notmuch search --query-syntax=sexp '(body *)' >OUTPUT 2>&1
+cat <<EOF > EXPECTED
+notmuch search: Syntax error in query
+'body' does not support wildcard queries
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
test_done
--
2.30.2
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 16/27] lib/parse-sexp: handle unprefixed terms.
2021-07-30 12:55 v3 sexpr query parser David Bremner
` (14 preceding siblings ...)
2021-07-30 12:55 ` [PATCH 15/27] lib/parse-sexp: add '*' as syntactic sugar for '(starts-with "")' David Bremner
@ 2021-07-30 12:55 ` David Bremner
2021-07-30 12:55 ` [PATCH 17/27] lib/query: generalize exclude handling to s-expression queries David Bremner
` (10 subsequent siblings)
26 siblings, 0 replies; 28+ messages in thread
From: David Bremner @ 2021-07-30 12:55 UTC (permalink / raw)
To: notmuch; +Cc: David Bremner
This is equivalent to adding the same field name "" for multiple
prefixes in the Xapian query parser, but we have to explicitly
construct the resulting query.
---
lib/parse-sexp.cc | 36 ++++++++++++++++++++++++++++++++----
test/T081-sexpr-search.sh | 35 +++++++++++++++++++++++++++++++----
2 files changed, 63 insertions(+), 8 deletions(-)
diff --git a/lib/parse-sexp.cc b/lib/parse-sexp.cc
index caffbe59..56bd7e4b 100644
--- a/lib/parse-sexp.cc
+++ b/lib/parse-sexp.cc
@@ -164,6 +164,22 @@ _sexp_parse_wildcard (notmuch_database_t *notmuch,
return NOTMUCH_STATUS_SUCCESS;
}
+static notmuch_status_t
+_sexp_parse_one_term (notmuch_database_t *notmuch, std::string term_prefix, const sexp_t *sx,
+ Xapian::Query &output)
+{
+ Xapian::Stem stem = *(notmuch->stemmer);
+
+ if (sx->aty == SEXP_BASIC && unicode_word_utf8 (sx->val)) {
+ std::string term = Xapian::Unicode::tolower (sx->val);
+
+ output = Xapian::Query ("Z" + term_prefix + stem (term));
+ return NOTMUCH_STATUS_SUCCESS;
+ } else {
+ return _sexp_parse_phrase (term_prefix, sx->val, output);
+ }
+
+}
/* Here we expect the s-expression to be a proper list, with first
* element defining and operation, or as a special case the empty
* list */
@@ -185,11 +201,23 @@ _sexp_to_xapian_query (notmuch_database_t *notmuch, const _sexp_prefix_t *parent
output = Xapian::Query (term_prefix + sx->val);
return NOTMUCH_STATUS_SUCCESS;
}
- if (sx->aty == SEXP_BASIC && unicode_word_utf8 (sx->val)) {
- output = Xapian::Query ("Z" + term_prefix + stem (term));
- return NOTMUCH_STATUS_SUCCESS;
+ if (parent) {
+ return _sexp_parse_one_term (notmuch, term_prefix, sx, output);
} else {
- return _sexp_parse_phrase (term_prefix, sx->val, output);
+ Xapian::Query accumulator;
+ for (_sexp_prefix_t *prefix = prefixes; prefix->name; prefix++) {
+ if (prefix->flags & SEXP_FLAG_FIELD) {
+ notmuch_status_t status;
+ Xapian::Query subquery;
+ term_prefix = _find_prefix (prefix->name);
+ status = _sexp_parse_one_term (notmuch, term_prefix, sx, subquery);
+ if (status)
+ return status;
+ accumulator = Xapian::Query (Xapian::Query::OP_OR, accumulator, subquery);
+ }
+ }
+ output = accumulator;
+ return NOTMUCH_STATUS_SUCCESS;
}
}
diff --git a/test/T081-sexpr-search.sh b/test/T081-sexpr-search.sh
index 8ec585bb..13b47324 100755
--- a/test/T081-sexpr-search.sh
+++ b/test/T081-sexpr-search.sh
@@ -111,6 +111,10 @@ add_message '[subject]="body search"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"'
output=$(notmuch search --query-syntax=sexp '(body bodysearchtest)' | notmuch_search_sanitize)
test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; body search (inbox unread)"
+test_begin_subtest "Search by body (unprefixed)"
+output=$(notmuch search --query-syntax=sexp '(and bodysearchtest)' | notmuch_search_sanitize)
+test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; body search (inbox unread)"
+
test_begin_subtest "Search by 'body' (phrase)"
add_message '[subject]="body search (phrase)"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"' '[body]="body search (phrase)"'
add_message '[subject]="negative result"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"' '[body]="This phrase should not match the body search"'
@@ -122,6 +126,29 @@ add_message '[subject]="utf8-message-body-subject"' '[date]="Sat, 01 Jan 2000 12
output=$(notmuch search --query-syntax=sexp '(body bödý)' | notmuch_search_sanitize)
test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; utf8-message-body-subject (inbox unread)"
+add_message "[body]=thebody-1" "[subject]=kryptonite-1"
+add_message "[body]=nothing-to-see-here-1" "[subject]=thebody-1"
+
+test_begin_subtest 'search without body: prefix'
+notmuch search thebody > EXPECTED
+notmuch search --query-syntax=sexp '(and thebody)' > OUTPUT
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest 'negated body: prefix'
+notmuch search thebody and not body:thebody > EXPECTED
+notmuch search --query-syntax=sexp '(and (not (body thebody)) thebody)' > OUTPUT
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest 'search unprefixed for prefixed term'
+notmuch search kryptonite > EXPECTED
+notmuch search --query-syntax=sexp '(and kryptonite)' > OUTPUT
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest 'search with body: prefix for term only in subject'
+notmuch search body:kryptonite > EXPECTED
+notmuch search --query-syntax=sexp '(body kryptonite)' > OUTPUT
+test_expect_equal_file EXPECTED OUTPUT
+
test_begin_subtest "Search by 'from'"
add_message '[subject]="search by from"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"' [from]=searchbyfrom
output=$(notmuch search --query-syntax=sexp '(from searchbyfrom)' | notmuch_search_sanitize)
@@ -287,11 +314,11 @@ output=$(notmuch search --query-syntax=sexp '(attachment (starts-with not))' | n
test_expect_equal "$output" 'thread:XXX 2009-11-18 [2/2] Lars Kellogg-Stedman; [notmuch] "notmuch help" outputs to stderr? (attachment inbox signed unread)'
test_begin_subtest "starts-with, folder"
-notmuch search --output=files --query-syntax=sexp '(folder (starts-with bad))' | notmuch_dir_sanitize > OUTPUT
+notmuch search --output=files --query-syntax=sexp '(folder (starts-with bad))' | notmuch_dir_sanitize | sed 's/[0-9]*$/XXX/' > OUTPUT
cat <<EOF > EXPECTED
-MAIL_DIR/bad/msg-010
-MAIL_DIR/bad/news/msg-012
-MAIL_DIR/duplicate/bad/news/msg-012
+MAIL_DIR/bad/msg-XXX
+MAIL_DIR/bad/news/msg-XXX
+MAIL_DIR/duplicate/bad/news/msg-XXX
EOF
test_expect_equal_file EXPECTED OUTPUT
--
2.30.2
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 17/27] lib/query: generalize exclude handling to s-expression queries
2021-07-30 12:55 v3 sexpr query parser David Bremner
` (15 preceding siblings ...)
2021-07-30 12:55 ` [PATCH 16/27] lib/parse-sexp: handle unprefixed terms David Bremner
@ 2021-07-30 12:55 ` David Bremner
2021-07-30 12:55 ` [PATCH 18/27] lib: factor out query construction from regexp David Bremner
` (9 subsequent siblings)
26 siblings, 0 replies; 28+ messages in thread
From: David Bremner @ 2021-07-30 12:55 UTC (permalink / raw)
To: notmuch; +Cc: David Bremner
In fact most of the code path is in common, only the caching of terms
in the query needs to be added for s-expression queries.
---
lib/query.cc | 34 ++++++++++++++++++++++-----------
test/T081-sexpr-search.sh | 40 +++++++++++++++++++++++++++++++++++++++
2 files changed, 63 insertions(+), 11 deletions(-)
diff --git a/lib/query.cc b/lib/query.cc
index 435f7229..56f90e1c 100644
--- a/lib/query.cc
+++ b/lib/query.cc
@@ -165,6 +165,19 @@ notmuch_query_create_with_syntax (notmuch_database_t *notmuch,
return NOTMUCH_STATUS_SUCCESS;
}
+static void
+_notmuch_query_cache_terms (notmuch_query_t *query)
+{
+ /* Xapian doesn't support skip_to on terms from a query since
+ * they are unordered, so cache a copy of all terms in
+ * something searchable.
+ */
+
+ for (Xapian::TermIterator t = query->xapian_query.get_terms_begin ();
+ t != query->xapian_query.get_terms_end (); ++t)
+ query->terms.insert (*t);
+}
+
static notmuch_status_t
_notmuch_query_ensure_parsed_xapian (notmuch_query_t *query)
{
@@ -173,15 +186,7 @@ _notmuch_query_ensure_parsed_xapian (notmuch_query_t *query)
query->notmuch->query_parser->
parse_query (query->query_string, NOTMUCH_QUERY_PARSER_FLAGS);
- /* Xapian doesn't support skip_to on terms from a query since
- * they are unordered, so cache a copy of all terms in
- * something searchable.
- */
-
- for (Xapian::TermIterator t = query->xapian_query.get_terms_begin ();
- t != query->xapian_query.get_terms_end (); ++t)
- query->terms.insert (*t);
-
+ _notmuch_query_cache_terms (query);
query->parsed = true;
} catch (const Xapian::Error &error) {
@@ -203,11 +208,18 @@ _notmuch_query_ensure_parsed_xapian (notmuch_query_t *query)
static notmuch_status_t
_notmuch_query_ensure_parsed_sexpr (notmuch_query_t *query)
{
+ notmuch_status_t status;
+
if (query->parsed)
return NOTMUCH_STATUS_SUCCESS;
- return _notmuch_sexp_string_to_xapian_query (query->notmuch, query->query_string,
- query->xapian_query);
+ status = _notmuch_sexp_string_to_xapian_query (query->notmuch, query->query_string,
+ query->xapian_query);
+ if (status)
+ return status;
+
+ _notmuch_query_cache_terms (query);
+ return NOTMUCH_STATUS_SUCCESS;
}
static notmuch_status_t
diff --git a/test/T081-sexpr-search.sh b/test/T081-sexpr-search.sh
index 13b47324..dc05732b 100755
--- a/test/T081-sexpr-search.sh
+++ b/test/T081-sexpr-search.sh
@@ -529,4 +529,44 @@ notmuch search: Syntax error in query
EOF
test_expect_equal_file EXPECTED OUTPUT
+test_begin_subtest "Search, exclude \"deleted\" messages from search"
+notmuch config set search.exclude_tags deleted
+generate_message '[subject]="Not deleted"'
+not_deleted_id=$gen_msg_id
+generate_message '[subject]="Deleted"'
+notmuch new > /dev/null
+notmuch tag +deleted id:$gen_msg_id
+deleted_id=$gen_msg_id
+output=$(notmuch search --query-syntax=sexp '(subject deleted)' | notmuch_search_sanitize)
+test_expect_equal "$output" "thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; Not deleted (inbox unread)"
+
+test_begin_subtest "Search, exclude \"deleted\" messages from message search --exclude=false"
+output=$(notmuch search --query-syntax=sexp --exclude=false --output=messages '(subject deleted)' | notmuch_search_sanitize)
+test_expect_equal "$output" "id:$not_deleted_id
+id:$deleted_id"
+
+test_begin_subtest "Search, exclude \"deleted\" messages from search, overridden"
+notmuch search --query-syntax=sexp '(and (subject deleted) (tag deleted))' | notmuch_search_sanitize > OUTPUT
+cat <<EOF > EXPECTED
+thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; Deleted (deleted inbox unread)
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "Search, exclude \"deleted\" messages from threads"
+add_message '[subject]="Not deleted reply"' '[in-reply-to]="<$gen_msg_id>"'
+output=$(notmuch search --query-syntax=sexp '(subject deleted)' | notmuch_search_sanitize)
+test_expect_equal "$output" "thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; Not deleted (inbox unread)
+thread:XXX 2001-01-05 [1/2] Notmuch Test Suite; Not deleted reply (deleted inbox unread)"
+
+test_begin_subtest "Search, don't exclude \"deleted\" messages when --exclude=flag specified"
+output=$(notmuch search --query-syntax=sexp --exclude=flag '(subject deleted)' | notmuch_search_sanitize)
+test_expect_equal "$output" "thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; Not deleted (inbox unread)
+thread:XXX 2001-01-05 [1/2] Notmuch Test Suite; Deleted (deleted inbox unread)"
+
+test_begin_subtest "Search, don't exclude \"deleted\" messages from search if not configured"
+notmuch config set search.exclude_tags
+output=$(notmuch search --query-syntax=sexp '(subject deleted)' | notmuch_search_sanitize)
+test_expect_equal "$output" "thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; Not deleted (inbox unread)
+thread:XXX 2001-01-05 [2/2] Notmuch Test Suite; Deleted (deleted inbox unread)"
+
test_done
--
2.30.2
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 18/27] lib: factor out query construction from regexp
2021-07-30 12:55 v3 sexpr query parser David Bremner
` (16 preceding siblings ...)
2021-07-30 12:55 ` [PATCH 17/27] lib/query: generalize exclude handling to s-expression queries David Bremner
@ 2021-07-30 12:55 ` David Bremner
2021-07-30 12:55 ` [PATCH 19/27] lib/parse-sexp: support regular expressions David Bremner
` (8 subsequent siblings)
26 siblings, 0 replies; 28+ messages in thread
From: David Bremner @ 2021-07-30 12:55 UTC (permalink / raw)
To: notmuch; +Cc: David Bremner
This will allow re-use of this code outside of the Xapian query parser.
---
lib/database-private.h | 5 +++
lib/regexp-fields.cc | 81 +++++++++++++++++++++++++++++-------------
lib/regexp-fields.h | 6 ++++
3 files changed, 68 insertions(+), 24 deletions(-)
diff --git a/lib/database-private.h b/lib/database-private.h
index 85d55299..cf4eb94b 100644
--- a/lib/database-private.h
+++ b/lib/database-private.h
@@ -306,6 +306,11 @@ _notmuch_database_setup_user_query_fields (notmuch_database_t *notmuch);
notmuch_status_t
_notmuch_sexp_string_to_xapian_query (notmuch_database_t *notmuch, const char *querystr,
Xapian::Query &output);
+
+notmuch_status_t
+_notmuch_regexp_to_query (notmuch_database_t *notmuch, Xapian::valueno slot, std::string field,
+ std::string regexp_str,
+ Xapian::Query &output, std::string &msg);
#endif
#endif
diff --git a/lib/regexp-fields.cc b/lib/regexp-fields.cc
index 0feb50e5..c6d9d94f 100644
--- a/lib/regexp-fields.cc
+++ b/lib/regexp-fields.cc
@@ -26,27 +26,32 @@
#include "notmuch-private.h"
#include "database-private.h"
-static void
-compile_regex (regex_t &regexp, const char *str)
+notmuch_status_t
+compile_regex (regex_t &regexp, const char *str, std::string &msg)
{
int err = regcomp (&regexp, str, REG_EXTENDED | REG_NOSUB);
if (err != 0) {
size_t len = regerror (err, &regexp, NULL, 0);
char *buffer = new char[len];
- std::string msg = "Regexp error: ";
+ msg = "Regexp error: ";
(void) regerror (err, &regexp, buffer, len);
msg.append (buffer, len);
delete[] buffer;
- throw Xapian::QueryParserError (msg);
+ return NOTMUCH_STATUS_ILLEGAL_ARGUMENT;
}
+ return NOTMUCH_STATUS_SUCCESS;
}
RegexpPostingSource::RegexpPostingSource (Xapian::valueno slot, const std::string &regexp)
: slot_ (slot)
{
- compile_regex (regexp_, regexp.c_str ());
+ std::string msg;
+ notmuch_status_t status = compile_regex (regexp_, regexp.c_str (), msg);
+
+ if (status)
+ throw Xapian::QueryParserError (msg);
}
RegexpPostingSource::~RegexpPostingSource ()
@@ -141,18 +146,54 @@ _find_slot (std::string prefix)
return Xapian::BAD_VALUENO;
}
-RegexpFieldProcessor::RegexpFieldProcessor (std::string prefix,
+RegexpFieldProcessor::RegexpFieldProcessor (std::string field_,
notmuch_field_flag_t options_,
Xapian::QueryParser &parser_,
notmuch_database_t *notmuch_)
- : slot (_find_slot (prefix)),
- term_prefix (_find_prefix (prefix.c_str ())),
+ : slot (_find_slot (field_)),
+ field (field_),
+ term_prefix (_find_prefix (field_.c_str ())),
options (options_),
parser (parser_),
notmuch (notmuch_)
{
};
+notmuch_status_t
+_notmuch_regexp_to_query (notmuch_database_t *notmuch, Xapian::valueno slot, std::string field,
+ std::string regexp_str,
+ Xapian::Query &output, std::string &msg)
+{
+ regex_t regexp;
+ notmuch_status_t status;
+
+ status = compile_regex (regexp, regexp_str.c_str (), msg);
+ if (status) {
+ _notmuch_database_log_append (notmuch, "error compiling regex %s", msg.c_str ());
+ return status;
+ }
+
+ if (slot == Xapian::BAD_VALUENO)
+ slot = _find_slot (field);
+
+ if (slot == Xapian::BAD_VALUENO) {
+ std::string term_prefix = _find_prefix (field.c_str ());
+ std::vector<std::string> terms;
+
+ for (Xapian::TermIterator it = notmuch->xapian_db->allterms_begin (term_prefix);
+ it != notmuch->xapian_db->allterms_end (); ++it) {
+ if (regexec (&regexp, (*it).c_str () + term_prefix.size (),
+ 0, NULL, 0) == 0)
+ terms.push_back (*it);
+ }
+ output = Xapian::Query (Xapian::Query::OP_OR, terms.begin (), terms.end ());
+ } else {
+ RegexpPostingSource *postings = new RegexpPostingSource (slot, regexp_str);
+ output = Xapian::Query (postings->release ());
+ }
+ return NOTMUCH_STATUS_SUCCESS;
+}
+
Xapian::Query
RegexpFieldProcessor::operator() (const std::string & str)
{
@@ -168,23 +209,15 @@ RegexpFieldProcessor::operator() (const std::string & str)
if (str.at (0) == '/') {
if (str.length () > 1 && str.at (str.size () - 1) == '/') {
+ Xapian::Query query;
std::string regexp_str = str.substr (1, str.size () - 2);
- if (slot != Xapian::BAD_VALUENO) {
- RegexpPostingSource *postings = new RegexpPostingSource (slot, regexp_str);
- return Xapian::Query (postings->release ());
- } else {
- std::vector<std::string> terms;
- regex_t regexp;
-
- compile_regex (regexp, regexp_str.c_str ());
- for (Xapian::TermIterator it = notmuch->xapian_db->allterms_begin (term_prefix);
- it != notmuch->xapian_db->allterms_end (); ++it) {
- if (regexec (&regexp, (*it).c_str () + term_prefix.size (),
- 0, NULL, 0) == 0)
- terms.push_back (*it);
- }
- return Xapian::Query (Xapian::Query::OP_OR, terms.begin (), terms.end ());
- }
+ std::string msg;
+ notmuch_status_t status;
+
+ status = _notmuch_regexp_to_query (notmuch, slot, field, regexp_str, query, msg);
+ if (status)
+ throw Xapian::QueryParserError (msg);
+ return query;
} else {
throw Xapian::QueryParserError ("unmatched regex delimiter in '" + str + "'");
}
diff --git a/lib/regexp-fields.h b/lib/regexp-fields.h
index a8cca243..9c871de7 100644
--- a/lib/regexp-fields.h
+++ b/lib/regexp-fields.h
@@ -30,6 +30,11 @@
#include "database-private.h"
#include "notmuch-private.h"
+notmuch_status_t
+_notmuch_regex_to_query (notmuch_database_t *notmuch, Xapian::valueno slot, std::string field,
+ std::string regexp_str,
+ Xapian::Query &output, std::string &msg);
+
/* A posting source that returns documents where a value matches a
* regexp.
*/
@@ -64,6 +69,7 @@ public:
class RegexpFieldProcessor : public Xapian::FieldProcessor {
protected:
Xapian::valueno slot;
+ std::string field;
std::string term_prefix;
notmuch_field_flag_t options;
Xapian::QueryParser &parser;
--
2.30.2
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 19/27] lib/parse-sexp: support regular expressions
2021-07-30 12:55 v3 sexpr query parser David Bremner
` (17 preceding siblings ...)
2021-07-30 12:55 ` [PATCH 18/27] lib: factor out query construction from regexp David Bremner
@ 2021-07-30 12:55 ` David Bremner
2021-07-30 12:56 ` [PATCH 20/27] lib: generate actual Xapian query for "*" and "" David Bremner
` (7 subsequent siblings)
26 siblings, 0 replies; 28+ messages in thread
From: David Bremner @ 2021-07-30 12:55 UTC (permalink / raw)
To: notmuch; +Cc: David Bremner
At least to the degree that the Xapian QueryParser based parser
also supports them. Support short alias 'rx' as it seems to make more
complex queries nicer to read.
---
doc/man7/notmuch-sexp-queries.rst | 8 ++++
lib/parse-sexp.cc | 54 ++++++++++++++++++-----
test/T081-sexpr-search.sh | 72 +++++++++++++++++++++++++++++++
3 files changed, 124 insertions(+), 10 deletions(-)
diff --git a/doc/man7/notmuch-sexp-queries.rst b/doc/man7/notmuch-sexp-queries.rst
index 3f4299de..5f0502f7 100644
--- a/doc/man7/notmuch-sexp-queries.rst
+++ b/doc/man7/notmuch-sexp-queries.rst
@@ -144,6 +144,11 @@ MODIFIERS
*Modifiers* refer to any prefixes (first elements of compound queries)
that are neither operators nor fields.
+``(regex`` *atom* ``)`` ``(rx`` *atom* ``)``
+ Interpret *atom* as a POSIX.2 regular expression (see
+ :manpage:`regex(7)`). This applies in term fields and a subset [#not-phrase]_ of
+ phrase fields (see :any:`field-table`).
+
``(starts-with`` *subword* ``)``
Matches any term starting with *subword*. This applies in either
phrase or term :any:`fields <fields>`, or outside of fields [#not-body]_. Note that
@@ -205,6 +210,9 @@ NOTES
.. [#aka-bool] a.k.a. boolean prefixes
+.. [#not-phrase] Due to the implementation of phrase fields in Xapian,
+ regex queries could only match individual words.
+
.. [#not-body] Due the the way ``body`` is implemented in notmuch,
this modifier is not supported in the ``body`` field.
diff --git a/lib/parse-sexp.cc b/lib/parse-sexp.cc
index 56bd7e4b..48728edb 100644
--- a/lib/parse-sexp.cc
+++ b/lib/parse-sexp.cc
@@ -13,6 +13,8 @@ typedef enum {
SEXP_FLAG_BOOLEAN = 1 << 1,
SEXP_FLAG_SINGLE = 1 << 2,
SEXP_FLAG_WILDCARD = 1 << 3,
+ SEXP_FLAG_REGEX = 1 << 4,
+ SEXP_FLAG_DO_REGEX = 1 << 5,
} _sexp_flag_t;
/*
@@ -48,15 +50,15 @@ static _sexp_prefix_t prefixes[] =
{ "body", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
SEXP_FLAG_FIELD },
{ "from", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
- SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD },
+ SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX },
{ "folder", Xapian::Query::OP_OR, Xapian::Query::MatchNothing,
- SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD },
+ SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX },
{ "id", Xapian::Query::OP_OR, Xapian::Query::MatchNothing,
- SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD },
+ SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX },
{ "is", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
- SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD },
+ SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX },
{ "mid", Xapian::Query::OP_OR, Xapian::Query::MatchNothing,
- SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD },
+ SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX },
{ "mimetype", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD },
{ "not", Xapian::Query::OP_AND_NOT, Xapian::Query::MatchAll,
@@ -64,17 +66,21 @@ static _sexp_prefix_t prefixes[] =
{ "or", Xapian::Query::OP_OR, Xapian::Query::MatchNothing,
SEXP_FLAG_NONE },
{ "path", Xapian::Query::OP_OR, Xapian::Query::MatchNothing,
- SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD },
+ SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX },
{ "property", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
- SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD },
+ SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX },
+ { "regex", Xapian::Query::OP_INVALID, Xapian::Query::MatchAll,
+ SEXP_FLAG_SINGLE | SEXP_FLAG_DO_REGEX },
+ { "rx", Xapian::Query::OP_INVALID, Xapian::Query::MatchAll,
+ SEXP_FLAG_SINGLE | SEXP_FLAG_DO_REGEX },
{ "starts-with", Xapian::Query::OP_WILDCARD, Xapian::Query::MatchAll,
SEXP_FLAG_SINGLE },
{ "subject", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
- SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD },
+ SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX },
{ "tag", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
- SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD },
+ SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX },
{ "thread", Xapian::Query::OP_OR, Xapian::Query::MatchNothing,
- SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD },
+ SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX },
{ "to", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD },
{ }
@@ -180,6 +186,30 @@ _sexp_parse_one_term (notmuch_database_t *notmuch, std::string term_prefix, cons
}
}
+
+notmuch_status_t
+_sexp_parse_regex (notmuch_database_t *notmuch,
+ const _sexp_prefix_t *prefix, const _sexp_prefix_t *parent,
+ std::string val, Xapian::Query &output)
+{
+ if (! parent) {
+ _notmuch_database_log (notmuch, "illegal '%s' outside field\n",
+ prefix->name);
+ return NOTMUCH_STATUS_BAD_QUERY_SYNTAX;
+ }
+
+ if (! (parent->flags & SEXP_FLAG_REGEX)) {
+ _notmuch_database_log (notmuch, "'%s' not supported in field '%s'\n",
+ prefix->name, parent->name);
+ return NOTMUCH_STATUS_BAD_QUERY_SYNTAX;
+ }
+
+ std::string msg; /* ignored */
+
+ return _notmuch_regexp_to_query (notmuch, Xapian::BAD_VALUENO, parent->name,
+ val, output, msg);
+}
+
/* Here we expect the s-expression to be a proper list, with first
* element defining and operation, or as a special case the empty
* list */
@@ -254,6 +284,10 @@ _sexp_to_xapian_query (notmuch_database_t *notmuch, const _sexp_prefix_t *parent
if (prefix->xapian_op == Xapian::Query::OP_WILDCARD)
return _sexp_parse_wildcard (notmuch, parent, sx->list->next->val, output);
+ if (prefix->flags & SEXP_FLAG_DO_REGEX) {
+ return _sexp_parse_regex (notmuch, prefix, parent, sx->list->next->val, output);
+ }
+
return _sexp_combine_query (notmuch, parent, prefix->xapian_op, prefix->initial,
sx->list->next, output);
}
diff --git a/test/T081-sexpr-search.sh b/test/T081-sexpr-search.sh
index dc05732b..49fa5262 100755
--- a/test/T081-sexpr-search.sh
+++ b/test/T081-sexpr-search.sh
@@ -569,4 +569,76 @@ output=$(notmuch search --query-syntax=sexp '(subject deleted)' | notmuch_search
test_expect_equal "$output" "thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; Not deleted (inbox unread)
thread:XXX 2001-01-05 [2/2] Notmuch Test Suite; Deleted (deleted inbox unread)"
+test_begin_subtest "regex at top level"
+notmuch search --query-syntax=sexp '(rx foo)' >& OUTPUT
+cat <<EOF > EXPECTED
+notmuch search: Syntax error in query
+illegal 'rx' outside field
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "regex in illegal field"
+notmuch search --query-syntax=sexp '(body (regex foo))' >& OUTPUT
+cat <<EOF > EXPECTED
+notmuch search: Syntax error in query
+'regex' not supported in field 'body'
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+notmuch search --output=messages from:cworth > cworth.msg-ids
+
+test_begin_subtest "regexp 'from' search"
+notmuch search --output=messages --query-syntax=sexp '(from (rx cworth))' > OUTPUT
+test_expect_equal_file cworth.msg-ids OUTPUT
+
+test_begin_subtest "regexp search for 'from' 2"
+notmuch search from:/cworth@cworth.org/ and subject:patch | notmuch_search_sanitize > EXPECTED
+notmuch search --query-syntax=sexp '(and (from (rx cworth@cworth.org)) (subject patch))' \
+ | notmuch_search_sanitize > OUTPUT
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "regexp 'folder' search"
+notmuch search 'folder:/^bar$/' | notmuch_search_sanitize > EXPECTED
+notmuch search --query-syntax=sexp '(folder (rx ^bar$))' | notmuch_search_sanitize > OUTPUT
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "regexp 'id' search"
+notmuch search --output=messages --query-syntax=sexp '(id (rx yoom))' > OUTPUT
+test_expect_equal_file cworth.msg-ids OUTPUT
+
+test_begin_subtest "unanchored 'is' search"
+notmuch search tag:signed or tag:inbox > EXPECTED
+notmuch search --query-syntax=sexp '(is (rx i))' > OUTPUT
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "anchored 'is' search"
+notmuch search tag:signed > EXPECTED
+notmuch search --query-syntax=sexp '(is (rx ^si))' > OUTPUT
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "combine regexp mid and subject"
+notmuch search subject:/-C/ and mid:/y..m/ | notmuch_search_sanitize > EXPECTED
+notmuch search --query-syntax=sexp '(and (subject (rx -C)) (mid (rx y..m)))' | notmuch_search_sanitize > OUTPUT
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "regexp 'path' search"
+notmuch search 'path:/^bar$/' | notmuch_search_sanitize > EXPECTED
+notmuch search --query-syntax=sexp '(path (rx ^bar$))' | notmuch_search_sanitize > OUTPUT
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "regexp 'property' search"
+notmuch search property:foo=bar > EXPECTED
+notmuch search --query-syntax=sexp '(property (rx foo=.*))' > OUTPUT
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "anchored 'tag' search"
+notmuch search tag:signed > EXPECTED
+notmuch search --query-syntax=sexp '(tag (rx ^si))' > OUTPUT
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "regexp 'thread' search"
+notmuch search --output=threads '*' | grep '7$' > EXPECTED
+notmuch search --output=threads --query-syntax=sexp '(thread (rx 7$))' > OUTPUT
+test_expect_equal_file EXPECTED OUTPUT
+
test_done
--
2.30.2
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 20/27] lib: generate actual Xapian query for "*" and ""
2021-07-30 12:55 v3 sexpr query parser David Bremner
` (18 preceding siblings ...)
2021-07-30 12:55 ` [PATCH 19/27] lib/parse-sexp: support regular expressions David Bremner
@ 2021-07-30 12:56 ` David Bremner
2021-07-30 12:56 ` [PATCH 21/27] lib/query: factor out _notmuch_query_string_to_xapian_query David Bremner
` (6 subsequent siblings)
26 siblings, 0 replies; 28+ messages in thread
From: David Bremner @ 2021-07-30 12:56 UTC (permalink / raw)
To: notmuch; +Cc: David Bremner
The previous code had the somewhat bizarre effect that the (notmuch
specific) query string was "*" (interpreted as MatchAll) and the
allegedly parsed xapian_query was "MatchNothing".
This commit also reduces code duplication.
---
lib/query.cc | 34 ++++++++++++++--------------------
1 file changed, 14 insertions(+), 20 deletions(-)
diff --git a/lib/query.cc b/lib/query.cc
index 56f90e1c..57596f48 100644
--- a/lib/query.cc
+++ b/lib/query.cc
@@ -182,11 +182,16 @@ static notmuch_status_t
_notmuch_query_ensure_parsed_xapian (notmuch_query_t *query)
{
try {
- query->xapian_query =
- query->notmuch->query_parser->
- parse_query (query->query_string, NOTMUCH_QUERY_PARSER_FLAGS);
+ if (strcmp (query->query_string, "") == 0 ||
+ strcmp (query->query_string, "*") == 0) {
+ query->xapian_query = Xapian::Query::MatchAll;
+ } else {
+ query->xapian_query =
+ query->notmuch->query_parser->
+ parse_query (query->query_string, NOTMUCH_QUERY_PARSER_FLAGS);
- _notmuch_query_cache_terms (query);
+ _notmuch_query_cache_terms (query);
+ }
query->parsed = true;
} catch (const Xapian::Error &error) {
@@ -331,7 +336,6 @@ _notmuch_query_search_documents (notmuch_query_t *query,
notmuch_messages_t **out)
{
notmuch_database_t *notmuch = query->notmuch;
- const char *query_string = query->query_string;
notmuch_mset_messages_t *messages;
notmuch_status_t status;
@@ -361,13 +365,9 @@ _notmuch_query_search_documents (notmuch_query_t *query,
Xapian::MSet mset;
Xapian::MSetIterator iterator;
- if (strcmp (query_string, "") == 0 ||
- strcmp (query_string, "*") == 0) {
- final_query = mail_query;
- } else {
- final_query = Xapian::Query (Xapian::Query::OP_AND,
- mail_query, query->xapian_query);
- }
+ final_query = Xapian::Query (Xapian::Query::OP_AND,
+ mail_query, query->xapian_query);
+
messages->base.excluded_doc_ids = NULL;
if ((query->omit_excluded != NOTMUCH_EXCLUDE_FALSE) && (query->exclude_terms)) {
@@ -688,7 +688,6 @@ notmuch_status_t
_notmuch_query_count_documents (notmuch_query_t *query, const char *type, unsigned *count_out)
{
notmuch_database_t *notmuch = query->notmuch;
- const char *query_string = query->query_string;
Xapian::doccount count = 0;
notmuch_status_t status;
@@ -704,13 +703,8 @@ _notmuch_query_count_documents (notmuch_query_t *query, const char *type, unsign
Xapian::Query final_query, exclude_query;
Xapian::MSet mset;
- if (strcmp (query_string, "") == 0 ||
- strcmp (query_string, "*") == 0) {
- final_query = mail_query;
- } else {
- final_query = Xapian::Query (Xapian::Query::OP_AND,
- mail_query, query->xapian_query);
- }
+ final_query = Xapian::Query (Xapian::Query::OP_AND,
+ mail_query, query->xapian_query);
exclude_query = _notmuch_exclude_tags (query);
--
2.30.2
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 21/27] lib/query: factor out _notmuch_query_string_to_xapian_query
2021-07-30 12:55 v3 sexpr query parser David Bremner
` (19 preceding siblings ...)
2021-07-30 12:56 ` [PATCH 20/27] lib: generate actual Xapian query for "*" and "" David Bremner
@ 2021-07-30 12:56 ` David Bremner
2021-07-30 12:56 ` [PATCH 22/27] lib/thread-fp: factor out query expansion, rewrite in Xapian David Bremner
` (5 subsequent siblings)
26 siblings, 0 replies; 28+ messages in thread
From: David Bremner @ 2021-07-30 12:56 UTC (permalink / raw)
To: notmuch; +Cc: David Bremner
When dealing with recursive queries (i.e. thread:{foo}) it turns out
to be useful just to deal with the underlying Xapian objects, and not
wrap them in notmuch objects.
---
lib/database-private.h | 7 ++++++
lib/query.cc | 51 ++++++++++++++++++++++++++++--------------
2 files changed, 41 insertions(+), 17 deletions(-)
diff --git a/lib/database-private.h b/lib/database-private.h
index cf4eb94b..7ee8e62d 100644
--- a/lib/database-private.h
+++ b/lib/database-private.h
@@ -302,11 +302,18 @@ notmuch_status_t
_notmuch_database_setup_user_query_fields (notmuch_database_t *notmuch);
#if __cplusplus
+/* query.cc */
+notmuch_status_t
+_notmuch_query_string_to_xapian_query (notmuch_database_t *notmuch,
+ std::string query_string,
+ Xapian::Query &output,
+ std::string &msg);
/* parse-sexp.cc */
notmuch_status_t
_notmuch_sexp_string_to_xapian_query (notmuch_database_t *notmuch, const char *querystr,
Xapian::Query &output);
+/* regexp-fields.cc */
notmuch_status_t
_notmuch_regexp_to_query (notmuch_database_t *notmuch, Xapian::valueno slot, std::string field,
std::string regexp_str,
diff --git a/lib/query.cc b/lib/query.cc
index 57596f48..87ee18fc 100644
--- a/lib/query.cc
+++ b/lib/query.cc
@@ -178,38 +178,55 @@ _notmuch_query_cache_terms (notmuch_query_t *query)
query->terms.insert (*t);
}
-static notmuch_status_t
-_notmuch_query_ensure_parsed_xapian (notmuch_query_t *query)
+notmuch_status_t
+_notmuch_query_string_to_xapian_query (notmuch_database_t *notmuch,
+ std::string query_string,
+ Xapian::Query &output,
+ std::string &msg)
{
try {
- if (strcmp (query->query_string, "") == 0 ||
- strcmp (query->query_string, "*") == 0) {
- query->xapian_query = Xapian::Query::MatchAll;
+ if (query_string == "" || query_string == "*") {
+ output = Xapian::Query::MatchAll;
} else {
- query->xapian_query =
- query->notmuch->query_parser->
- parse_query (query->query_string, NOTMUCH_QUERY_PARSER_FLAGS);
-
- _notmuch_query_cache_terms (query);
+ output =
+ notmuch->query_parser->
+ parse_query (query_string, NOTMUCH_QUERY_PARSER_FLAGS);
}
- query->parsed = true;
-
} catch (const Xapian::Error &error) {
- if (! query->notmuch->exception_reported) {
- _notmuch_database_log (query->notmuch,
+ if (! notmuch->exception_reported) {
+ _notmuch_database_log (notmuch,
"A Xapian exception occurred parsing query: %s\n",
error.get_msg ().c_str ());
- _notmuch_database_log_append (query->notmuch,
+ _notmuch_database_log_append (notmuch,
"Query string was: %s\n",
- query->query_string);
- query->notmuch->exception_reported = true;
+ query_string.c_str ());
+ notmuch->exception_reported = true;
}
+ msg = error.get_msg ();
return NOTMUCH_STATUS_XAPIAN_EXCEPTION;
}
return NOTMUCH_STATUS_SUCCESS;
}
+static notmuch_status_t
+_notmuch_query_ensure_parsed_xapian (notmuch_query_t *query)
+{
+ notmuch_status_t status;
+ std::string msg; /* ignored */
+
+ status = _notmuch_query_string_to_xapian_query (query->notmuch, query->query_string,
+ query->xapian_query, msg);
+ if (status)
+ return status;
+
+ query->parsed = true;
+
+ _notmuch_query_cache_terms (query);
+
+ return NOTMUCH_STATUS_SUCCESS;
+}
+
static notmuch_status_t
_notmuch_query_ensure_parsed_sexpr (notmuch_query_t *query)
{
--
2.30.2
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 22/27] lib/thread-fp: factor out query expansion, rewrite in Xapian
2021-07-30 12:55 v3 sexpr query parser David Bremner
` (20 preceding siblings ...)
2021-07-30 12:56 ` [PATCH 21/27] lib/query: factor out _notmuch_query_string_to_xapian_query David Bremner
@ 2021-07-30 12:56 ` David Bremner
2021-07-30 12:56 ` [PATCH 23/27] lib/parse-sexp: expand queries David Bremner
` (4 subsequent siblings)
26 siblings, 0 replies; 28+ messages in thread
From: David Bremner @ 2021-07-30 12:56 UTC (permalink / raw)
To: notmuch; +Cc: David Bremner
It will be convenient not to have to construct a notmuch query object
when parsing subqueries, so this commit rewrites the query
expansion (currently only used for thread:{} queries) using only
Xapian. As a bonus it seems about 15% faster in initial experiments.
---
lib/database-private.h | 16 +++++++++++++-
lib/parse-sexp.cc | 2 --
lib/query.cc | 47 ++++++++++++++++++++++++++++++++++++++++++
lib/thread-fp.cc | 26 ++++++++---------------
4 files changed, 71 insertions(+), 20 deletions(-)
diff --git a/lib/database-private.h b/lib/database-private.h
index 7ee8e62d..9ee3b933 100644
--- a/lib/database-private.h
+++ b/lib/database-private.h
@@ -40,6 +40,10 @@
#include <xapian.h>
+#if HAVE_SFSEXP
+#include <sexp.h>
+#endif
+
/* Bit masks for _notmuch_database::features. Features are named,
* independent aspects of the database schema.
*
@@ -313,11 +317,21 @@ notmuch_status_t
_notmuch_sexp_string_to_xapian_query (notmuch_database_t *notmuch, const char *querystr,
Xapian::Query &output);
+notmuch_status_t
+_notmuch_query_expand (notmuch_database_t *notmuch, const char *field, Xapian::Query subquery,
+ Xapian::Query &output, std::string &msg);
+
/* regexp-fields.cc */
notmuch_status_t
_notmuch_regexp_to_query (notmuch_database_t *notmuch, Xapian::valueno slot, std::string field,
std::string regexp_str,
Xapian::Query &output, std::string &msg);
-#endif
+#if HAVE_SFSEXP
+/* parse-sexp.cc */
+notmuch_status_t
+_notmuch_sexp_string_to_xapian_query (notmuch_database_t *notmuch, const char *querystr,
+ Xapian::Query &output);
+#endif
+#endif
#endif
diff --git a/lib/parse-sexp.cc b/lib/parse-sexp.cc
index 48728edb..f48c94be 100644
--- a/lib/parse-sexp.cc
+++ b/lib/parse-sexp.cc
@@ -219,8 +219,6 @@ _sexp_to_xapian_query (notmuch_database_t *notmuch, const _sexp_prefix_t *parent
Xapian::Query &output)
{
if (sx->ty == SEXP_VALUE) {
- std::string term = Xapian::Unicode::tolower (sx->val);
- Xapian::Stem stem = *(notmuch->stemmer);
std::string term_prefix = parent ? _find_prefix (parent->name) : "";
if (sx->aty == SEXP_BASIC && strcmp (sx->val, "*") == 0) {
diff --git a/lib/query.cc b/lib/query.cc
index 87ee18fc..83b82a1d 100644
--- a/lib/query.cc
+++ b/lib/query.cc
@@ -821,3 +821,50 @@ notmuch_query_get_database (const notmuch_query_t *query)
{
return query->notmuch;
}
+
+/* Collect the 'field' terms of all documents matching 'subquery' and OR them
+ * into 'output'. On a Xapian exception, log it and copy its text to 'msg'. */
+notmuch_status_t
+_notmuch_query_expand (notmuch_database_t *notmuch, const char *field, Xapian::Query subquery,
+                       Xapian::Query &output, std::string &msg)
+{
+    std::set<std::string> terms;
+    const std::string term_prefix = _find_prefix (field);
+
+    if (_debug_query ()) {
+        fprintf (stderr, "Expanding subquery:\n%s\n", subquery.get_description ().c_str ());
+    }
+
+    try {
+        Xapian::Enquire enquire (*notmuch->xapian_db);
+        Xapian::MSet mset;
+
+        enquire.set_weighting_scheme (Xapian::BoolWeight ());
+        enquire.set_query (subquery);
+
+        mset = enquire.get_mset (0, notmuch->xapian_db->get_doccount ());
+
+        for (Xapian::MSetIterator iterator = mset.begin (); iterator != mset.end (); iterator++) {
+            Xapian::docid doc_id = *iterator;
+            Xapian::Document doc = notmuch->xapian_db->get_document (doc_id);
+            Xapian::TermIterator i = doc.termlist_begin ();
+            /* start at term_prefix, stop at the first term not beginning with it */
+            for (i.skip_to (term_prefix);
+                 i != doc.termlist_end () && ((*i).rfind (term_prefix, 0) == 0); i++) {
+                terms.insert (*i);
+            }
+        }
+        output = Xapian::Query (Xapian::Query::OP_OR, terms.begin (), terms.end ());
+        if (_debug_query ()) {
+            fprintf (stderr, "Expanded query:\n%s\n", output.get_description ().c_str ());
+        }
+
+    } catch (const Xapian::Error &error) {
+        msg = error.get_msg ();
+        _notmuch_database_log (notmuch,
+                               "A Xapian exception occurred expanding query: %s\n", msg.c_str ());
+        return NOTMUCH_STATUS_XAPIAN_EXCEPTION;
+    }
+
+    return NOTMUCH_STATUS_SUCCESS;
+}
diff --git a/lib/thread-fp.cc b/lib/thread-fp.cc
index 06708ef2..3aa9c423 100644
--- a/lib/thread-fp.cc
+++ b/lib/thread-fp.cc
@@ -34,28 +34,20 @@ ThreadFieldProcessor::operator() (const std::string & str)
if (str.size () <= 1 || str.at (str.size () - 1) != '}') {
throw Xapian::QueryParserError ("missing } in '" + str + "'");
} else {
+ Xapian::Query subquery;
+ Xapian::Query query;
+ std::string msg;
std::string subquery_str = str.substr (1, str.size () - 2);
- notmuch_query_t *subquery = notmuch_query_create (notmuch, subquery_str.c_str ());
- notmuch_messages_t *messages;
- std::set<std::string> terms;
- if (! subquery)
- throw Xapian::QueryParserError ("failed to create subquery for '" + subquery_str +
- "'");
+ status = _notmuch_query_string_to_xapian_query (notmuch, subquery_str, subquery, msg);
+ if (status)
+ throw Xapian::QueryParserError (msg);
- status = notmuch_query_search_messages (subquery, &messages);
+ status = _notmuch_query_expand (notmuch, "thread", subquery, query, msg);
if (status)
- throw Xapian::QueryParserError ("failed to search messages for '" + subquery_str +
- "'");
+ throw Xapian::QueryParserError (msg);
- for (; notmuch_messages_valid (messages); notmuch_messages_move_to_next (messages)) {
- std::string term = thread_prefix;
- notmuch_message_t *message;
- message = notmuch_messages_get (messages);
- term += _notmuch_message_get_thread_id_only (message);
- terms.insert (term);
- }
- return Xapian::Query (Xapian::Query::OP_OR, terms.begin (), terms.end ());
+ return query;
}
} else {
/* literal thread id */
--
2.30.2
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 23/27] lib/parse-sexp: expand queries
2021-07-30 12:55 v3 sexpr query parser David Bremner
` (21 preceding siblings ...)
2021-07-30 12:56 ` [PATCH 22/27] lib/thread-fp: factor out query expansion, rewrite in Xapian David Bremner
@ 2021-07-30 12:56 ` David Bremner
2021-07-30 12:56 ` [PATCH 24/27] lib/parse-sexp: support infix subqueries David Bremner
` (3 subsequent siblings)
26 siblings, 0 replies; 28+ messages in thread
From: David Bremner @ 2021-07-30 12:56 UTC (permalink / raw)
To: notmuch; +Cc: David Bremner
The code here is just gluing together _notmuch_query_expand with the
existing sexp parser infrastructure.
---
doc/man7/notmuch-sexp-queries.rst | 20 +++++++++++
lib/parse-sexp.cc | 56 +++++++++++++++++++++++++------
test/T081-sexpr-search.sh | 52 ++++++++++++++++++++++++++++
3 files changed, 118 insertions(+), 10 deletions(-)
diff --git a/doc/man7/notmuch-sexp-queries.rst b/doc/man7/notmuch-sexp-queries.rst
index 5f0502f7..b6a00c1c 100644
--- a/doc/man7/notmuch-sexp-queries.rst
+++ b/doc/man7/notmuch-sexp-queries.rst
@@ -144,6 +144,11 @@ MODIFIERS
*Modifiers* refer to any prefixes (first elements of compound queries)
that are neither operators nor fields.
+``(matching`` |q1| |q2| ... |qn| ``)`` ``(of`` |q1| |q2| ... |qn| ``)``
+ Match all messages that have the same values of the current field as
+ those matching all of |q1| ... |qn|. Supported in most term [#not-path]_ or
+ phrase fields. Most commonly used in the ``thread`` field.
+
``(regex`` *atom* ``)`` ``(rx`` *atom* ``)``
Interpret *atom* as a POSIX.2 regular expression (see
:manpage:`regex(7)`). This applies in term fields and a subset [#not-phrase]_ of
@@ -176,6 +181,9 @@ EXAMPLES
Match the *phrase* "quick" followed by "fox" in phrase fields (or
outside a field). Match the literal string in a term field.
+``(folder (of (id 1234@invalid)))``
+ Match any message in the same folder as the one with Message-Id "1234@invalid"
+
``(id 1234@invalid blah@test)``
Matches Message-Id "1234@invalid" *or* Message-Id "blah@test"
@@ -193,6 +201,14 @@ EXAMPLES
Match messages whose subject contains "quick brown fox", but also
"brown fox quicksand".
+``(thread (of (id 1234@invalid)))``
+ Match any message in the same thread as the one with Message-Id "1234@invalid"
+
+``(thread (matching (from bob@example.com) (to bob@example.com)))``
+ Match any (messages in) a thread containing a message from
+ "bob@example.com" and a (possibly distinct) message to
+ "bob@example.com".
+
``(to (or bob@example.com mallory@example.org))`` ``(or (to bob@example.com) (to mallory@example.org))``
Match in the "To" or "Cc" headers, "bob@example.com",
"mallory@example.org", and also "bob@example.com.au" since it
@@ -216,6 +232,10 @@ NOTES
.. [#not-body] Due the the way ``body`` is implemented in notmuch,
this modifier is not supported in the ``body`` field.
+.. [#not-path] Due to the way recursive ``path`` queries are implemented
+ in notmuch, this modifier is not supported in the
+ ``path`` field.
+
.. |q1| replace:: :math:`q_1`
.. |q2| replace:: :math:`q_2`
.. |qn| replace:: :math:`q_n`
diff --git a/lib/parse-sexp.cc b/lib/parse-sexp.cc
index f48c94be..3c99a7e0 100644
--- a/lib/parse-sexp.cc
+++ b/lib/parse-sexp.cc
@@ -15,6 +15,8 @@ typedef enum {
SEXP_FLAG_WILDCARD = 1 << 3,
SEXP_FLAG_REGEX = 1 << 4,
SEXP_FLAG_DO_REGEX = 1 << 5,
+ SEXP_FLAG_EXPAND = 1 << 6,
+ SEXP_FLAG_DO_EXPAND = 1 << 7,
} _sexp_flag_t;
/*
@@ -46,29 +48,33 @@ static _sexp_prefix_t prefixes[] =
{ "and", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
SEXP_FLAG_NONE },
{ "attachment", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
- SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD },
+ SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD | SEXP_FLAG_EXPAND },
{ "body", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
SEXP_FLAG_FIELD },
{ "from", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
- SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX },
+ SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX | SEXP_FLAG_EXPAND },
{ "folder", Xapian::Query::OP_OR, Xapian::Query::MatchNothing,
- SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX },
+ SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX | SEXP_FLAG_EXPAND },
{ "id", Xapian::Query::OP_OR, Xapian::Query::MatchNothing,
SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX },
{ "is", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
- SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX },
+ SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX | SEXP_FLAG_EXPAND },
+ { "matching", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
+ SEXP_FLAG_DO_EXPAND },
{ "mid", Xapian::Query::OP_OR, Xapian::Query::MatchNothing,
SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX },
{ "mimetype", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
- SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD },
+ SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD | SEXP_FLAG_EXPAND },
{ "not", Xapian::Query::OP_AND_NOT, Xapian::Query::MatchAll,
SEXP_FLAG_NONE },
+ { "of", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
+ SEXP_FLAG_DO_EXPAND },
{ "or", Xapian::Query::OP_OR, Xapian::Query::MatchNothing,
SEXP_FLAG_NONE },
{ "path", Xapian::Query::OP_OR, Xapian::Query::MatchNothing,
SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX },
{ "property", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
- SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX },
+ SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX | SEXP_FLAG_EXPAND },
{ "regex", Xapian::Query::OP_INVALID, Xapian::Query::MatchAll,
SEXP_FLAG_SINGLE | SEXP_FLAG_DO_REGEX },
{ "rx", Xapian::Query::OP_INVALID, Xapian::Query::MatchAll,
@@ -76,13 +82,13 @@ static _sexp_prefix_t prefixes[] =
{ "starts-with", Xapian::Query::OP_WILDCARD, Xapian::Query::MatchAll,
SEXP_FLAG_SINGLE },
{ "subject", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
- SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX },
+ SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX | SEXP_FLAG_EXPAND },
{ "tag", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
- SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX },
+ SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX | SEXP_FLAG_EXPAND },
{ "thread", Xapian::Query::OP_OR, Xapian::Query::MatchNothing,
- SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX },
+ SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX | SEXP_FLAG_EXPAND },
{ "to", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
- SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD },
+ SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD | SEXP_FLAG_EXPAND },
{ }
};
@@ -210,6 +216,32 @@ _sexp_parse_regex (notmuch_database_t *notmuch,
val, output, msg);
}
+
+static notmuch_status_t
+_sexp_expand_query (notmuch_database_t *notmuch,
+ const _sexp_prefix_t *prefix, const _sexp_prefix_t *parent,
+ const sexp_t *sx, Xapian::Query &output)
+{
+ Xapian::Query subquery;
+ notmuch_status_t status;
+ std::string msg;
+
+ if (! (parent->flags & SEXP_FLAG_EXPAND)) {
+ _notmuch_database_log (notmuch, "'%s' unsupported inside '%s'\n", prefix->name, parent->name);
+ return NOTMUCH_STATUS_BAD_QUERY_SYNTAX;
+ }
+
+ status = _sexp_combine_query (notmuch, NULL, prefix->xapian_op, prefix->initial, sx, subquery);
+ if (status)
+ return status;
+
+ status = _notmuch_query_expand (notmuch, parent->name, subquery, output, msg);
+ if (status) {
+ _notmuch_database_log (notmuch, "error expanding query %s\n", msg.c_str ());
+ }
+ return status;
+}
+
/* Here we expect the s-expression to be a proper list, with first
* element defining and operation, or as a special case the empty
* list */
@@ -286,6 +318,10 @@ _sexp_to_xapian_query (notmuch_database_t *notmuch, const _sexp_prefix_t *parent
return _sexp_parse_regex (notmuch, prefix, parent, sx->list->next->val, output);
}
+ if (prefix->flags & SEXP_FLAG_DO_EXPAND) {
+ return _sexp_expand_query (notmuch, prefix, parent, sx->list->next, output);
+ }
+
return _sexp_combine_query (notmuch, parent, prefix->xapian_op, prefix->initial,
sx->list->next, output);
}
diff --git a/test/T081-sexpr-search.sh b/test/T081-sexpr-search.sh
index 49fa5262..2a23996e 100755
--- a/test/T081-sexpr-search.sh
+++ b/test/T081-sexpr-search.sh
@@ -641,4 +641,56 @@ notmuch search --output=threads '*' | grep '7$' > EXPECTED
notmuch search --output=threads --query-syntax=sexp '(thread (rx 7$))' > OUTPUT
test_expect_equal_file EXPECTED OUTPUT
+test_begin_subtest "Basic query that matches no messages"
+count=$(notmuch count from:keithp and to:keithp)
+test_expect_equal 0 "$count"
+
+test_begin_subtest "Same query against threads"
+notmuch search --query-syntax=sexp '(and (thread (of (from keithp))) (thread (matching (to keithp))))' \
+ | notmuch_search_sanitize > OUTPUT
+cat<<EOF > EXPECTED
+thread:XXX 2009-11-18 [7/7] Lars Kellogg-Stedman, Mikhail Gusarov, Keith Packard, Carl Worth; [notmuch] Working with Maildir storage? (inbox signed unread)
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "Mix thread and non-threads query"
+notmuch search --query-syntax=sexp '(and (thread (matching keithp)) (to keithp))' | notmuch_search_sanitize > OUTPUT
+cat<<EOF > EXPECTED
+thread:XXX 2009-11-18 [1/7] Lars Kellogg-Stedman| Mikhail Gusarov, Keith Packard, Carl Worth; [notmuch] Working with Maildir storage? (inbox signed unread)
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "Compound subquery"
+notmuch search --query-syntax=sexp '(thread (of (from keithp) (subject Maildir)))' | notmuch_search_sanitize > OUTPUT
+cat<<EOF > EXPECTED
+thread:XXX 2009-11-18 [7/7] Lars Kellogg-Stedman, Mikhail Gusarov, Keith Packard, Carl Worth; [notmuch] Working with Maildir storage? (inbox signed unread)
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "empty subquery"
+notmuch search --query-syntax=sexp '(thread (of))' 1>OUTPUT 2>&1
+notmuch search '*' > EXPECTED
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "illegal expansion"
+notmuch search --query-syntax=sexp '(id (of ego))' 1>OUTPUT 2>&1
+cat<<EOF > EXPECTED
+notmuch search: Syntax error in query
+'of' unsupported inside 'id'
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "(folder (of subquery))"
+notmuch search --query-syntax=sexp --output=messages '(folder (of (id yun3a4cegoa.fsf@aiko.keithp.com)))' > OUTPUT
+cat <<EOF > EXPECTED
+id:yun1vjwegii.fsf@aiko.keithp.com
+id:yun3a4cegoa.fsf@aiko.keithp.com
+id:1258509400-32511-1-git-send-email-stewart@flamingspork.com
+id:1258506353-20352-1-git-send-email-stewart@flamingspork.com
+id:20091118010116.GC25380@dottiness.seas.harvard.edu
+id:20091118005829.GB25380@dottiness.seas.harvard.edu
+id:cf0c4d610911171136h1713aa59w9cf9aa31f052ad0a@mail.gmail.com
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
test_done
--
2.30.2
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 24/27] lib/parse-sexp: support infix subqueries
2021-07-30 12:55 v3 sexpr query parser David Bremner
` (22 preceding siblings ...)
2021-07-30 12:56 ` [PATCH 23/27] lib/parse-sexp: expand queries David Bremner
@ 2021-07-30 12:56 ` David Bremner
2021-07-30 12:56 ` [PATCH 25/27] lib/parse-sexp: parse user headers David Bremner
` (2 subsequent siblings)
26 siblings, 0 replies; 28+ messages in thread
From: David Bremner @ 2021-07-30 12:56 UTC (permalink / raw)
To: notmuch; +Cc: David Bremner
This is necessary so that programs can take infix syntax queries from
a user and use the sexp query syntax to construct e.g. a refinement of
that query.
---
doc/man7/notmuch-sexp-queries.rst | 7 +++++
lib/parse-sexp.cc | 34 ++++++++++++++++++++++++
test/T081-sexpr-search.sh | 43 +++++++++++++++++++++++++++++++
3 files changed, 84 insertions(+)
diff --git a/doc/man7/notmuch-sexp-queries.rst b/doc/man7/notmuch-sexp-queries.rst
index b6a00c1c..fc8621b8 100644
--- a/doc/man7/notmuch-sexp-queries.rst
+++ b/doc/man7/notmuch-sexp-queries.rst
@@ -144,6 +144,10 @@ MODIFIERS
*Modifiers* refer to any prefixes (first elements of compound queries)
that are neither operators nor fields.
+``(infix`` *atom* ``)``
+ Interpret *atom* as an infix notmuch query (see
+ :any:`notmuch-search-terms(7)`). Not supported inside fields.
+
``(matching`` |q1| |q2| ... |qn| ``)`` ``(of`` |q1| |q2| ... |qn| ``)``
Match all messages that have the same values of the current field as
those matching all of |q1| ... |qn|. Supported in most term [#not-path]_ or
@@ -187,6 +191,9 @@ EXAMPLES
``(id 1234@invalid blah@test)``
Matches Message-Id "1234@invalid" *or* Message-Id "blah@test"
+``(and (infix "date:2009-11-18..2009-11-18") (tag unread))``
+ Match messages in the given date range with tag unread.
+
``(starts-with prelim)``
Match any words starting with "prelim".
diff --git a/lib/parse-sexp.cc b/lib/parse-sexp.cc
index 3c99a7e0..61dd8f7d 100644
--- a/lib/parse-sexp.cc
+++ b/lib/parse-sexp.cc
@@ -57,6 +57,8 @@ static _sexp_prefix_t prefixes[] =
SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX | SEXP_FLAG_EXPAND },
{ "id", Xapian::Query::OP_OR, Xapian::Query::MatchNothing,
SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX },
+ { "infix", Xapian::Query::OP_INVALID, Xapian::Query::MatchAll,
+ SEXP_FLAG_SINGLE },
{ "is", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX | SEXP_FLAG_EXPAND },
{ "matching", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
@@ -242,6 +244,34 @@ _sexp_expand_query (notmuch_database_t *notmuch,
return status;
}
+/* Parse sx->val with the infix query parser into 'output'. Rejected with
+ * NOTMUCH_STATUS_BAD_QUERY_SYNTAX when nested in a field ('parent' non-NULL). */
+static notmuch_status_t
+_sexp_parse_infix (notmuch_database_t *notmuch, const _sexp_prefix_t *parent,
+                   const sexp_t *sx, Xapian::Query &output)
+{
+    if (parent) {
+        _notmuch_database_log (notmuch, "'infix' not supported inside '%s'\n", parent->name);
+        return NOTMUCH_STATUS_BAD_QUERY_SYNTAX;
+    }
+    try {
+        output = notmuch->query_parser->parse_query (sx->val, NOTMUCH_QUERY_PARSER_FLAGS);
+    } catch (const Xapian::QueryParserError &error) {
+        _notmuch_database_log (notmuch, "Syntax error in infix query: %s\n", sx->val);
+        return NOTMUCH_STATUS_BAD_QUERY_SYNTAX;
+    } catch (const Xapian::Error &error) {
+        if (! notmuch->exception_reported) {
+            _notmuch_database_log (notmuch, "A Xapian exception occurred parsing query: %s\n",
+                                   error.get_msg ().c_str ());
+            _notmuch_database_log_append (notmuch, "Query string was: %s\n", sx->val);
+            notmuch->exception_reported = true;
+        }
+        /* report failure even if an earlier exception suppressed the log above */
+        return NOTMUCH_STATUS_XAPIAN_EXCEPTION;
+    }
+    return NOTMUCH_STATUS_SUCCESS;
+}
+
/* Here we expect the s-expression to be a proper list, with first
* element defining and operation, or as a special case the empty
* list */
@@ -311,6 +341,10 @@ _sexp_to_xapian_query (notmuch_database_t *notmuch, const _sexp_prefix_t *parent
return NOTMUCH_STATUS_BAD_QUERY_SYNTAX;
}
+ if (strcmp (prefix->name, "infix") == 0) {
+ return _sexp_parse_infix (notmuch, parent, sx->list->next, output);
+ }
+
if (prefix->xapian_op == Xapian::Query::OP_WILDCARD)
return _sexp_parse_wildcard (notmuch, parent, sx->list->next->val, output);
diff --git a/test/T081-sexpr-search.sh b/test/T081-sexpr-search.sh
index 2a23996e..145038b5 100755
--- a/test/T081-sexpr-search.sh
+++ b/test/T081-sexpr-search.sh
@@ -693,4 +693,47 @@ id:cf0c4d610911171136h1713aa59w9cf9aa31f052ad0a@mail.gmail.com
EOF
test_expect_equal_file EXPECTED OUTPUT
+test_begin_subtest "infix query"
+notmuch search to:searchbyto | notmuch_search_sanitize > EXPECTED
+notmuch search --query-syntax=sexp '(infix "to:searchbyto")' | notmuch_search_sanitize > OUTPUT
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "bad infix query 1"
+notmuch search --query-syntax=sexp '(infix "from:/unbalanced")' 2>&1| notmuch_search_sanitize > OUTPUT
+cat <<EOF > EXPECTED
+notmuch search: Syntax error in query
+Syntax error in infix query: from:/unbalanced
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "bad infix query 2"
+notmuch search --query-syntax=sexp '(infix "thread:{unbalanced")' 2>&1| notmuch_search_sanitize > OUTPUT
+cat <<EOF > EXPECTED
+notmuch search: Syntax error in query
+Syntax error in infix query: thread:{unbalanced
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "bad infix query 3: bad nesting"
+notmuch search --query-syntax=sexp '(subject (infix "tag:inbox"))' 2>&1| notmuch_search_sanitize > OUTPUT
+cat <<EOF > EXPECTED
+notmuch search: Syntax error in query
+'infix' not supported inside 'subject'
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "infix query that matches no messages"
+notmuch search --query-syntax=sexp '(and (infix "from:keithp") (infix "to:keithp"))' > OUTPUT
+test_expect_equal_file /dev/null OUTPUT
+
+test_begin_subtest "compound infix query"
+notmuch search date:2009-11-18..2009-11-18 and tag:unread > EXPECTED
+notmuch search --query-syntax=sexp '(infix "date:2009-11-18..2009-11-18 and tag:unread")' > OUTPUT
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "compound infix query 2"
+notmuch search date:2009-11-18..2009-11-18 and tag:unread > EXPECTED
+notmuch search --query-syntax=sexp '(and (infix "date:2009-11-18..2009-11-18") (infix "tag:unread"))' > OUTPUT
+test_expect_equal_file EXPECTED OUTPUT
+
test_done
--
2.30.2
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 25/27] lib/parse-sexp: parse user headers
2021-07-30 12:55 v3 sexpr query parser David Bremner
` (23 preceding siblings ...)
2021-07-30 12:56 ` [PATCH 24/27] lib/parse-sexp: support infix subqueries David Bremner
@ 2021-07-30 12:56 ` David Bremner
2021-07-30 12:56 ` [PATCH 26/27] lib: factor out expansion of saved queries David Bremner
2021-07-30 12:56 ` [PATCH 27/27] lib/parse-sexp: handle " David Bremner
26 siblings, 0 replies; 28+ messages in thread
From: David Bremner @ 2021-07-30 12:56 UTC (permalink / raw)
To: notmuch; +Cc: David Bremner
One subtle aspect is the replacement of _find_prefix with
_notmuch_database_prefix, which understands user headers. Otherwise
the code mainly consists of creating a fake prefix record (since the
user prefixes are not in the prefix table) and error handling.
---
doc/man7/notmuch-sexp-queries.rst | 4 +++
lib/parse-sexp.cc | 34 +++++++++++++++++++++---
test/T081-sexpr-search.sh | 44 +++++++++++++++++++++++++++++++
3 files changed, 78 insertions(+), 4 deletions(-)
diff --git a/doc/man7/notmuch-sexp-queries.rst b/doc/man7/notmuch-sexp-queries.rst
index fc8621b8..44d8eb8a 100644
--- a/doc/man7/notmuch-sexp-queries.rst
+++ b/doc/man7/notmuch-sexp-queries.rst
@@ -224,6 +224,10 @@ EXAMPLES
``(not (to *))``
Match messages with an empty or invalid 'To' and 'Cc' field.
+``(List *)``
+ Match messages with a non-empty List-Id header, assuming
+ configuration ``index.header.List=List-Id``
+
NOTES
=====
diff --git a/lib/parse-sexp.cc b/lib/parse-sexp.cc
index 61dd8f7d..a1783f86 100644
--- a/lib/parse-sexp.cc
+++ b/lib/parse-sexp.cc
@@ -166,7 +166,7 @@ _sexp_parse_wildcard (notmuch_database_t *notmuch,
Xapian::Query &output)
{
- std::string term_prefix = parent ? _find_prefix (parent->name) : "";
+ std::string term_prefix = parent ? _notmuch_database_prefix (notmuch, parent->name) : "";
if (parent && ! (parent->flags & SEXP_FLAG_WILDCARD)) {
_notmuch_database_log (notmuch, "'%s' does not support wildcard queries\n", parent->name);
@@ -272,6 +272,27 @@ _sexp_parse_infix (notmuch_database_t *notmuch, const _sexp_prefix_t *parent,
return NOTMUCH_STATUS_SUCCESS;
}
+static notmuch_status_t
+_sexp_parse_header (notmuch_database_t *notmuch, const _sexp_prefix_t *parent,
+ const sexp_t *sx, Xapian::Query &output)
+{
+ _sexp_prefix_t user_prefix;
+
+ user_prefix.name = sx->list->val;
+ user_prefix.flags = SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD;
+
+ if (parent) {
+ _notmuch_database_log (notmuch, "nested field: '%s' inside '%s'\n",
+ sx->list->val, parent->name);
+ return NOTMUCH_STATUS_BAD_QUERY_SYNTAX;
+ }
+
+ parent = &user_prefix;
+
+ return _sexp_combine_query (notmuch, parent, Xapian::Query::OP_AND, Xapian::Query::MatchAll,
+ sx->list->next, output);
+}
+
/* Here we expect the s-expression to be a proper list, with first
* element defining and operation, or as a special case the empty
* list */
@@ -281,7 +302,7 @@ _sexp_to_xapian_query (notmuch_database_t *notmuch, const _sexp_prefix_t *parent
Xapian::Query &output)
{
if (sx->ty == SEXP_VALUE) {
- std::string term_prefix = parent ? _find_prefix (parent->name) : "";
+ std::string term_prefix = parent ? _notmuch_database_prefix (notmuch, parent->name) : "";
if (sx->aty == SEXP_BASIC && strcmp (sx->val, "*") == 0) {
return _sexp_parse_wildcard (notmuch, parent, "", output);
@@ -291,6 +312,7 @@ _sexp_to_xapian_query (notmuch_database_t *notmuch, const _sexp_prefix_t *parent
output = Xapian::Query (term_prefix + sx->val);
return NOTMUCH_STATUS_SUCCESS;
}
+
if (parent) {
return _sexp_parse_one_term (notmuch, term_prefix, sx, output);
} else {
@@ -299,7 +321,7 @@ _sexp_to_xapian_query (notmuch_database_t *notmuch, const _sexp_prefix_t *parent
if (prefix->flags & SEXP_FLAG_FIELD) {
notmuch_status_t status;
Xapian::Query subquery;
- term_prefix = _find_prefix (prefix->name);
+ term_prefix = _notmuch_database_prefix (notmuch, prefix->name);
status = _sexp_parse_one_term (notmuch, term_prefix, sx, subquery);
if (status)
return status;
@@ -323,6 +345,11 @@ _sexp_to_xapian_query (notmuch_database_t *notmuch, const _sexp_prefix_t *parent
return NOTMUCH_STATUS_BAD_QUERY_SYNTAX;
}
+ /* Check for user defined field */
+ if (_notmuch_string_map_get (notmuch->user_prefix, sx->list->val)) {
+ return _sexp_parse_header (notmuch, parent, sx, output);
+ }
+
for (_sexp_prefix_t *prefix = prefixes; prefix && prefix->name; prefix++) {
if (strcmp (prefix->name, sx->list->val) == 0) {
if (prefix->flags & SEXP_FLAG_FIELD) {
@@ -362,7 +389,6 @@ _sexp_to_xapian_query (notmuch_database_t *notmuch, const _sexp_prefix_t *parent
}
_notmuch_database_log (notmuch, "unknown prefix '%s'\n", sx->list->val);
-
return NOTMUCH_STATUS_BAD_QUERY_SYNTAX;
}
diff --git a/test/T081-sexpr-search.sh b/test/T081-sexpr-search.sh
index 145038b5..04aa6150 100755
--- a/test/T081-sexpr-search.sh
+++ b/test/T081-sexpr-search.sh
@@ -736,4 +736,48 @@ notmuch search date:2009-11-18..2009-11-18 and tag:unread > EXPECTED
notmuch search --query-syntax=sexp '(and (infix "date:2009-11-18..2009-11-18") (infix "tag:unread"))' > OUTPUT
test_expect_equal_file EXPECTED OUTPUT
+test_begin_subtest "user header (unknown header)"
+notmuch search --query-syntax=sexp '(FooBar)' >& OUTPUT
+cat <<EOF > EXPECTED
+notmuch search: Syntax error in query
+unknown prefix 'FooBar'
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "adding user header"
+test_expect_code 0 "notmuch config set index.header.List \"List-Id\""
+
+test_begin_subtest "reindexing"
+test_expect_code 0 'notmuch reindex "*"'
+
+test_begin_subtest "wildcard search for user header"
+grep -Ril List-Id ${MAIL_DIR} | sort | notmuch_dir_sanitize > EXPECTED
+notmuch search --output=files --query-syntax=sexp '(List *)' | sort | notmuch_dir_sanitize > OUTPUT
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "wildcard search for user header 2"
+grep -Ril List-Id ${MAIL_DIR} | sort | notmuch_dir_sanitize > EXPECTED
+notmuch search --output=files --query-syntax=sexp '(List (starts-with not))' | sort | notmuch_dir_sanitize > OUTPUT
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "search for user header"
+notmuch search List:notmuch | notmuch_search_sanitize > EXPECTED
+notmuch search --query-syntax=sexp '(List notmuch)' | notmuch_search_sanitize > OUTPUT
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "search for user header (list token)"
+notmuch search List:notmuch | notmuch_search_sanitize > EXPECTED
+notmuch search --query-syntax=sexp '(List notmuch.notmuchmail.org)' | notmuch_search_sanitize > OUTPUT
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "search for user header (quoted string)"
+notmuch search 'List:"notmuch notmuchmail org"' | notmuch_search_sanitize > EXPECTED
+notmuch search --query-syntax=sexp '(List "notmuch notmuchmail org")' | notmuch_search_sanitize > OUTPUT
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "search for user header (atoms)"
+notmuch search 'List:"notmuch notmuchmail org"' | notmuch_search_sanitize > EXPECTED
+notmuch search --query-syntax=sexp '(List notmuch notmuchmail org)' | notmuch_search_sanitize > OUTPUT
+test_expect_equal_file EXPECTED OUTPUT
+
test_done
--
2.30.2
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 26/27] lib: factor out expansion of saved queries.
2021-07-30 12:55 v3 sexpr query parser David Bremner
` (24 preceding siblings ...)
2021-07-30 12:56 ` [PATCH 25/27] lib/parse-sexp: parse user headers David Bremner
@ 2021-07-30 12:56 ` David Bremner
2021-07-30 12:56 ` [PATCH 27/27] lib/parse-sexp: handle " David Bremner
26 siblings, 0 replies; 28+ messages in thread
From: David Bremner @ 2021-07-30 12:56 UTC (permalink / raw)
To: notmuch; +Cc: David Bremner
This is intended to allow use outside of the Xapian query parser.
---
lib/database-private.h | 5 +++++
lib/query-fp.cc | 22 +++++++++++++++++++---
2 files changed, 24 insertions(+), 3 deletions(-)
diff --git a/lib/database-private.h b/lib/database-private.h
index 9ee3b933..8b9d67fe 100644
--- a/lib/database-private.h
+++ b/lib/database-private.h
@@ -327,6 +327,11 @@ _notmuch_regexp_to_query (notmuch_database_t *notmuch, Xapian::valueno slot, std
std::string regexp_str,
Xapian::Query &output, std::string &msg);
+/* query-fp.cc */
+notmuch_status_t
+_notmuch_query_name_to_query (notmuch_database_t *notmuch, const std::string name,
+ Xapian::Query &output);
+
#if HAVE_SFSEXP
/* parse-sexp.cc */
notmuch_status_t
diff --git a/lib/query-fp.cc b/lib/query-fp.cc
index b980b7f0..75b1d875 100644
--- a/lib/query-fp.cc
+++ b/lib/query-fp.cc
@@ -24,17 +24,33 @@
#include "query-fp.h"
#include <iostream>
-Xapian::Query
-QueryFieldProcessor::operator() (const std::string & name)
+notmuch_status_t
+_notmuch_query_name_to_query (notmuch_database_t *notmuch, const std::string name,
+ Xapian::Query &output)
{
std::string key = "query." + name;
char *expansion;
notmuch_status_t status;
status = notmuch_database_get_config (notmuch, key.c_str (), &expansion);
+ if (status)
+ return status;
+
+ output = notmuch->query_parser->parse_query (expansion, NOTMUCH_QUERY_PARSER_FLAGS);
+ return NOTMUCH_STATUS_SUCCESS;
+}
+
+Xapian::Query
+QueryFieldProcessor::operator() (const std::string & name)
+{
+ notmuch_status_t status;
+ Xapian::Query output;
+
+ status = _notmuch_query_name_to_query (notmuch, name, output);
if (status) {
throw Xapian::QueryParserError ("error looking up key" + name);
}
- return parser.parse_query (expansion, NOTMUCH_QUERY_PARSER_FLAGS);
+ return output;
+
}
--
2.30.2
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 27/27] lib/parse-sexp: handle saved queries
2021-07-30 12:55 v3 sexpr query parser David Bremner
` (25 preceding siblings ...)
2021-07-30 12:56 ` [PATCH 26/27] lib: factor out expansion of saved queries David Bremner
@ 2021-07-30 12:56 ` David Bremner
26 siblings, 0 replies; 28+ messages in thread
From: David Bremner @ 2021-07-30 12:56 UTC (permalink / raw)
To: notmuch; +Cc: David Bremner
This provides functionality analogous to the query: prefix in the
Xapian QueryParser-based parser. Perhaps counterintuitively, the saved
queries currently have to be in the original query syntax (i.e. not
s-expressions).
---
doc/man7/notmuch-sexp-queries.rst | 6 ++++++
lib/parse-sexp.cc | 24 ++++++++++++++-------
test/T081-sexpr-search.sh | 36 +++++++++++++++++++++++++++++++
3 files changed, 58 insertions(+), 8 deletions(-)
diff --git a/doc/man7/notmuch-sexp-queries.rst b/doc/man7/notmuch-sexp-queries.rst
index 44d8eb8a..197a9b60 100644
--- a/doc/man7/notmuch-sexp-queries.rst
+++ b/doc/man7/notmuch-sexp-queries.rst
@@ -153,6 +153,12 @@ that are neither operators nor fields.
those matching all of |q1| ... |qn|. Supported in most term [#not-path]_ or
phrase fields. Most commonly used in the ``thread`` field.
+``(query`` *atom* ``)``
+ Expand to the saved query named by *atom*. See
+ :any:`notmuch-config(1)` for more. Note that the saved query must
+ be in infix syntax (:any:`notmuch-search-terms(7)`). Not supported
+ inside fields.
+
``(regex`` *atom* ``)`` ``(rx`` *atom* ``)``
Interpret *atom* as a POSIX.2 regular expression (see
:manpage:`regex(7)`). This applies in term fields and a subset [#not-phrase]_ of
diff --git a/lib/parse-sexp.cc b/lib/parse-sexp.cc
index a1783f86..e582e350 100644
--- a/lib/parse-sexp.cc
+++ b/lib/parse-sexp.cc
@@ -17,6 +17,7 @@ typedef enum {
SEXP_FLAG_DO_REGEX = 1 << 5,
SEXP_FLAG_EXPAND = 1 << 6,
SEXP_FLAG_DO_EXPAND = 1 << 7,
+ SEXP_FLAG_ORPHAN = 1 << 8,
} _sexp_flag_t;
/*
@@ -58,7 +59,7 @@ static _sexp_prefix_t prefixes[] =
{ "id", Xapian::Query::OP_OR, Xapian::Query::MatchNothing,
SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX },
{ "infix", Xapian::Query::OP_INVALID, Xapian::Query::MatchAll,
- SEXP_FLAG_SINGLE },
+ SEXP_FLAG_SINGLE | SEXP_FLAG_ORPHAN },
{ "is", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX | SEXP_FLAG_EXPAND },
{ "matching", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
@@ -77,6 +78,8 @@ static _sexp_prefix_t prefixes[] =
SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX },
{ "property", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX | SEXP_FLAG_EXPAND },
+ { "query", Xapian::Query::OP_INVALID, Xapian::Query::MatchNothing,
+ SEXP_FLAG_SINGLE | SEXP_FLAG_ORPHAN },
{ "regex", Xapian::Query::OP_INVALID, Xapian::Query::MatchAll,
SEXP_FLAG_SINGLE | SEXP_FLAG_DO_REGEX },
{ "rx", Xapian::Query::OP_INVALID, Xapian::Query::MatchAll,
@@ -245,13 +248,8 @@ _sexp_expand_query (notmuch_database_t *notmuch,
}
static notmuch_status_t
-_sexp_parse_infix (notmuch_database_t *notmuch, const _sexp_prefix_t *parent,
- const sexp_t *sx, Xapian::Query &output)
+_sexp_parse_infix (notmuch_database_t *notmuch, const sexp_t *sx, Xapian::Query &output)
{
- if (parent) {
- _notmuch_database_log (notmuch, "'infix' not supported inside '%s'\n", parent->name);
- return NOTMUCH_STATUS_BAD_QUERY_SYNTAX;
- }
try {
output = notmuch->query_parser->parse_query (sx->val, NOTMUCH_QUERY_PARSER_FLAGS);
} catch (const Xapian::QueryParserError &error) {
@@ -361,6 +359,12 @@ _sexp_to_xapian_query (notmuch_database_t *notmuch, const _sexp_prefix_t *parent
parent = prefix;
}
+ if (parent && (prefix->flags & SEXP_FLAG_ORPHAN)) {
+ _notmuch_database_log (notmuch, "'%s' not supported inside '%s'\n",
+ prefix->name, parent->name);
+ return NOTMUCH_STATUS_BAD_QUERY_SYNTAX;
+ }
+
if ((prefix->flags & SEXP_FLAG_SINGLE) &&
(! sx->list->next || sx->list->next->next || sx->list->next->ty != SEXP_VALUE)) {
_notmuch_database_log (notmuch, "'%s' expects single atom as argument\n",
@@ -369,7 +373,11 @@ _sexp_to_xapian_query (notmuch_database_t *notmuch, const _sexp_prefix_t *parent
}
if (strcmp (prefix->name, "infix") == 0) {
- return _sexp_parse_infix (notmuch, parent, sx->list->next, output);
+ return _sexp_parse_infix (notmuch, sx->list->next, output);
+ }
+
+ if (strcmp (prefix->name, "query") == 0) {
+ return _notmuch_query_name_to_query (notmuch, sx->list->next->val, output);
}
if (prefix->xapian_op == Xapian::Query::OP_WILDCARD)
diff --git a/test/T081-sexpr-search.sh b/test/T081-sexpr-search.sh
index 04aa6150..ea848fe0 100755
--- a/test/T081-sexpr-search.sh
+++ b/test/T081-sexpr-search.sh
@@ -221,10 +221,46 @@ notmuch search mimetype:text/html > EXPECTED
notmuch search --query-syntax=sexp '(mimetype text html)' > OUTPUT
test_expect_equal_file EXPECTED OUTPUT
+QUERYSTR="date:2009-11-18..2009-11-18 and tag:unread"
+QUERYSTR2="query:test and subject:Maildir"
+notmuch config set --database query.test "$QUERYSTR"
+notmuch config set query.test2 "$QUERYSTR2"
+
+test_begin_subtest "ill-formed named query search"
+notmuch search --query-syntax=sexp '(query)' > OUTPUT 2>&1
+cat <<EOF > EXPECTED
+notmuch search: Syntax error in query
+'query' expects single atom as argument
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "ill-formed named query search 2"
+notmuch search --query-syntax=sexp '(to (query))' > OUTPUT 2>&1
+cat <<EOF > EXPECTED
+notmuch search: Syntax error in query
+'query' not supported inside 'to'
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "search named query"
+notmuch search --query-syntax=sexp '(query test)' > OUTPUT
+notmuch search $QUERYSTR > EXPECTED
+test_expect_equal_file EXPECTED OUTPUT
+
test_begin_subtest "Search by 'subject' (utf-8, phrase-token):"
output=$(notmuch search --query-syntax=sexp '(subject utf8-sübjéct)' | notmuch_search_sanitize)
test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; utf8-sübjéct (inbox unread)"
+test_begin_subtest "search named query with other terms"
+notmuch search --query-syntax=sexp '(and (query test) (subject Maildir))' > OUTPUT
+notmuch search $QUERYSTR and subject:Maildir > EXPECTED
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "search nested named query"
+notmuch search --query-syntax=sexp '(query test2)' > OUTPUT
+notmuch search $QUERYSTR2 > EXPECTED
+test_expect_equal_file EXPECTED OUTPUT
+
test_begin_subtest "Search by 'subject' (utf-8, quoted string):"
output=$(notmuch search --query-syntax=sexp '(subject "utf8 sübjéct")' | notmuch_search_sanitize)
test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; utf8-sübjéct (inbox unread)"
--
2.30.2
^ permalink raw reply related [flat|nested] 28+ messages in thread