unofficial mirror of notmuch@notmuchmail.org
 help / color / mirror / code / Atom feed
* Index user defined headers
@ 2019-03-27 11:16 David Bremner
  2019-03-27 11:16 ` [PATCH 1/9] util: add unicode_word_utf8 David Bremner
                   ` (10 more replies)
  0 siblings, 11 replies; 22+ messages in thread
From: David Bremner @ 2019-03-27 11:16 UTC (permalink / raw)
  To: notmuch


This obsoletes [1]. Compared to the previous version the main change
is that it imposes the restriction that user defined prefixes may not
start with [a-z], and must consist of "unicode word characters". This
assumes a utf8 input encoding. People that don't like utf8 are welcome
to use ASCII :P

[1]: id:20190302154133.25642-1-david@tethera.net
[2]: https://salsa.debian.org/bremner/notmuch/commits/wip/user-headers

^ permalink raw reply	[flat|nested] 22+ messages in thread

* [PATCH 1/9] util: add unicode_word_utf8
  2019-03-27 11:16 Index user defined headers David Bremner
@ 2019-03-27 11:16 ` David Bremner
  2019-03-27 11:16 ` [PATCH 2/9] cli/config: refactor _stored_in_db David Bremner
                   ` (9 subsequent siblings)
  10 siblings, 0 replies; 22+ messages in thread
From: David Bremner @ 2019-03-27 11:16 UTC (permalink / raw)
  To: notmuch

This originally used Xapian::Unicode::is_wordchar, but that forces
clients to link directly to libxapian, which seems like it might be
busywork if nothing else.
---
 util/Makefile.local |  3 ++-
 util/unicode-util.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
 util/unicode-util.h | 12 ++++++++++++
 3 files changed, 59 insertions(+), 1 deletion(-)
 create mode 100644 util/unicode-util.c
 create mode 100644 util/unicode-util.h

diff --git a/util/Makefile.local b/util/Makefile.local
index ba03230e..46f8af3a 100644
--- a/util/Makefile.local
+++ b/util/Makefile.local
@@ -5,7 +5,8 @@ extra_cflags += -I$(srcdir)/$(dir)
 
 libnotmuch_util_c_srcs := $(dir)/xutil.c $(dir)/error_util.c $(dir)/hex-escape.c \
 		  $(dir)/string-util.c $(dir)/talloc-extra.c $(dir)/zlib-extra.c \
-		$(dir)/util.c $(dir)/gmime-extra.c $(dir)/crypto.c
+		$(dir)/util.c $(dir)/gmime-extra.c $(dir)/crypto.c \
+		$(dir)/unicode-util.c
 
 libnotmuch_util_modules := $(libnotmuch_util_c_srcs:.c=.o)
 
diff --git a/util/unicode-util.c b/util/unicode-util.c
new file mode 100644
index 00000000..f0bef543
--- /dev/null
+++ b/util/unicode-util.c
@@ -0,0 +1,45 @@
+#include "unicode-util.h"
+
+/* Based on Xapian::Unicode::is_wordchar, to avoid forcing clients to
+   link directly to libxapian.
+*/
+
+static bool
+unicode_is_wordchar (notmuch_unichar ch)
+{
+    switch (g_unichar_type (ch)) {
+    case G_UNICODE_UPPERCASE_LETTER:
+    case G_UNICODE_LOWERCASE_LETTER:
+    case G_UNICODE_TITLECASE_LETTER:
+    case G_UNICODE_MODIFIER_LETTER:
+    case G_UNICODE_OTHER_LETTER:
+    case G_UNICODE_NON_SPACING_MARK:
+    case G_UNICODE_ENCLOSING_MARK:
+	/* XXX not sure what the glib equivalent of Xapian::Unicode::COMBINING_SPACING_MARK
+	   case G_UNICODE_COMBINING_SPACING_MARK:
+	*/
+    case G_UNICODE_DECIMAL_NUMBER:
+    case G_UNICODE_LETTER_NUMBER:
+    case G_UNICODE_OTHER_NUMBER:
+    case G_UNICODE_CONNECT_PUNCTUATION:
+	return true;
+    default:
+	return false;
+    }
+}
+
+bool
+unicode_word_utf8 (const char *utf8_str)
+{
+    gunichar *decoded=g_utf8_to_ucs4_fast (utf8_str, -1, NULL);
+    const gunichar *p = decoded;
+    bool ret;
+
+    while (*p && unicode_is_wordchar (*p))
+	p++;
+
+    ret =  (*p == '\0');
+
+    g_free (decoded);
+    return ret;
+}
diff --git a/util/unicode-util.h b/util/unicode-util.h
new file mode 100644
index 00000000..32d1e6ef
--- /dev/null
+++ b/util/unicode-util.h
@@ -0,0 +1,12 @@
+#ifndef UNICODE_UTIL_H
+#define UNICODE_UTIL_H
+
+#include <stdbool.h>
+#include <gmodule.h>
+
+/* The utf8 encoded string would tokenize as a single word, according
+ * to xapian. */
+bool unicode_word_utf8 (const char *str);
+typedef gunichar notmuch_unichar;
+
+#endif
-- 
2.20.1

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH 2/9] cli/config: refactor _stored_in_db
  2019-03-27 11:16 Index user defined headers David Bremner
  2019-03-27 11:16 ` [PATCH 1/9] util: add unicode_word_utf8 David Bremner
@ 2019-03-27 11:16 ` David Bremner
  2019-03-27 11:16 ` [PATCH 3/9] cli/config: support user header index config David Bremner
                   ` (8 subsequent siblings)
  10 siblings, 0 replies; 22+ messages in thread
From: David Bremner @ 2019-03-27 11:16 UTC (permalink / raw)
  To: notmuch

This will make it easier to add other prefixes that are stored in the
database, compared to special casing each one as "query." was.
---
 notmuch-config.c | 47 +++++++++++++++++++++++++++++++++++++----------
 1 file changed, 37 insertions(+), 10 deletions(-)

diff --git a/notmuch-config.c b/notmuch-config.c
index bf77cc9d..daecbdac 100644
--- a/notmuch-config.c
+++ b/notmuch-config.c
@@ -821,18 +821,40 @@ _item_split (char *item, char **group, char **key)
 
 #define BUILT_WITH_PREFIX "built_with."
 
+typedef struct config_key {
+    const char *name;
+    bool in_db;
+    bool prefix;
+    bool (*validate)(const char *);
+} config_key_info_t;
+
+static struct config_key
+config_key_table[] = {
+    {"index.decrypt",	true,	false,	NULL},
+    {"query.",		true,	true,	NULL},
+};
+
+static config_key_info_t *
+_config_key_info (const char *item)
+{
+    for (size_t i = 0; i < ARRAY_SIZE (config_key_table); i++) {
+	if (config_key_table[i].prefix &&
+	    strncmp (item, config_key_table[i].name,
+		     strlen(config_key_table[i].name)) == 0)
+	    return config_key_table+i;
+	if (strcmp (item, config_key_table[i].name) == 0)
+	    return config_key_table+i;
+    }
+    return NULL;
+}
+
 static bool
 _stored_in_db (const char *item)
 {
-    const char * db_configs[] = {
-	"index.decrypt",
-    };
-    if (STRNCMP_LITERAL (item, "query.") == 0)
-	return true;
-    for (size_t i = 0; i < ARRAY_SIZE (db_configs); i++)
-	if (strcmp (item, db_configs[i]) == 0)
-	    return true;
-    return false;
+    config_key_info_t *info;
+    info = _config_key_info (item);
+
+    return (info && info->in_db);
 }
 
 static int
@@ -947,13 +969,18 @@ static int
 notmuch_config_command_set (notmuch_config_t *config, char *item, int argc, char *argv[])
 {
     char *group, *key;
+    config_key_info_t *key_info;
 
     if (STRNCMP_LITERAL (item, BUILT_WITH_PREFIX) == 0) {
 	fprintf (stderr, "Error: read only option: %s\n", item);
 	return 1;
     }
 
-    if (_stored_in_db (item)) {
+    key_info = _config_key_info (item);
+    if (key_info && key_info->validate && (! key_info->validate (item)))
+	return 1;
+
+    if (key_info && key_info->in_db) {
 	return _set_db_config (config, item, argc, argv);
     }
 
-- 
2.20.1

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH 3/9] cli/config: support user header index config
  2019-03-27 11:16 Index user defined headers David Bremner
  2019-03-27 11:16 ` [PATCH 1/9] util: add unicode_word_utf8 David Bremner
  2019-03-27 11:16 ` [PATCH 2/9] cli/config: refactor _stored_in_db David Bremner
@ 2019-03-27 11:16 ` David Bremner
  2019-03-27 11:16 ` [PATCH 4/9] cli/config: check syntax of user configured field names David Bremner
                   ` (7 subsequent siblings)
  10 siblings, 0 replies; 22+ messages in thread
From: David Bremner @ 2019-03-27 11:16 UTC (permalink / raw)
  To: notmuch

We don't do anything with this configuration information
yet, but nonetheless add a couple of regression tests to make sure we
don't break standard functionality when we do use the configuration
information.
---
 notmuch-config.c         |  1 +
 test/T730-user-header.sh | 43 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 44 insertions(+)
 create mode 100755 test/T730-user-header.sh

diff --git a/notmuch-config.c b/notmuch-config.c
index daecbdac..519fb27d 100644
--- a/notmuch-config.c
+++ b/notmuch-config.c
@@ -831,6 +831,7 @@ typedef struct config_key {
 static struct config_key
 config_key_table[] = {
     {"index.decrypt",	true,	false,	NULL},
+    {"index.header.",	true,	true,	NULL},
     {"query.",		true,	true,	NULL},
 };
 
diff --git a/test/T730-user-header.sh b/test/T730-user-header.sh
new file mode 100755
index 00000000..75fb1635
--- /dev/null
+++ b/test/T730-user-header.sh
@@ -0,0 +1,43 @@
+#!/usr/bin/env bash
+test_description='indexing user specified headers'
+. $(dirname "$0")/test-lib.sh || exit 1
+
+test_begin_subtest "error adding user header before initializing DB"
+notmuch config set index.header.List List-Id 2>&1 | notmuch_dir_sanitize > OUTPUT
+cat <<EOF > EXPECTED
+Error opening database at MAIL_DIR/.notmuch: No such file or directory
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+add_email_corpus
+
+notmuch search '*' | notmuch_search_sanitize > initial-threads
+notmuch search --output=messages '*' > initial-message-ids
+notmuch dump > initial-dump
+
+test_begin_subtest "adding user header"
+test_expect_code 0 "notmuch config set index.header.List \"List-Id\""
+
+test_begin_subtest "adding existing user header"
+test_expect_code 0 "notmuch config set index.header.List \"List-Id\""
+
+
+test_begin_subtest "retrieve user header"
+output=$(notmuch config get index.header.List)
+test_expect_equal "List-Id" "$output"
+
+test_begin_subtest 'reindex after adding header preserves threads'
+notmuch reindex '*'
+notmuch search '*' | notmuch_search_sanitize > OUTPUT
+test_expect_equal_file initial-threads OUTPUT
+
+test_begin_subtest "List all user headers"
+notmuch config set index.header.Spam "X-Spam"
+notmuch config list | grep ^index.header | notmuch_config_sanitize > OUTPUT
+cat <<EOF > EXPECTED
+index.header.List=List-Id
+index.header.Spam=X-Spam
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_done
-- 
2.20.1

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH 4/9] cli/config: check syntax of user configured field names
  2019-03-27 11:16 Index user defined headers David Bremner
                   ` (2 preceding siblings ...)
  2019-03-27 11:16 ` [PATCH 3/9] cli/config: support user header index config David Bremner
@ 2019-03-27 11:16 ` David Bremner
  2019-03-27 11:16 ` [PATCH 5/9] lib: setup user headers in query parser David Bremner
                   ` (6 subsequent siblings)
  10 siblings, 0 replies; 22+ messages in thread
From: David Bremner @ 2019-03-27 11:16 UTC (permalink / raw)
  To: notmuch

These restrictions are meant to prevent incompatibilities with the
Xapian query parser (which will split at non-word characters) and
clashes with future notmuch builtin fields.
---
 notmuch-config.c         | 41 +++++++++++++++++++++++++++++++++++++++-
 test/T730-user-header.sh | 30 +++++++++++++++++++++++++++++
 2 files changed, 70 insertions(+), 1 deletion(-)

diff --git a/notmuch-config.c b/notmuch-config.c
index 519fb27d..07b4c26f 100644
--- a/notmuch-config.c
+++ b/notmuch-config.c
@@ -24,6 +24,8 @@
 #include <netdb.h>
 #include <assert.h>
 
+#include "unicode-util.h"
+
 static const char toplevel_config_comment[] =
     " .notmuch-config - Configuration file for the notmuch mail system\n"
     "\n"
@@ -819,6 +821,43 @@ _item_split (char *item, char **group, char **key)
     return 0;
 }
 
+/* These are more properly called Xapian fields, but the user facing
+   docs call them prefixes, so make the error message match */
+static bool
+validate_field_name (const char *str)
+{
+    const char *key;
+
+    if (! g_utf8_validate (str, -1, NULL)) {
+	fprintf (stderr, "Invalid utf8: %s\n", str);
+	return false;
+    }
+
+    key = g_utf8_strrchr (str, -1, '.');
+    if (! key ) {
+	INTERNAL_ERROR ("Impossible code path on input: %s\n", str);
+    }
+
+    key++;
+
+    if (! *key) {
+	fprintf (stderr, "Empty prefix name: %s\n", str);
+	return false;
+    }
+
+    if (! unicode_word_utf8 (key)) {
+	fprintf (stderr, "Non-word character in prefix name: %s\n", key);
+	return false;
+    }
+
+    if (key[0] >= 'a' && key[0] <= 'z') {
+	fprintf (stderr, "Prefix names starting with lower case letters are reserved: %s\n", key);
+	return false;
+    }
+
+    return true;
+}
+
 #define BUILT_WITH_PREFIX "built_with."
 
 typedef struct config_key {
@@ -831,7 +870,7 @@ typedef struct config_key {
 static struct config_key
 config_key_table[] = {
     {"index.decrypt",	true,	false,	NULL},
-    {"index.header.",	true,	true,	NULL},
+    {"index.header.",	true,	true,	validate_field_name},
     {"query.",		true,	true,	NULL},
 };
 
diff --git a/test/T730-user-header.sh b/test/T730-user-header.sh
index 75fb1635..b97b00b6 100755
--- a/test/T730-user-header.sh
+++ b/test/T730-user-header.sh
@@ -15,6 +15,36 @@ notmuch search '*' | notmuch_search_sanitize > initial-threads
 notmuch search --output=messages '*' > initial-message-ids
 notmuch dump > initial-dump
 
+test_begin_subtest "adding illegal prefix name, bad utf8"
+notmuch config set index.header.$'\xFF' "List-Id" 2>&1 | sed 's/:.*$//' >OUTPUT
+cat <<EOF > EXPECTED
+Invalid utf8
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "adding illegal prefix name, reserved for notmuch"
+notmuch config set index.header.list "List-Id" 2>OUTPUT
+cat <<EOF > EXPECTED
+Prefix names starting with lower case letters are reserved: list
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "adding illegal prefix name, non-word character."
+notmuch config set index.header.l:st "List-Id" 2>OUTPUT
+cat <<EOF > EXPECTED
+Non-word character in prefix name: l:st
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "adding empty prefix name."
+notmuch config set index.header. "List-Id" 2>OUTPUT
+Non-word character in prefix name: l:st
+cat <<EOF > EXPECTED
+Empty prefix name: index.header.
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+
 test_begin_subtest "adding user header"
 test_expect_code 0 "notmuch config set index.header.List \"List-Id\""
 
-- 
2.20.1

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH 5/9] lib: setup user headers in query parser
  2019-03-27 11:16 Index user defined headers David Bremner
                   ` (3 preceding siblings ...)
  2019-03-27 11:16 ` [PATCH 4/9] cli/config: check syntax of user configured field names David Bremner
@ 2019-03-27 11:16 ` David Bremner
  2019-03-27 11:16 ` [PATCH 6/9] lib: cache user prefixes in database object David Bremner
                   ` (5 subsequent siblings)
  10 siblings, 0 replies; 22+ messages in thread
From: David Bremner @ 2019-03-27 11:16 UTC (permalink / raw)
  To: notmuch

These tests will need to be updated if the Xapian
query print/debug format changes.
---
 lib/database.cc          | 35 +++++++++++++++++++++++++++++++++++
 test/T730-user-header.sh | 21 +++++++++++++++++++++
 2 files changed, 56 insertions(+)

diff --git a/lib/database.cc b/lib/database.cc
index 9cf8062c..4de79f79 100644
--- a/lib/database.cc
+++ b/lib/database.cc
@@ -299,6 +299,8 @@ prefix_t prefix_table[] = {
 						NOTMUCH_FIELD_PROCESSOR},
 };
 
+#define CONFIG_HEADER_PREFIX "index.header."
+
 static void
 _setup_query_field_default (const prefix_t *prefix, notmuch_database_t *notmuch)
 {
@@ -308,6 +310,38 @@ _setup_query_field_default (const prefix_t *prefix, notmuch_database_t *notmuch)
 	notmuch->query_parser->add_boolean_prefix (prefix->name, prefix->prefix);
 }
 
+static notmuch_status_t
+_setup_user_query_fields (notmuch_database_t *notmuch)
+{
+    notmuch_config_list_t *list;
+    notmuch_status_t status;
+
+    status = notmuch_database_get_config_list (notmuch, CONFIG_HEADER_PREFIX, &list);
+    if (status)
+	return status;
+    for (; notmuch_config_list_valid (list); notmuch_config_list_move_to_next (list)) {
+
+	prefix_t query_field { .name = NULL, .prefix = NULL,
+		.flags = NOTMUCH_FIELD_PROBABILISTIC |
+			 NOTMUCH_FIELD_EXTERNAL
+		};
+
+	const char *key = notmuch_config_list_key (list)
+	    + sizeof (CONFIG_HEADER_PREFIX) - 1;
+
+	char *prefix = talloc_asprintf(notmuch, "XU%s:", key);
+
+	query_field.name = key;
+	query_field.prefix = prefix;
+
+	_setup_query_field_default (&query_field, notmuch);
+    }
+
+    notmuch_config_list_destroy (list);
+
+    return NOTMUCH_STATUS_SUCCESS;
+}
+
 #if HAVE_XAPIAN_FIELD_PROCESSOR
 static void
 _setup_query_field (const prefix_t *prefix, notmuch_database_t *notmuch)
@@ -965,6 +999,7 @@ notmuch_database_open_verbose (const char *path,
 		_setup_query_field (prefix, notmuch);
 	    }
 	}
+	status = _setup_user_query_fields (notmuch);
     } catch (const Xapian::Error &error) {
 	IGNORE_RESULT (asprintf (&message, "A Xapian exception occurred opening database: %s\n",
 				 error.get_msg().c_str()));
diff --git a/test/T730-user-header.sh b/test/T730-user-header.sh
index b97b00b6..2d6cc60b 100755
--- a/test/T730-user-header.sh
+++ b/test/T730-user-header.sh
@@ -70,4 +70,25 @@ index.header.Spam=X-Spam
 EOF
 test_expect_equal_file EXPECTED OUTPUT
 
+test_begin_subtest "parse user prefix"
+NOTMUCH_DEBUG_QUERY=t notmuch count 'List:"notmuch"' 2>&1 | grep Tmail >OUTPUT
+cat <<EOF > EXPECTED
+Query((Tmail AND XUList:notmuch@1))
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "parse user prefix, stemmed"
+NOTMUCH_DEBUG_QUERY=t notmuch count 'List:notmuch' 2>&1 | grep Tmail >OUTPUT
+cat <<EOF > EXPECTED
+Query((Tmail AND ZXUList:notmuch@1))
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "parse user prefix, phrase"
+NOTMUCH_DEBUG_QUERY=t notmuch count 'List:notmuchmail.org' 2>&1 | grep Tmail >OUTPUT
+cat <<EOF > EXPECTED
+Query((Tmail AND (XUList:notmuchmail@1 PHRASE 2 XUList:org@2)))
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
 test_done
-- 
2.20.1

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH 6/9] lib: cache user prefixes in database object
  2019-03-27 11:16 Index user defined headers David Bremner
                   ` (4 preceding siblings ...)
  2019-03-27 11:16 ` [PATCH 5/9] lib: setup user headers in query parser David Bremner
@ 2019-03-27 11:16 ` David Bremner
  2019-03-27 11:16 ` [PATCH 7/9] lib: support user prefix names in term generation David Bremner
                   ` (4 subsequent siblings)
  10 siblings, 0 replies; 22+ messages in thread
From: David Bremner @ 2019-03-27 11:16 UTC (permalink / raw)
  To: notmuch

This will be used to avoid needing a database access to resolve a db
prefix from the corresponding UI prefix (e.g. when indexing). Arguably
the setup of the separate header map does not belong here, since it is
about indexing rather than querying, but we currently don't have any
other indexing setup to do.
---
 lib/database-private.h |  5 +++++
 lib/database.cc        | 39 ++++++++++++++++++++++++++++-----------
 lib/notmuch-private.h  |  9 +++++++++
 lib/thread.cc          |  2 --
 4 files changed, 42 insertions(+), 13 deletions(-)

diff --git a/lib/database-private.h b/lib/database-private.h
index a499b259..57fddada 100644
--- a/lib/database-private.h
+++ b/lib/database-private.h
@@ -215,6 +215,11 @@ struct _notmuch_database {
     Xapian::ValueRangeProcessor *value_range_processor;
     Xapian::ValueRangeProcessor *date_range_processor;
     Xapian::ValueRangeProcessor *last_mod_range_processor;
+
+    /* XXX it's slightly gross to use two parallel string->string maps
+     * here, but at least they are small */
+    notmuch_string_map_t *user_prefix;
+    notmuch_string_map_t *user_header;
 };
 
 /* Prior to database version 3, features were implied by the database
diff --git a/lib/database.cc b/lib/database.cc
index 4de79f79..6caa1311 100644
--- a/lib/database.cc
+++ b/lib/database.cc
@@ -299,8 +299,6 @@ prefix_t prefix_table[] = {
 						NOTMUCH_FIELD_PROCESSOR},
 };
 
-#define CONFIG_HEADER_PREFIX "index.header."
-
 static void
 _setup_query_field_default (const prefix_t *prefix, notmuch_database_t *notmuch)
 {
@@ -310,29 +308,48 @@ _setup_query_field_default (const prefix_t *prefix, notmuch_database_t *notmuch)
 	notmuch->query_parser->add_boolean_prefix (prefix->name, prefix->prefix);
 }
 
+const char *
+_user_prefix (void *ctx, const char* name)
+{
+    return talloc_asprintf(ctx, "XU%s:", name);
+}
+
 static notmuch_status_t
 _setup_user_query_fields (notmuch_database_t *notmuch)
 {
     notmuch_config_list_t *list;
     notmuch_status_t status;
 
+    notmuch->user_prefix = _notmuch_string_map_create (notmuch);
+    if (notmuch->user_prefix == NULL)
+	return NOTMUCH_STATUS_OUT_OF_MEMORY;
+
+    notmuch->user_header = _notmuch_string_map_create (notmuch);
+    if (notmuch->user_header == NULL)
+	return NOTMUCH_STATUS_OUT_OF_MEMORY;
+
     status = notmuch_database_get_config_list (notmuch, CONFIG_HEADER_PREFIX, &list);
     if (status)
 	return status;
+
     for (; notmuch_config_list_valid (list); notmuch_config_list_move_to_next (list)) {
 
-	prefix_t query_field { .name = NULL, .prefix = NULL,
-		.flags = NOTMUCH_FIELD_PROBABILISTIC |
-			 NOTMUCH_FIELD_EXTERNAL
-		};
+	prefix_t query_field;
+
+	const char *key = notmuch_config_list_key (list) +
+	    	    + sizeof (CONFIG_HEADER_PREFIX) - 1;
 
-	const char *key = notmuch_config_list_key (list)
-	    + sizeof (CONFIG_HEADER_PREFIX) - 1;
+	_notmuch_string_map_append (notmuch->user_prefix,
+				    key,
+				    _user_prefix (notmuch, key));
 
-	char *prefix = talloc_asprintf(notmuch, "XU%s:", key);
+	_notmuch_string_map_append (notmuch->user_header,
+				    key,
+				    notmuch_config_list_value (list));
 
-	query_field.name = key;
-	query_field.prefix = prefix;
+	query_field.name = talloc_strdup(notmuch, key);
+	query_field.prefix = _user_prefix(notmuch, key);
+	query_field.flags = NOTMUCH_FIELD_PROBABILISTIC | NOTMUCH_FIELD_EXTERNAL;
 
 	_setup_query_field_default (&query_field, notmuch);
     }
diff --git a/lib/notmuch-private.h b/lib/notmuch-private.h
index df32d39c..1ef26e37 100644
--- a/lib/notmuch-private.h
+++ b/lib/notmuch-private.h
@@ -181,6 +181,11 @@ typedef struct _notmuch_doc_id_set notmuch_doc_id_set_t;
 const char *
 _find_prefix (const char *name);
 
+/* Lookup a prefix value by name, including possibly user defined prefixes
+ */
+const char *
+_notmuch_database_prefix (notmuch_database_t  *notmuch, const char *name);
+
 char *
 _notmuch_message_id_compressed (void *ctx, const char *message_id);
 
@@ -676,6 +681,10 @@ struct _notmuch_indexopts {
     _notmuch_crypto_t crypto;
 };
 
+#define CONFIG_HEADER_PREFIX "index.header."
+
+#define EMPTY_STRING(s) ((s)[0] == '\0')
+
 NOTMUCH_END_DECLS
 
 #ifdef __cplusplus
diff --git a/lib/thread.cc b/lib/thread.cc
index 47c90664..ae830064 100644
--- a/lib/thread.cc
+++ b/lib/thread.cc
@@ -30,8 +30,6 @@
 #define THREAD_DEBUG(format, ...) do {} while (0) /* ignored */
 #endif
 
-#define EMPTY_STRING(s) ((s)[0] == '\0')
-
 struct _notmuch_thread {
     notmuch_database_t *notmuch;
     char *thread_id;
-- 
2.20.1

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH 7/9] lib: support user prefix names in term generation
  2019-03-27 11:16 Index user defined headers David Bremner
                   ` (5 preceding siblings ...)
  2019-03-27 11:16 ` [PATCH 6/9] lib: cache user prefixes in database object David Bremner
@ 2019-03-27 11:16 ` David Bremner
  2019-03-27 11:16 ` [PATCH 8/9] lib/database: index user headers David Bremner
                   ` (3 subsequent siblings)
  10 siblings, 0 replies; 22+ messages in thread
From: David Bremner @ 2019-03-27 11:16 UTC (permalink / raw)
  To: notmuch

This should not change the indexing process yet as nothing calls
_notmuch_message_gen_terms with a user prefix name. On the other hand,
it should not break anything either.
---
 lib/database.cc       | 20 ++++++++++++++++++++
 lib/message.cc        |  5 ++++-
 lib/notmuch-private.h |  1 +
 3 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/lib/database.cc b/lib/database.cc
index 6caa1311..19aff0e8 100644
--- a/lib/database.cc
+++ b/lib/database.cc
@@ -405,6 +405,26 @@ _find_prefix (const char *name)
     return "";
 }
 
+/* Like find prefix, but include the possibility of user defined
+ * prefixes specific to this database */
+
+const char *
+_notmuch_database_prefix (notmuch_database_t *notmuch, const char *name)
+{
+    unsigned int i;
+
+    /*XXX TODO: reduce code duplication */
+    for (i = 0; i < ARRAY_SIZE (prefix_table); i++) {
+	if (strcmp (name, prefix_table[i].name) == 0)
+	    return prefix_table[i].prefix;
+    }
+
+    if (notmuch->user_prefix)
+	return _notmuch_string_map_get (notmuch->user_prefix, name);
+
+    return NULL;
+}
+
 static const struct {
     /* NOTMUCH_FEATURE_* value. */
     _notmuch_features value;
diff --git a/lib/message.cc b/lib/message.cc
index 6f2f6345..bbc5ef99 100644
--- a/lib/message.cc
+++ b/lib/message.cc
@@ -1434,7 +1434,10 @@ _notmuch_message_gen_terms (notmuch_message_t *message,
     term_gen->set_document (message->doc);
 
     if (prefix_name) {
-	const char *prefix = _find_prefix (prefix_name);
+	const char *prefix = _notmuch_database_prefix (message->notmuch, prefix_name);
+
+	if (prefix == NULL)
+	    return NOTMUCH_PRIVATE_STATUS_BAD_PREFIX;
 
 	term_gen->set_termpos (message->termpos);
 	term_gen->index_text (text, 1, prefix);
diff --git a/lib/notmuch-private.h b/lib/notmuch-private.h
index 1ef26e37..cf08411e 100644
--- a/lib/notmuch-private.h
+++ b/lib/notmuch-private.h
@@ -136,6 +136,7 @@ typedef enum _notmuch_private_status {
     /* Then add our own private values. */
     NOTMUCH_PRIVATE_STATUS_TERM_TOO_LONG = NOTMUCH_STATUS_LAST_STATUS,
     NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND,
+    NOTMUCH_PRIVATE_STATUS_BAD_PREFIX,
 
     NOTMUCH_PRIVATE_STATUS_LAST_STATUS
 } notmuch_private_status_t;
-- 
2.20.1

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH 8/9] lib/database: index user headers.
  2019-03-27 11:16 Index user defined headers David Bremner
                   ` (6 preceding siblings ...)
  2019-03-27 11:16 ` [PATCH 7/9] lib: support user prefix names in term generation David Bremner
@ 2019-03-27 11:16 ` David Bremner
  2019-03-27 11:16 ` [PATCH 9/9] doc: document user header indexing David Bremner
                   ` (2 subsequent siblings)
  10 siblings, 0 replies; 22+ messages in thread
From: David Bremner @ 2019-03-27 11:16 UTC (permalink / raw)
  To: notmuch

There is an O(log(#user headers)) penalty for the second lookup of the
prefix name.
---
 lib/database.cc          |  6 ++++++
 lib/index.cc             | 28 ++++++++++++++++++++++++++++
 lib/notmuch-private.h    |  5 +++++
 test/T730-user-header.sh | 17 +++++++++++++++++
 4 files changed, 56 insertions(+)

diff --git a/lib/database.cc b/lib/database.cc
index 19aff0e8..2219a76a 100644
--- a/lib/database.cc
+++ b/lib/database.cc
@@ -308,6 +308,12 @@ _setup_query_field_default (const prefix_t *prefix, notmuch_database_t *notmuch)
 	notmuch->query_parser->add_boolean_prefix (prefix->name, prefix->prefix);
 }
 
+notmuch_string_map_iterator_t *
+_notmuch_database_user_headers (notmuch_database_t *notmuch)
+{
+    return _notmuch_string_map_iterator_create (notmuch->user_header, "", false);
+}
+
 const char *
 _user_prefix (void *ctx, const char* name)
 {
diff --git a/lib/index.cc b/lib/index.cc
index efd9da4c..ef3369fd 100644
--- a/lib/index.cc
+++ b/lib/index.cc
@@ -595,6 +595,32 @@ _index_encrypted_mime_part (notmuch_message_t *message,
 
 }
 
+static notmuch_status_t
+_notmuch_message_index_user_headers (notmuch_message_t *message, GMimeMessage *mime_message)
+{
+
+    notmuch_database_t *notmuch = notmuch_message_get_database (message);
+    notmuch_string_map_iterator_t *iter = _notmuch_database_user_headers (notmuch);
+
+    for (; _notmuch_string_map_iterator_valid (iter);
+	 _notmuch_string_map_iterator_move_to_next (iter)) {
+
+	const char *prefix_name = _notmuch_string_map_iterator_key (iter);
+
+	const char *header_name = _notmuch_string_map_iterator_value (iter);
+
+	const char *header = g_mime_object_get_header (GMIME_OBJECT (mime_message), header_name);
+	if (header)
+	    _notmuch_message_gen_terms (message, prefix_name, header);
+    }
+
+    if (iter)
+	_notmuch_string_map_iterator_destroy (iter);
+    return NOTMUCH_STATUS_SUCCESS;
+
+}
+
+
 notmuch_status_t
 _notmuch_message_index_file (notmuch_message_t *message,
 			     notmuch_indexopts_t *indexopts,
@@ -625,6 +651,8 @@ _notmuch_message_index_file (notmuch_message_t *message,
     subject = g_mime_message_get_subject (mime_message);
     _notmuch_message_gen_terms (message, "subject", subject);
 
+    status = _notmuch_message_index_user_headers (message, mime_message);
+
     _index_mime_part (message, indexopts, g_mime_message_get_mime_part (mime_message));
 
     return NOTMUCH_STATUS_SUCCESS;
diff --git a/lib/notmuch-private.h b/lib/notmuch-private.h
index cf08411e..e46df9a8 100644
--- a/lib/notmuch-private.h
+++ b/lib/notmuch-private.h
@@ -652,6 +652,11 @@ _notmuch_string_map_iterator_value (notmuch_string_map_iterator_t *iterator);
 void
 _notmuch_string_map_iterator_destroy (notmuch_string_map_iterator_t *iterator);
 
+/* Create an iterator for user headers. Destroy with
+ * _notmuch_string_map_iterator_destroy. Actually in database.cc*/
+notmuch_string_map_iterator_t *
+_notmuch_database_user_headers (notmuch_database_t *notmuch);
+
 /* tags.c */
 
 notmuch_tags_t *
diff --git a/test/T730-user-header.sh b/test/T730-user-header.sh
index 2d6cc60b..204c052a 100755
--- a/test/T730-user-header.sh
+++ b/test/T730-user-header.sh
@@ -91,4 +91,21 @@ Query((Tmail AND (XUList:notmuchmail@1 PHRASE 2 XUList:org@2)))
 EOF
 test_expect_equal_file EXPECTED OUTPUT
 
+test_begin_subtest "index user header"
+notmuch config set index.header.List "List-Id"
+notmuch reindex '*'
+notmuch search --output=files List:notmuch | notmuch_search_files_sanitize | sort > OUTPUT
+cat <<EOF > EXPECTED
+MAIL_DIR/bar/baz/05:2,
+MAIL_DIR/bar/baz/23:2,
+MAIL_DIR/bar/baz/24:2,
+MAIL_DIR/bar/cur/20:2,
+MAIL_DIR/bar/new/21:2,
+MAIL_DIR/bar/new/22:2,
+MAIL_DIR/foo/cur/08:2,
+MAIL_DIR/foo/new/03:2,
+MAIL_DIR/new/04:2,
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
 test_done
-- 
2.20.1

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH 9/9] doc: document user header indexing.
  2019-03-27 11:16 Index user defined headers David Bremner
                   ` (7 preceding siblings ...)
  2019-03-27 11:16 ` [PATCH 8/9] lib/database: index user headers David Bremner
@ 2019-03-27 11:16 ` David Bremner
  2019-04-26 11:15 ` Index user defined headers David Bremner
  2019-04-28 23:10 ` Index user defined headers v2 David Bremner
  10 siblings, 0 replies; 22+ messages in thread
From: David Bremner @ 2019-03-27 11:16 UTC (permalink / raw)
  To: notmuch

It's a bit odd that the primary documentation is in notmuch-config,
but it is consistent with the "query:" prefix.
---
 doc/man1/notmuch-config.rst       | 9 +++++++++
 doc/man7/notmuch-search-terms.rst | 3 +++
 2 files changed, 12 insertions(+)

diff --git a/doc/man1/notmuch-config.rst b/doc/man1/notmuch-config.rst
index 89909808..28487079 100644
--- a/doc/man1/notmuch-config.rst
+++ b/doc/man1/notmuch-config.rst
@@ -195,6 +195,15 @@ The available configuration items are described below.
 
     Default: ``auto``.
 
+**index.header.<prefix>** **[STORED IN DATABASE]**
+    Define the query prefix <prefix>, based on a mail header. For
+    example ``index.header.List=List-Id`` will add a probabilistic
+    prefix ``List:`` that searches the ``List-Id`` field.  User
+    defined prefixes must not start with 'a'...'z'; in particular
+    adding a prefix with same name as a predefined prefix is not
+    supported. See **notmuch-search-terms(7)** for a list of existing
+    prefixes, and an explanation of probabilistic prefixes.
+
 **built_with.<name>**
     Compile time feature <name>. Current possibilities include
     "compact" (see **notmuch-compact(1)**) and "field_processor" (see
diff --git a/doc/man7/notmuch-search-terms.rst b/doc/man7/notmuch-search-terms.rst
index f7a39ceb..36ba804b 100644
--- a/doc/man7/notmuch-search-terms.rst
+++ b/doc/man7/notmuch-search-terms.rst
@@ -166,6 +166,9 @@ property:<key>=<value>
     can be present on a given message with several different values.
     See **notmuch-properties(7)** for more details.
 
+User defined prefixes are also supported, see **notmuch-config(1)** for
+details.
+
 Operators
 ---------
 
-- 
2.20.1

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* Re: Index user defined headers
  2019-03-27 11:16 Index user defined headers David Bremner
                   ` (8 preceding siblings ...)
  2019-03-27 11:16 ` [PATCH 9/9] doc: document user header indexing David Bremner
@ 2019-04-26 11:15 ` David Bremner
  2019-05-25 10:38   ` David Bremner
  2019-04-28 23:10 ` Index user defined headers v2 David Bremner
  10 siblings, 1 reply; 22+ messages in thread
From: David Bremner @ 2019-04-26 11:15 UTC (permalink / raw)
  To: notmuch

David Bremner <david@tethera.net> writes:

> This obsoletes [1]. Compared to the previous version the main change
> is that it imposes the restriction that user defined prefixes may not
> start with [a-z], and must consist of "unicode word characters". This
> assumes a utf8 input encoding. People that don't like utf8 are welcome
> to use ASCII :P
>
> [1]: id:20190302154133.25642-1-david@tethera.net
> [2]: https://salsa.debian.org/bremner/notmuch/commits/wip/user-headers

Last call for review or other feedback. I'll merge this in the next week
if I don't hear anything.

d

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Index user defined headers v2
  2019-03-27 11:16 Index user defined headers David Bremner
                   ` (9 preceding siblings ...)
  2019-04-26 11:15 ` Index user defined headers David Bremner
@ 2019-04-28 23:10 ` David Bremner
  2019-04-28 23:10   ` [PATCH 1/9] util: add unicode_word_utf8 David Bremner
                     ` (8 more replies)
  10 siblings, 9 replies; 22+ messages in thread
From: David Bremner @ 2019-04-28 23:10 UTC (permalink / raw)
  To: David Bremner, notmuch

It turned out to be not-completely-trivial to rebase onto master, so I
decided to post the rebased patches for potential review. I also did a
bunch of cosmetic changes, but other than not breaking body:
searching, there are no functionality changes since [1].

[1] id:20190327111627.15903-1-david@tethera.net

^ permalink raw reply	[flat|nested] 22+ messages in thread

* [PATCH 1/9] util: add unicode_word_utf8
  2019-04-28 23:10 ` Index user defined headers v2 David Bremner
@ 2019-04-28 23:10   ` David Bremner
  2019-04-28 23:10   ` [PATCH 2/9] cli/config: refactor _stored_in_db David Bremner
                     ` (7 subsequent siblings)
  8 siblings, 0 replies; 22+ messages in thread
From: David Bremner @ 2019-04-28 23:10 UTC (permalink / raw)
  To: David Bremner, notmuch

This originally used Xapian::Unicode::is_wordchar, but that forces
clients to link directly to libxapian, which seems like it might be
busywork if nothing else.
---
 util/Makefile.local |  3 ++-
 util/unicode-util.c | 43 +++++++++++++++++++++++++++++++++++++++++++
 util/unicode-util.h | 12 ++++++++++++
 3 files changed, 57 insertions(+), 1 deletion(-)
 create mode 100644 util/unicode-util.c
 create mode 100644 util/unicode-util.h

diff --git a/util/Makefile.local b/util/Makefile.local
index ba03230e..46f8af3a 100644
--- a/util/Makefile.local
+++ b/util/Makefile.local
@@ -5,7 +5,8 @@ extra_cflags += -I$(srcdir)/$(dir)
 
 libnotmuch_util_c_srcs := $(dir)/xutil.c $(dir)/error_util.c $(dir)/hex-escape.c \
 		  $(dir)/string-util.c $(dir)/talloc-extra.c $(dir)/zlib-extra.c \
-		$(dir)/util.c $(dir)/gmime-extra.c $(dir)/crypto.c
+		$(dir)/util.c $(dir)/gmime-extra.c $(dir)/crypto.c \
+		$(dir)/unicode-util.c
 
 libnotmuch_util_modules := $(libnotmuch_util_c_srcs:.c=.o)
 
diff --git a/util/unicode-util.c b/util/unicode-util.c
new file mode 100644
index 00000000..28ce6001
--- /dev/null
+++ b/util/unicode-util.c
@@ -0,0 +1,43 @@
+#include "unicode-util.h"
+
+/* Based on Xapian::Unicode::is_wordchar, to avoid forcing clients to
+   link directly to libxapian.
+*/
+
+static bool
+unicode_is_wordchar (notmuch_unichar ch)
+{
+    switch (g_unichar_type (ch)) {
+    case G_UNICODE_UPPERCASE_LETTER:
+    case G_UNICODE_LOWERCASE_LETTER:
+    case G_UNICODE_TITLECASE_LETTER:
+    case G_UNICODE_MODIFIER_LETTER:
+    case G_UNICODE_OTHER_LETTER:
+    case G_UNICODE_NON_SPACING_MARK:
+    case G_UNICODE_ENCLOSING_MARK:
+    case G_UNICODE_SPACING_MARK:
+    case G_UNICODE_DECIMAL_NUMBER:
+    case G_UNICODE_LETTER_NUMBER:
+    case G_UNICODE_OTHER_NUMBER:
+    case G_UNICODE_CONNECT_PUNCTUATION:
+	return true;
+    default:
+	return false;
+    }
+}
+
+bool
+unicode_word_utf8 (const char *utf8_str)
+{
+    gunichar *decoded=g_utf8_to_ucs4_fast (utf8_str, -1, NULL);
+    const gunichar *p = decoded;
+    bool ret;
+
+    while (*p && unicode_is_wordchar (*p))
+	p++;
+
+    ret =  (*p == '\0');
+
+    g_free (decoded);
+    return ret;
+}
diff --git a/util/unicode-util.h b/util/unicode-util.h
new file mode 100644
index 00000000..32d1e6ef
--- /dev/null
+++ b/util/unicode-util.h
@@ -0,0 +1,12 @@
+#ifndef UNICODE_UTIL_H
+#define UNICODE_UTIL_H
+
+#include <stdbool.h>
+#include <gmodule.h>
+
+/* The utf8 encoded string would tokenize as a single word, according
+ * to xapian. */
+bool unicode_word_utf8 (const char *str);
+typedef gunichar notmuch_unichar;
+
+#endif
-- 
2.20.1

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH 2/9] cli/config: refactor _stored_in_db
  2019-04-28 23:10 ` Index user defined headers v2 David Bremner
  2019-04-28 23:10   ` [PATCH 1/9] util: add unicode_word_utf8 David Bremner
@ 2019-04-28 23:10   ` David Bremner
  2019-04-28 23:10   ` [PATCH 3/9] cli/config: support user header index config David Bremner
                     ` (6 subsequent siblings)
  8 siblings, 0 replies; 22+ messages in thread
From: David Bremner @ 2019-04-28 23:10 UTC (permalink / raw)
  To: David Bremner, notmuch

This will make it easier to add other prefixes that are stored in the
database, compared to special casing each one as "query." was.
---
 notmuch-config.c | 47 +++++++++++++++++++++++++++++++++++++----------
 1 file changed, 37 insertions(+), 10 deletions(-)

diff --git a/notmuch-config.c b/notmuch-config.c
index bf77cc9d..daecbdac 100644
--- a/notmuch-config.c
+++ b/notmuch-config.c
@@ -821,18 +821,40 @@ _item_split (char *item, char **group, char **key)
 
 #define BUILT_WITH_PREFIX "built_with."
 
+typedef struct config_key {
+    const char *name;
+    bool in_db;
+    bool prefix;
+    bool (*validate)(const char *);
+} config_key_info_t;
+
+static struct config_key
+config_key_table[] = {
+    {"index.decrypt",	true,	false,	NULL},
+    {"query.",		true,	true,	NULL},
+};
+
+static config_key_info_t *
+_config_key_info (const char *item)
+{
+    for (size_t i = 0; i < ARRAY_SIZE (config_key_table); i++) {
+	if (config_key_table[i].prefix &&
+	    strncmp (item, config_key_table[i].name,
+		     strlen(config_key_table[i].name)) == 0)
+	    return config_key_table+i;
+	if (strcmp (item, config_key_table[i].name) == 0)
+	    return config_key_table+i;
+    }
+    return NULL;
+}
+
 static bool
 _stored_in_db (const char *item)
 {
-    const char * db_configs[] = {
-	"index.decrypt",
-    };
-    if (STRNCMP_LITERAL (item, "query.") == 0)
-	return true;
-    for (size_t i = 0; i < ARRAY_SIZE (db_configs); i++)
-	if (strcmp (item, db_configs[i]) == 0)
-	    return true;
-    return false;
+    config_key_info_t *info;
+    info = _config_key_info (item);
+
+    return (info && info->in_db);
 }
 
 static int
@@ -947,13 +969,18 @@ static int
 notmuch_config_command_set (notmuch_config_t *config, char *item, int argc, char *argv[])
 {
     char *group, *key;
+    config_key_info_t *key_info;
 
     if (STRNCMP_LITERAL (item, BUILT_WITH_PREFIX) == 0) {
 	fprintf (stderr, "Error: read only option: %s\n", item);
 	return 1;
     }
 
-    if (_stored_in_db (item)) {
+    key_info = _config_key_info (item);
+    if (key_info && key_info->validate && (! key_info->validate (item)))
+	return 1;
+
+    if (key_info && key_info->in_db) {
 	return _set_db_config (config, item, argc, argv);
     }
 
-- 
2.20.1

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH 3/9] cli/config: support user header index config
  2019-04-28 23:10 ` Index user defined headers v2 David Bremner
  2019-04-28 23:10   ` [PATCH 1/9] util: add unicode_word_utf8 David Bremner
  2019-04-28 23:10   ` [PATCH 2/9] cli/config: refactor _stored_in_db David Bremner
@ 2019-04-28 23:10   ` David Bremner
  2019-04-28 23:10   ` [PATCH 4/9] cli/config: check syntax of user configured field names David Bremner
                     ` (5 subsequent siblings)
  8 siblings, 0 replies; 22+ messages in thread
From: David Bremner @ 2019-04-28 23:10 UTC (permalink / raw)
  To: David Bremner, notmuch

We don't do anything with this configuration information
yet, but nonetheless add a couple of regression tests to make sure we
don't break standard functionality when we do use the configuration
information.
---
 notmuch-config.c         |  1 +
 test/T750-user-header.sh | 43 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 44 insertions(+)
 create mode 100755 test/T750-user-header.sh

diff --git a/notmuch-config.c b/notmuch-config.c
index daecbdac..519fb27d 100644
--- a/notmuch-config.c
+++ b/notmuch-config.c
@@ -831,6 +831,7 @@ typedef struct config_key {
 static struct config_key
 config_key_table[] = {
     {"index.decrypt",	true,	false,	NULL},
+    {"index.header.",	true,	true,	NULL},
     {"query.",		true,	true,	NULL},
 };
 
diff --git a/test/T750-user-header.sh b/test/T750-user-header.sh
new file mode 100755
index 00000000..75fb1635
--- /dev/null
+++ b/test/T750-user-header.sh
@@ -0,0 +1,43 @@
+#!/usr/bin/env bash
+test_description='indexing user specified headers'
+. $(dirname "$0")/test-lib.sh || exit 1
+
+test_begin_subtest "error adding user header before initializing DB"
+notmuch config set index.header.List List-Id 2>&1 | notmuch_dir_sanitize > OUTPUT
+cat <<EOF > EXPECTED
+Error opening database at MAIL_DIR/.notmuch: No such file or directory
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+add_email_corpus
+
+notmuch search '*' | notmuch_search_sanitize > initial-threads
+notmuch search --output=messages '*' > initial-message-ids
+notmuch dump > initial-dump
+
+test_begin_subtest "adding user header"
+test_expect_code 0 "notmuch config set index.header.List \"List-Id\""
+
+test_begin_subtest "adding existing user header"
+test_expect_code 0 "notmuch config set index.header.List \"List-Id\""
+
+
+test_begin_subtest "retrieve user header"
+output=$(notmuch config get index.header.List)
+test_expect_equal "List-Id" "$output"
+
+test_begin_subtest 'reindex after adding header preserves threads'
+notmuch reindex '*'
+notmuch search '*' | notmuch_search_sanitize > OUTPUT
+test_expect_equal_file initial-threads OUTPUT
+
+test_begin_subtest "List all user headers"
+notmuch config set index.header.Spam "X-Spam"
+notmuch config list | grep ^index.header | notmuch_config_sanitize > OUTPUT
+cat <<EOF > EXPECTED
+index.header.List=List-Id
+index.header.Spam=X-Spam
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_done
-- 
2.20.1

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH 4/9] cli/config: check syntax of user configured field names
  2019-04-28 23:10 ` Index user defined headers v2 David Bremner
                     ` (2 preceding siblings ...)
  2019-04-28 23:10   ` [PATCH 3/9] cli/config: support user header index config David Bremner
@ 2019-04-28 23:10   ` David Bremner
  2019-04-28 23:10   ` [PATCH 5/9] lib: setup user headers in query parser David Bremner
                     ` (4 subsequent siblings)
  8 siblings, 0 replies; 22+ messages in thread
From: David Bremner @ 2019-04-28 23:10 UTC (permalink / raw)
  To: David Bremner, notmuch

These restrictions are meant to prevent incompatibilities with the
Xapian query parser (which will split at non-word characters) and
clashes with future notmuch builtin fields.
---
 notmuch-config.c         | 41 +++++++++++++++++++++++++++++++++++++++-
 test/T750-user-header.sh | 30 +++++++++++++++++++++++++++++
 2 files changed, 70 insertions(+), 1 deletion(-)

diff --git a/notmuch-config.c b/notmuch-config.c
index 519fb27d..07b4c26f 100644
--- a/notmuch-config.c
+++ b/notmuch-config.c
@@ -24,6 +24,8 @@
 #include <netdb.h>
 #include <assert.h>
 
+#include "unicode-util.h"
+
 static const char toplevel_config_comment[] =
     " .notmuch-config - Configuration file for the notmuch mail system\n"
     "\n"
@@ -819,6 +821,43 @@ _item_split (char *item, char **group, char **key)
     return 0;
 }
 
+/* These are more properly called Xapian fields, but the user facing
+   docs call them prefixes, so make the error message match */
+static bool
+validate_field_name (const char *str)
+{
+    const char *key;
+
+    if (! g_utf8_validate (str, -1, NULL)) {
+	fprintf (stderr, "Invalid utf8: %s\n", str);
+	return false;
+    }
+
+    key = g_utf8_strrchr (str, -1, '.');
+    if (! key ) {
+	INTERNAL_ERROR ("Impossible code path on input: %s\n", str);
+    }
+
+    key++;
+
+    if (! *key) {
+	fprintf (stderr, "Empty prefix name: %s\n", str);
+	return false;
+    }
+
+    if (! unicode_word_utf8 (key)) {
+	fprintf (stderr, "Non-word character in prefix name: %s\n", key);
+	return false;
+    }
+
+    if (key[0] >= 'a' && key[0] <= 'z') {
+	fprintf (stderr, "Prefix names starting with lower case letters are reserved: %s\n", key);
+	return false;
+    }
+
+    return true;
+}
+
 #define BUILT_WITH_PREFIX "built_with."
 
 typedef struct config_key {
@@ -831,7 +870,7 @@ typedef struct config_key {
 static struct config_key
 config_key_table[] = {
     {"index.decrypt",	true,	false,	NULL},
-    {"index.header.",	true,	true,	NULL},
+    {"index.header.",	true,	true,	validate_field_name},
     {"query.",		true,	true,	NULL},
 };
 
diff --git a/test/T750-user-header.sh b/test/T750-user-header.sh
index 75fb1635..b97b00b6 100755
--- a/test/T750-user-header.sh
+++ b/test/T750-user-header.sh
@@ -15,6 +15,36 @@ notmuch search '*' | notmuch_search_sanitize > initial-threads
 notmuch search --output=messages '*' > initial-message-ids
 notmuch dump > initial-dump
 
+test_begin_subtest "adding illegal prefix name, bad utf8"
+notmuch config set index.header.$'\xFF' "List-Id" 2>&1 | sed 's/:.*$//' >OUTPUT
+cat <<EOF > EXPECTED
+Invalid utf8
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "adding illegal prefix name, reserved for notmuch"
+notmuch config set index.header.list "List-Id" 2>OUTPUT
+cat <<EOF > EXPECTED
+Prefix names starting with lower case letters are reserved: list
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "adding illegal prefix name, non-word character."
+notmuch config set index.header.l:st "List-Id" 2>OUTPUT
+cat <<EOF > EXPECTED
+Non-word character in prefix name: l:st
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "adding empty prefix name."
+notmuch config set index.header. "List-Id" 2>OUTPUT
+Non-word character in prefix name: l:st
+cat <<EOF > EXPECTED
+Empty prefix name: index.header.
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+
 test_begin_subtest "adding user header"
 test_expect_code 0 "notmuch config set index.header.List \"List-Id\""
 
-- 
2.20.1

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH 5/9] lib: setup user headers in query parser
  2019-04-28 23:10 ` Index user defined headers v2 David Bremner
                     ` (3 preceding siblings ...)
  2019-04-28 23:10   ` [PATCH 4/9] cli/config: check syntax of user configured field names David Bremner
@ 2019-04-28 23:10   ` David Bremner
  2019-04-28 23:10   ` [PATCH 6/9] lib: cache user prefixes in database object David Bremner
                     ` (3 subsequent siblings)
  8 siblings, 0 replies; 22+ messages in thread
From: David Bremner @ 2019-04-28 23:10 UTC (permalink / raw)
  To: David Bremner, notmuch

These tests will need to be updated if the Xapian
query print/debug format changes.
---
 lib/database.cc          | 37 +++++++++++++++++++++++++++++++++++++
 lib/notmuch-private.h    |  2 ++
 test/T750-user-header.sh | 21 +++++++++++++++++++++
 3 files changed, 60 insertions(+)

diff --git a/lib/database.cc b/lib/database.cc
index d2732f5e..a3a17b68 100644
--- a/lib/database.cc
+++ b/lib/database.cc
@@ -322,6 +322,42 @@ _setup_query_field_default (const prefix_t *prefix, notmuch_database_t *notmuch)
 	notmuch->query_parser->add_boolean_prefix (prefix->name, prefix->prefix);
 }
 
+const char *
+_user_prefix (void *ctx, const char* name)
+{
+    return talloc_asprintf(ctx, "XU%s:", name);
+}
+
+static notmuch_status_t
+_setup_user_query_fields (notmuch_database_t *notmuch)
+{
+    notmuch_config_list_t *list;
+    notmuch_status_t status;
+
+    status = notmuch_database_get_config_list (notmuch, CONFIG_HEADER_PREFIX, &list);
+    if (status)
+	return status;
+
+    for (; notmuch_config_list_valid (list); notmuch_config_list_move_to_next (list)) {
+
+	prefix_t query_field;
+
+	const char *key = notmuch_config_list_key (list)
+	    + sizeof (CONFIG_HEADER_PREFIX) - 1;
+
+	query_field.name = talloc_strdup (notmuch, key);
+	query_field.prefix = _user_prefix (notmuch, key);
+	query_field.flags = NOTMUCH_FIELD_PROBABILISTIC
+	    | NOTMUCH_FIELD_EXTERNAL;
+
+	_setup_query_field_default (&query_field, notmuch);
+    }
+
+    notmuch_config_list_destroy (list);
+
+    return NOTMUCH_STATUS_SUCCESS;
+}
+
 #if HAVE_XAPIAN_FIELD_PROCESSOR
 static void
 _setup_query_field (const prefix_t *prefix, notmuch_database_t *notmuch)
@@ -986,6 +1022,7 @@ notmuch_database_open_verbose (const char *path,
 		_setup_query_field (prefix, notmuch);
 	    }
 	}
+	status = _setup_user_query_fields (notmuch);
     } catch (const Xapian::Error &error) {
 	IGNORE_RESULT (asprintf (&message, "A Xapian exception occurred opening database: %s\n",
 				 error.get_msg().c_str()));
diff --git a/lib/notmuch-private.h b/lib/notmuch-private.h
index df32d39c..39d11a91 100644
--- a/lib/notmuch-private.h
+++ b/lib/notmuch-private.h
@@ -676,6 +676,8 @@ struct _notmuch_indexopts {
     _notmuch_crypto_t crypto;
 };
 
+#define CONFIG_HEADER_PREFIX "index.header."
+
 NOTMUCH_END_DECLS
 
 #ifdef __cplusplus
diff --git a/test/T750-user-header.sh b/test/T750-user-header.sh
index b97b00b6..2d6cc60b 100755
--- a/test/T750-user-header.sh
+++ b/test/T750-user-header.sh
@@ -70,4 +70,25 @@ index.header.Spam=X-Spam
 EOF
 test_expect_equal_file EXPECTED OUTPUT
 
+test_begin_subtest "parse user prefix"
+NOTMUCH_DEBUG_QUERY=t notmuch count 'List:"notmuch"' 2>&1 | grep Tmail >OUTPUT
+cat <<EOF > EXPECTED
+Query((Tmail AND XUList:notmuch@1))
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "parse user prefix, stemmed"
+NOTMUCH_DEBUG_QUERY=t notmuch count 'List:notmuch' 2>&1 | grep Tmail >OUTPUT
+cat <<EOF > EXPECTED
+Query((Tmail AND ZXUList:notmuch@1))
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "parse user prefix, phrase"
+NOTMUCH_DEBUG_QUERY=t notmuch count 'List:notmuchmail.org' 2>&1 | grep Tmail >OUTPUT
+cat <<EOF > EXPECTED
+Query((Tmail AND (XUList:notmuchmail@1 PHRASE 2 XUList:org@2)))
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
 test_done
-- 
2.20.1

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH 6/9] lib: cache user prefixes in database object
  2019-04-28 23:10 ` Index user defined headers v2 David Bremner
                     ` (4 preceding siblings ...)
  2019-04-28 23:10   ` [PATCH 5/9] lib: setup user headers in query parser David Bremner
@ 2019-04-28 23:10   ` David Bremner
  2019-04-28 23:10   ` [PATCH 7/9] lib: support user prefix names in term generation David Bremner
                     ` (2 subsequent siblings)
  8 siblings, 0 replies; 22+ messages in thread
From: David Bremner @ 2019-04-28 23:10 UTC (permalink / raw)
  To: David Bremner, notmuch

This will be used to avoid needing a database access to resolve a db
prefix from the corresponding UI prefix (e.g. when indexing). Arguably
the setup of the separate header map does not belong here, since it is
about indexing rather than querying, but we currently don't have any
other indexing setup to do.
---
 lib/database-private.h |  5 +++++
 lib/database.cc        | 16 ++++++++++++++++
 lib/notmuch-private.h  |  7 +++++++
 lib/thread.cc          |  2 --
 4 files changed, 28 insertions(+), 2 deletions(-)

diff --git a/lib/database-private.h b/lib/database-private.h
index 293f2db4..9d1dabf1 100644
--- a/lib/database-private.h
+++ b/lib/database-private.h
@@ -221,6 +221,11 @@ struct _notmuch_database {
     Xapian::ValueRangeProcessor *value_range_processor;
     Xapian::ValueRangeProcessor *date_range_processor;
     Xapian::ValueRangeProcessor *last_mod_range_processor;
+
+    /* XXX it's slightly gross to use two parallel string->string maps
+     * here, but at least they are small */
+    notmuch_string_map_t *user_prefix;
+    notmuch_string_map_t *user_header;
 };
 
 /* Prior to database version 3, features were implied by the database
diff --git a/lib/database.cc b/lib/database.cc
index a3a17b68..80235344 100644
--- a/lib/database.cc
+++ b/lib/database.cc
@@ -334,6 +334,14 @@ _setup_user_query_fields (notmuch_database_t *notmuch)
     notmuch_config_list_t *list;
     notmuch_status_t status;
 
+    notmuch->user_prefix = _notmuch_string_map_create (notmuch);
+    if (notmuch->user_prefix == NULL)
+	return NOTMUCH_STATUS_OUT_OF_MEMORY;
+
+    notmuch->user_header = _notmuch_string_map_create (notmuch);
+    if (notmuch->user_header == NULL)
+	return NOTMUCH_STATUS_OUT_OF_MEMORY;
+
     status = notmuch_database_get_config_list (notmuch, CONFIG_HEADER_PREFIX, &list);
     if (status)
 	return status;
@@ -345,6 +353,14 @@ _setup_user_query_fields (notmuch_database_t *notmuch)
 	const char *key = notmuch_config_list_key (list)
 	    + sizeof (CONFIG_HEADER_PREFIX) - 1;
 
+	_notmuch_string_map_append (notmuch->user_prefix,
+				    key,
+				    _user_prefix (notmuch, key));
+
+	_notmuch_string_map_append (notmuch->user_header,
+				    key,
+				    notmuch_config_list_value (list));
+
 	query_field.name = talloc_strdup (notmuch, key);
 	query_field.prefix = _user_prefix (notmuch, key);
 	query_field.flags = NOTMUCH_FIELD_PROBABILISTIC
diff --git a/lib/notmuch-private.h b/lib/notmuch-private.h
index 39d11a91..1ef26e37 100644
--- a/lib/notmuch-private.h
+++ b/lib/notmuch-private.h
@@ -181,6 +181,11 @@ typedef struct _notmuch_doc_id_set notmuch_doc_id_set_t;
 const char *
 _find_prefix (const char *name);
 
+/* Lookup a prefix value by name, including possibly user defined prefixes
+ */
+const char *
+_notmuch_database_prefix (notmuch_database_t  *notmuch, const char *name);
+
 char *
 _notmuch_message_id_compressed (void *ctx, const char *message_id);
 
@@ -678,6 +683,8 @@ struct _notmuch_indexopts {
 
 #define CONFIG_HEADER_PREFIX "index.header."
 
+#define EMPTY_STRING(s) ((s)[0] == '\0')
+
 NOTMUCH_END_DECLS
 
 #ifdef __cplusplus
diff --git a/lib/thread.cc b/lib/thread.cc
index 47c90664..ae830064 100644
--- a/lib/thread.cc
+++ b/lib/thread.cc
@@ -30,8 +30,6 @@
 #define THREAD_DEBUG(format, ...) do {} while (0) /* ignored */
 #endif
 
-#define EMPTY_STRING(s) ((s)[0] == '\0')
-
 struct _notmuch_thread {
     notmuch_database_t *notmuch;
     char *thread_id;
-- 
2.20.1

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH 7/9] lib: support user prefix names in term generation
  2019-04-28 23:10 ` Index user defined headers v2 David Bremner
                     ` (5 preceding siblings ...)
  2019-04-28 23:10   ` [PATCH 6/9] lib: cache user prefixes in database object David Bremner
@ 2019-04-28 23:10   ` David Bremner
  2019-04-28 23:10   ` [PATCH 8/9] lib/database: index user headers David Bremner
  2019-04-28 23:10   ` [PATCH 9/9] doc: document user header indexing David Bremner
  8 siblings, 0 replies; 22+ messages in thread
From: David Bremner @ 2019-04-28 23:10 UTC (permalink / raw)
  To: David Bremner, notmuch

This should not change the indexing process yet as nothing calls
_notmuch_message_gen_terms with a user prefix name. On the other hand,
it should not break anything either.
---
 lib/database.cc       | 20 ++++++++++++++++++++
 lib/message.cc        |  6 +++++-
 lib/notmuch-private.h |  1 +
 3 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/lib/database.cc b/lib/database.cc
index 80235344..a2a33d24 100644
--- a/lib/database.cc
+++ b/lib/database.cc
@@ -422,6 +422,26 @@ _find_prefix (const char *name)
     return "";
 }
 
+/* Like find prefix, but include the possibility of user defined
+ * prefixes specific to this database */
+
+const char *
+_notmuch_database_prefix (notmuch_database_t *notmuch, const char *name)
+{
+    unsigned int i;
+
+    /*XXX TODO: reduce code duplication */
+    for (i = 0; i < ARRAY_SIZE (prefix_table); i++) {
+	if (strcmp (name, prefix_table[i].name) == 0)
+	    return prefix_table[i].prefix;
+    }
+
+    if (notmuch->user_prefix)
+	return _notmuch_string_map_get (notmuch->user_prefix, name);
+
+    return NULL;
+}
+
 static const struct {
     /* NOTMUCH_FEATURE_* value. */
     _notmuch_features value;
diff --git a/lib/message.cc b/lib/message.cc
index 38a48933..a4c2a575 100644
--- a/lib/message.cc
+++ b/lib/message.cc
@@ -1436,8 +1436,12 @@ _notmuch_message_gen_terms (notmuch_message_t *message,
     term_gen->set_termpos (message->termpos);
 
     if (prefix_name) {
+	const char *prefix = _notmuch_database_prefix (message->notmuch, prefix_name);
+	if (prefix == NULL)
+	    return NOTMUCH_PRIVATE_STATUS_BAD_PREFIX;
+
 	_notmuch_message_invalidate_metadata (message, prefix_name);
-	term_gen->index_text (text, 1, _find_prefix (prefix_name));
+	term_gen->index_text (text, 1, prefix);
     } else {
 	term_gen->index_text (text);
     }
diff --git a/lib/notmuch-private.h b/lib/notmuch-private.h
index 1ef26e37..cf08411e 100644
--- a/lib/notmuch-private.h
+++ b/lib/notmuch-private.h
@@ -136,6 +136,7 @@ typedef enum _notmuch_private_status {
     /* Then add our own private values. */
     NOTMUCH_PRIVATE_STATUS_TERM_TOO_LONG = NOTMUCH_STATUS_LAST_STATUS,
     NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND,
+    NOTMUCH_PRIVATE_STATUS_BAD_PREFIX,
 
     NOTMUCH_PRIVATE_STATUS_LAST_STATUS
 } notmuch_private_status_t;
-- 
2.20.1

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH 8/9] lib/database: index user headers.
  2019-04-28 23:10 ` Index user defined headers v2 David Bremner
                     ` (6 preceding siblings ...)
  2019-04-28 23:10   ` [PATCH 7/9] lib: support user prefix names in term generation David Bremner
@ 2019-04-28 23:10   ` David Bremner
  2019-04-28 23:10   ` [PATCH 9/9] doc: document user header indexing David Bremner
  8 siblings, 0 replies; 22+ messages in thread
From: David Bremner @ 2019-04-28 23:10 UTC (permalink / raw)
  To: David Bremner, notmuch

There is an O(log(#user headers)) penalty for the second lookup of the
prefix name.
---
 lib/database.cc          |  6 ++++++
 lib/index.cc             | 27 +++++++++++++++++++++++++++
 lib/notmuch-private.h    |  5 +++++
 test/T750-user-header.sh | 17 +++++++++++++++++
 4 files changed, 55 insertions(+)

diff --git a/lib/database.cc b/lib/database.cc
index a2a33d24..ff1ca2b3 100644
--- a/lib/database.cc
+++ b/lib/database.cc
@@ -322,6 +322,12 @@ _setup_query_field_default (const prefix_t *prefix, notmuch_database_t *notmuch)
 	notmuch->query_parser->add_boolean_prefix (prefix->name, prefix->prefix);
 }
 
+notmuch_string_map_iterator_t *
+_notmuch_database_user_headers (notmuch_database_t *notmuch)
+{
+    return _notmuch_string_map_iterator_create (notmuch->user_header, "", false);
+}
+
 const char *
 _user_prefix (void *ctx, const char* name)
 {
diff --git a/lib/index.cc b/lib/index.cc
index efd9da4c..c2c31e4a 100644
--- a/lib/index.cc
+++ b/lib/index.cc
@@ -595,6 +595,31 @@ _index_encrypted_mime_part (notmuch_message_t *message,
 
 }
 
+static notmuch_status_t
+_notmuch_message_index_user_headers (notmuch_message_t *message, GMimeMessage *mime_message)
+{
+
+    notmuch_database_t *notmuch = notmuch_message_get_database (message);
+    notmuch_string_map_iterator_t *iter = _notmuch_database_user_headers (notmuch);
+
+    for (; _notmuch_string_map_iterator_valid (iter);
+	 _notmuch_string_map_iterator_move_to_next (iter)) {
+
+	const char *prefix_name = _notmuch_string_map_iterator_key (iter);
+
+	const char *header_name = _notmuch_string_map_iterator_value (iter);
+
+	const char *header = g_mime_object_get_header (GMIME_OBJECT (mime_message), header_name);
+	if (header)
+	    _notmuch_message_gen_terms (message, prefix_name, header);
+    }
+
+    if (iter)
+	_notmuch_string_map_iterator_destroy (iter);
+    return NOTMUCH_STATUS_SUCCESS;
+
+}
+
 notmuch_status_t
 _notmuch_message_index_file (notmuch_message_t *message,
 			     notmuch_indexopts_t *indexopts,
@@ -625,6 +650,8 @@ _notmuch_message_index_file (notmuch_message_t *message,
     subject = g_mime_message_get_subject (mime_message);
     _notmuch_message_gen_terms (message, "subject", subject);
 
+    status = _notmuch_message_index_user_headers (message, mime_message);
+
     _index_mime_part (message, indexopts, g_mime_message_get_mime_part (mime_message));
 
     return NOTMUCH_STATUS_SUCCESS;
diff --git a/lib/notmuch-private.h b/lib/notmuch-private.h
index cf08411e..e46df9a8 100644
--- a/lib/notmuch-private.h
+++ b/lib/notmuch-private.h
@@ -652,6 +652,11 @@ _notmuch_string_map_iterator_value (notmuch_string_map_iterator_t *iterator);
 void
 _notmuch_string_map_iterator_destroy (notmuch_string_map_iterator_t *iterator);
 
+/* Create an iterator for user headers. Destroy with
+ * _notmuch_string_map_iterator_destroy. Actually in database.cc*/
+notmuch_string_map_iterator_t *
+_notmuch_database_user_headers (notmuch_database_t *notmuch);
+
 /* tags.c */
 
 notmuch_tags_t *
diff --git a/test/T750-user-header.sh b/test/T750-user-header.sh
index 2d6cc60b..204c052a 100755
--- a/test/T750-user-header.sh
+++ b/test/T750-user-header.sh
@@ -91,4 +91,21 @@ Query((Tmail AND (XUList:notmuchmail@1 PHRASE 2 XUList:org@2)))
 EOF
 test_expect_equal_file EXPECTED OUTPUT
 
+test_begin_subtest "index user header"
+notmuch config set index.header.List "List-Id"
+notmuch reindex '*'
+notmuch search --output=files List:notmuch | notmuch_search_files_sanitize | sort > OUTPUT
+cat <<EOF > EXPECTED
+MAIL_DIR/bar/baz/05:2,
+MAIL_DIR/bar/baz/23:2,
+MAIL_DIR/bar/baz/24:2,
+MAIL_DIR/bar/cur/20:2,
+MAIL_DIR/bar/new/21:2,
+MAIL_DIR/bar/new/22:2,
+MAIL_DIR/foo/cur/08:2,
+MAIL_DIR/foo/new/03:2,
+MAIL_DIR/new/04:2,
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
 test_done
-- 
2.20.1

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH 9/9] doc: document user header indexing.
  2019-04-28 23:10 ` Index user defined headers v2 David Bremner
                     ` (7 preceding siblings ...)
  2019-04-28 23:10   ` [PATCH 8/9] lib/database: index user headers David Bremner
@ 2019-04-28 23:10   ` David Bremner
  8 siblings, 0 replies; 22+ messages in thread
From: David Bremner @ 2019-04-28 23:10 UTC (permalink / raw)
  To: David Bremner, notmuch

It's a bit odd that the primary documentation is in notmuch-config,
but it is consistent with the "query:" prefix.
---
 doc/man1/notmuch-config.rst       | 9 +++++++++
 doc/man7/notmuch-search-terms.rst | 3 +++
 2 files changed, 12 insertions(+)

diff --git a/doc/man1/notmuch-config.rst b/doc/man1/notmuch-config.rst
index 89909808..28487079 100644
--- a/doc/man1/notmuch-config.rst
+++ b/doc/man1/notmuch-config.rst
@@ -195,6 +195,15 @@ The available configuration items are described below.
 
     Default: ``auto``.
 
+**index.header.<prefix>** **[STORED IN DATABASE]**
+    Define the query prefix <prefix>, based on a mail header. For
+    example ``index.header.List=List-Id`` will add a probabilistic
+    prefix ``List:`` that searches the ``List-Id`` field.  User
+    defined prefixes must not start with 'a'...'z'; in particular
+    adding a prefix with the same name as a predefined prefix is not
+    supported. See **notmuch-search-terms(7)** for a list of existing
+    prefixes, and an explanation of probabilistic prefixes.
+
 **built_with.<name>**
     Compile time feature <name>. Current possibilities include
     "compact" (see **notmuch-compact(1)**) and "field_processor" (see
diff --git a/doc/man7/notmuch-search-terms.rst b/doc/man7/notmuch-search-terms.rst
index fd8bf634..1dd2dc58 100644
--- a/doc/man7/notmuch-search-terms.rst
+++ b/doc/man7/notmuch-search-terms.rst
@@ -169,6 +169,9 @@ property:<key>=<value>
     can be present on a given message with several different values.
     See **notmuch-properties(7)** for more details.
 
+User defined prefixes are also supported, see **notmuch-config(1)** for
+details.
+
 Operators
 ---------
 
-- 
2.20.1

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* Re: Index user defined headers
  2019-04-26 11:15 ` Index user defined headers David Bremner
@ 2019-05-25 10:38   ` David Bremner
  0 siblings, 0 replies; 22+ messages in thread
From: David Bremner @ 2019-05-25 10:38 UTC (permalink / raw)
  To: notmuch

David Bremner <david@tethera.net> writes:

> David Bremner <david@tethera.net> writes:
>
>> This obsoletes [1]. Compared to the previous version the main change
>> is that it imposes the restriction that user defined prefixes may not
>> start with [a-z], and must consist of "unicode word characters". This
>> assumes a utf8 input encoding. People that don't like utf8 are welcome
>> to use ASCII :P
>>
>> [1]: id:20190302154133.25642-1-david@tethera.net
>> [2]: https://salsa.debian.org/bremner/notmuch/commits/wip/user-headers
>
> Last call for review or other feedback. I'll merge this in the next week
> if I don't hear anything.
>
> d

Next week turned into next month, but this series is merged to
master now.

d

^ permalink raw reply	[flat|nested] 22+ messages in thread

end of thread, other threads:[~2019-05-25 10:37 UTC | newest]

Thread overview: 22+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-03-27 11:16 Index user defined headers David Bremner
2019-03-27 11:16 ` [PATCH 1/9] util: add unicode_word_utf8 David Bremner
2019-03-27 11:16 ` [PATCH 2/9] cli/config: refactor _stored_in_db David Bremner
2019-03-27 11:16 ` [PATCH 3/9] cli/config: support user header index config David Bremner
2019-03-27 11:16 ` [PATCH 4/9] cli/config: check syntax of user configured field names David Bremner
2019-03-27 11:16 ` [PATCH 5/9] lib: setup user headers in query parser David Bremner
2019-03-27 11:16 ` [PATCH 6/9] lib: cache user prefixes in database object David Bremner
2019-03-27 11:16 ` [PATCH 7/9] lib: support user prefix names in term generation David Bremner
2019-03-27 11:16 ` [PATCH 8/9] lib/database: index user headers David Bremner
2019-03-27 11:16 ` [PATCH 9/9] doc: document user header indexing David Bremner
2019-04-26 11:15 ` Index user defined headers David Bremner
2019-05-25 10:38   ` David Bremner
2019-04-28 23:10 ` Index user defined headers v2 David Bremner
2019-04-28 23:10   ` [PATCH 1/9] util: add unicode_word_utf8 David Bremner
2019-04-28 23:10   ` [PATCH 2/9] cli/config: refactor _stored_in_db David Bremner
2019-04-28 23:10   ` [PATCH 3/9] cli/config: support user header index config David Bremner
2019-04-28 23:10   ` [PATCH 4/9] cli/config: check syntax of user configured field names David Bremner
2019-04-28 23:10   ` [PATCH 5/9] lib: setup user headers in query parser David Bremner
2019-04-28 23:10   ` [PATCH 6/9] lib: cache user prefixes in database object David Bremner
2019-04-28 23:10   ` [PATCH 7/9] lib: support user prefix names in term generation David Bremner
2019-04-28 23:10   ` [PATCH 8/9] lib/database: index user headers David Bremner
2019-04-28 23:10   ` [PATCH 9/9] doc: document user header indexing David Bremner

Code repositories for project(s) associated with this public inbox

	https://yhetil.org/notmuch.git/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).