unofficial mirror of notmuch@notmuchmail.org
 help / color / mirror / code / Atom feed
* v3 of boolean folder: patches
@ 2014-03-08 21:19 David Bremner
  2014-03-08 21:19 ` [Patch v3 01/15] lib: refactor folder term update after filename removal David Bremner
                   ` (13 more replies)
  0 siblings, 14 replies; 23+ messages in thread
From: David Bremner @ 2014-03-08 21:19 UTC (permalink / raw)
  To: notmuch

This is a followup to 

     id:cover.1393105055.git.jani@nikula.org

The first 11 patches are very close to that series; in the last 4 I
have added some minimalist infrastructure to download and verify
pre-built test databases.

I decided that the benefits of being able to really apply and test the
patch series outweighed the fact that one of the patches is about
150K. Apologies to those of you on GSM modems and the like.

Interdiff follows 

diff --git a/Makefile b/Makefile
index 0428160..97084b1 100644
--- a/Makefile
+++ b/Makefile
@@ -5,7 +5,8 @@ all:
 # List all subdirectories here. Each contains its own Makefile.local.
 # Use of '=', without '+=', seems to be required for out-of-tree
 # builds to work.
-subdirs = compat completion emacs lib man parse-time-string performance-test util test
+subdirs = compat completion emacs lib man parse-time-string performance-test util test \
+	test/test-databases
 
 # We make all targets depend on the Makefiles themselves.
 global_deps = Makefile Makefile.config Makefile.local \
diff --git a/devel/gen-testdb.sh b/devel/gen-testdb.sh
index c291dff..621b31e 100755
--- a/devel/gen-testdb.sh
+++ b/devel/gen-testdb.sh
@@ -81,6 +81,10 @@ shift `expr $OPTIND - 1`
 
 . ./test-lib.sh
 
+SHORT_CORPUS=$(basename ${CORPUS:-database})
+DBNAME=${SHORT_CORPUS}${SUFFIX}
+TARBALLNAME=${DBNAME}.tar.xz
+
 CORPUS=${CORPUS:-${TEST_DIRECTORY}/corpus}
 
 test_expect_code 0 "notmuch version specified on the command line" \
@@ -112,13 +116,16 @@ cp -a ${CORPUS} ${MAIL_DIR}
 test_expect_code 0 "index the corpus" \
     "notmuch new"
 
-# finally, wrap the resulting mail store and database in a tarball
-DBNAME=database${SUFFIX}
+# wrap the resulting mail store and database in a tarball
+
 cp -a ${MAIL_DIR} ${TMP_DIRECTORY}/${DBNAME}
-tar zcf ${TMP_DIRECTORY}/${DBNAME}.tar.gz -C ${TMP_DIRECTORY} ${DBNAME}
+tar Jcf ${TMP_DIRECTORY}/${TARBALLNAME} -C ${TMP_DIRECTORY} ${DBNAME}
 mkdir -p  ${TEST_DIRECTORY}/test-databases
-cp -a ${TMP_DIRECTORY}/${DBNAME}.tar.gz ${TEST_DIRECTORY}/test-databases
-test_expect_code 0 "create the output tarball ${DBNAME}.tar.gz" \
-    "test -f ${TEST_DIRECTORY}/test-databases/${DBNAME}.tar.gz"
+cp -a ${TMP_DIRECTORY}/${TARBALLNAME} ${TEST_DIRECTORY}/test-databases
+test_expect_code 0 "create the output tarball ${TARBALLNAME}" \
+    "test -f ${TEST_DIRECTORY}/test-databases/${TARBALLNAME}"
 
+# generate a checksum file
+test_expect_code 0 "compute checksum" \
+    "(cd ${TEST_DIRECTORY}/test-databases/ && sha256sum ${TARBALLNAME} > ${TARBALLNAME}.sha256)"
 test_done
diff --git a/test/Makefile.local b/test/Makefile.local
index 99324ba..bfabd15 100644
--- a/test/Makefile.local
+++ b/test/Makefile.local
@@ -2,6 +2,8 @@
 
 dir := test
 
+# save against changes in $(dir)
+test_src_dir := $(dir)
 extra_cflags += -I.
 
 smtp_dummy_srcs =		\
@@ -44,12 +46,12 @@ TEST_BINARIES=$(dir)/arg-test \
 
 test-binaries: $(TEST_BINARIES)
 
-test:	all test-binaries
-	@${dir}/notmuch-test $(OPTIONS)
+test:	all test-binaries fetch-test-databases
+	@${test_src_dir}/notmuch-test $(OPTIONS)
 
 check: test
 
 SRCS := $(SRCS) $(smtp_dummy_srcs)
 CLEAN += $(TEST_BINARIES) $(addsuffix .o,$(TEST_BINARIES)) \
 	 $(dir)/database-test.o \
-	 $(dir)/corpus.mail $(dir)/test-results $(dir)/tmp.*
+	 $(dir)/corpus.mail.* $(dir)/test-results $(dir)/tmp.*
diff --git a/test/README b/test/README
index 79a9b1b..81a1c82 100644
--- a/test/README
+++ b/test/README
@@ -64,6 +64,14 @@ The following command-line options are available when running tests:
 	Pointing this argument at a tmpfs filesystem can improve the
 	speed of the test suite for some users.
 
+Certain tests require precomputed databases to complete. You can fetch these
+databases with
+
+	make download-test-databases
+
+If you do not download the test databases, the relevant tests will be
+skipped.
+
 When invoking the test suite via "make test" any of the above options
 can be specified as follows:
 
diff --git a/test/T530-upgrade.sh b/test/T530-upgrade.sh
index cf9914e..e1e8ac5 100755
--- a/test/T530-upgrade.sh
+++ b/test/T530-upgrade.sh
@@ -3,7 +3,19 @@ test_description="database upgrade"
 
 . ./test-lib.sh
 
-tar zxf $TEST_DIRECTORY/test-databases/database-v1.tar.gz -C ${MAIL_DIR} --strip-components=1
+dbtarball=folders-v1.tar.xz
+
+# XXX: Accomplish the same with test lib helpers
+if [ ! -e ${TEST_DIRECTORY}/test-databases/${dbtarball} ]; then
+    test_subtest_missing_external_prereq_["${dbtarball} - fetch with 'make download-test-databases'"]=t
+fi
+
+test_expect_success \
+    'database checksum' \
+    '( cd $TEST_DIRECTORY/test-databases &&
+       sha256sum --quiet --check --status ${dbtarball}.sha256 )'
+
+tar xf $TEST_DIRECTORY/test-databases/${dbtarball} -C ${MAIL_DIR} --strip-components=1
 
 test_begin_subtest "folder: search does not work with old database version"
 output=$(notmuch search folder:foo)
diff --git a/test/test-databases/.gitignore b/test/test-databases/.gitignore
new file mode 100644
index 0000000..b5624b7
--- /dev/null
+++ b/test/test-databases/.gitignore
@@ -0,0 +1 @@
+*.tar.xz
diff --git a/test/test-databases/Makefile b/test/test-databases/Makefile
new file mode 100644
index 0000000..b250a8b
--- /dev/null
+++ b/test/test-databases/Makefile
@@ -0,0 +1,7 @@
+# See Makefile.local for the list of files to be compiled in this
+# directory.
+all:
+	$(MAKE) -C ../.. all
+
+.DEFAULT:
+	$(MAKE) -C ../.. $@
diff --git a/test/test-databases/Makefile.local b/test/test-databases/Makefile.local
new file mode 100644
index 0000000..e777ada
--- /dev/null
+++ b/test/test-databases/Makefile.local
@@ -0,0 +1,14 @@
+# -*- makefile -*-
+
+TEST_DATABASE_MIRROR=http://notmuchmail.org/releases/test-databases
+
+dir := test/test-databases
+
+test_databases := $(dir)/folders-v1.tar.xz
+
+%.tar.xz:
+	wget -nv -O $@ ${TEST_DATABASE_MIRROR}/$(notdir $@);
+
+download-test-databases: ${test_databases}
+
+DISTCLEAN := $(DISTCLEAN) ${test_databases}
diff --git a/test/test-databases/README b/test/test-databases/README
deleted file mode 100644
index af5defe..0000000
--- a/test/test-databases/README
+++ /dev/null
@@ -1,5 +0,0 @@
-Notmuch test databases
-======================
-
-This directory contains pre-generated databases with their source
-corpus, chiefly for the purpose of testing database upgrade.
diff --git a/test/test-databases/database-v1.tar.gz b/test/test-databases/database-v1.tar.gz
deleted file mode 100644
index bb4df4d..0000000
Binary files a/test/test-databases/database-v1.tar.gz and /dev/null differ
diff --git a/test/test-databases/folders-v1.tar.xz.sha256 b/test/test-databases/folders-v1.tar.xz.sha256
new file mode 100644
index 0000000..01ad79d
--- /dev/null
+++ b/test/test-databases/folders-v1.tar.xz.sha256
@@ -0,0 +1 @@
+ace8a61216756b90a421e23d03910e1228bcb910e197c35e51e29f2cf57b37d9  folders-v1.tar.xz

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [Patch v3 01/15] lib: refactor folder term update after filename removal
  2014-03-08 21:19 v3 of boolean folder: patches David Bremner
@ 2014-03-08 21:19 ` David Bremner
  2014-03-08 21:19 ` [Patch v3 02/15] lib: add support for path: prefix searches David Bremner
                   ` (12 subsequent siblings)
  13 siblings, 0 replies; 23+ messages in thread
From: David Bremner @ 2014-03-08 21:19 UTC (permalink / raw)
  To: notmuch

From: Jani Nikula <jani@nikula.org>

Abstract some blocks of code for reuse. No functional changes.
---
 lib/message.cc | 135 ++++++++++++++++++++++++++++-----------------------------
 1 file changed, 66 insertions(+), 69 deletions(-)

diff --git a/lib/message.cc b/lib/message.cc
index c91f3a5..7aff4ae 100644
--- a/lib/message.cc
+++ b/lib/message.cc
@@ -481,6 +481,68 @@ notmuch_message_get_replies (notmuch_message_t *message)
     return _notmuch_messages_create (message->replies);
 }
 
+static void
+_notmuch_message_remove_terms (notmuch_message_t *message, const char *prefix)
+{
+    Xapian::TermIterator i;
+    size_t prefix_len = strlen (prefix);
+
+    while (1) {
+	i = message->doc.termlist_begin ();
+	i.skip_to (prefix);
+
+	/* Terminate loop when no terms remain with desired prefix. */
+	if (i == message->doc.termlist_end () ||
+	    strncmp ((*i).c_str (), prefix, prefix_len))
+	    break;
+
+	try {
+	    message->doc.remove_term ((*i));
+	} catch (const Xapian::InvalidArgumentError) {
+	    /* Ignore failure to remove non-existent term. */
+	}
+    }
+}
+
+/* Add directory based terms for all filenames of the message. */
+static notmuch_status_t
+_notmuch_message_add_directory_terms (void *ctx, notmuch_message_t *message)
+{
+    const char *direntry_prefix = _find_prefix ("file-direntry");
+    int direntry_prefix_len = strlen (direntry_prefix);
+    Xapian::TermIterator i = message->doc.termlist_begin ();
+    notmuch_status_t status = NOTMUCH_STATUS_SUCCESS;
+
+    for (i.skip_to (direntry_prefix); i != message->doc.termlist_end (); i++) {
+	unsigned int directory_id;
+	const char *direntry, *directory;
+	char *colon;
+
+	/* Terminate loop at first term without desired prefix. */
+	if (strncmp ((*i).c_str (), direntry_prefix, direntry_prefix_len))
+	    break;
+
+	/* Indicate that there are filenames remaining. */
+	status = NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID;
+
+	direntry = (*i).c_str ();
+	direntry += direntry_prefix_len;
+
+	directory_id = strtol (direntry, &colon, 10);
+
+	if (colon == NULL || *colon != ':')
+	    INTERNAL_ERROR ("malformed direntry");
+
+	directory = _notmuch_database_get_directory_path (ctx,
+							  message->notmuch,
+							  directory_id);
+	if (strlen (directory))
+	    _notmuch_message_gen_terms (message, "folder", directory);
+    }
+
+    return status;
+}
+
 /* Add an additional 'filename' for 'message'.
  *
  * This change will not be reflected in the database until the next
@@ -536,17 +598,12 @@ notmuch_status_t
 _notmuch_message_remove_filename (notmuch_message_t *message,
 				  const char *filename)
 {
-    const char *direntry_prefix = _find_prefix ("file-direntry");
-    int direntry_prefix_len = strlen (direntry_prefix);
-    const char *folder_prefix = _find_prefix ("folder");
-    int folder_prefix_len = strlen (folder_prefix);
     void *local = talloc_new (message);
+    const char *folder_prefix = _find_prefix ("folder");
     char *zfolder_prefix = talloc_asprintf(local, "Z%s", folder_prefix);
-    int zfolder_prefix_len = strlen (zfolder_prefix);
     char *direntry;
     notmuch_private_status_t private_status;
     notmuch_status_t status;
-    Xapian::TermIterator i, last;
 
     status = _notmuch_database_filename_to_direntry (
 	local, message->notmuch, filename, NOTMUCH_FIND_LOOKUP, &direntry);
@@ -567,73 +624,13 @@ _notmuch_message_remove_filename (notmuch_message_t *message,
      *  3. adding back terms for all remaining filenames of the message. */
 
     /* 1. removing all "folder:" terms */
-    while (1) {
-	i = message->doc.termlist_begin ();
-	i.skip_to (folder_prefix);
-
-	/* Terminate loop when no terms remain with desired prefix. */
-	if (i == message->doc.termlist_end () ||
-	    strncmp ((*i).c_str (), folder_prefix, folder_prefix_len))
-	{
-	    break;
-	}
-
-	try {
-	    message->doc.remove_term ((*i));
-	} catch (const Xapian::InvalidArgumentError) {
-	    /* Ignore failure to remove non-existent term. */
-	}
-    }
+    _notmuch_message_remove_terms (message, folder_prefix);
 
     /* 2. removing all "folder:" stemmed terms */
-    while (1) {
-	i = message->doc.termlist_begin ();
-	i.skip_to (zfolder_prefix);
-
-	/* Terminate loop when no terms remain with desired prefix. */
-	if (i == message->doc.termlist_end () ||
-	    strncmp ((*i).c_str (), zfolder_prefix, zfolder_prefix_len))
-	{
-	    break;
-	}
-
-	try {
-	    message->doc.remove_term ((*i));
-	} catch (const Xapian::InvalidArgumentError) {
-	    /* Ignore failure to remove non-existent term. */
-	}
-    }
+    _notmuch_message_remove_terms (message, zfolder_prefix);
 
     /* 3. adding back terms for all remaining filenames of the message. */
-    i = message->doc.termlist_begin ();
-    i.skip_to (direntry_prefix);
-
-    for (; i != message->doc.termlist_end (); i++) {
-	unsigned int directory_id;
-	const char *direntry, *directory;
-	char *colon;
-
-	/* Terminate loop at first term without desired prefix. */
-	if (strncmp ((*i).c_str (), direntry_prefix, direntry_prefix_len))
-	    break;
-
-	/* Indicate that there are filenames remaining. */
-	status = NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID;
-
-	direntry = (*i).c_str ();
-	direntry += direntry_prefix_len;
-
-	directory_id = strtol (direntry, &colon, 10);
-
-	if (colon == NULL || *colon != ':')
-	    INTERNAL_ERROR ("malformed direntry");
-
-	directory = _notmuch_database_get_directory_path (local,
-							  message->notmuch,
-							  directory_id);
-	if (strlen (directory))
-	    _notmuch_message_gen_terms (message, "folder", directory);
-    }
+    status = _notmuch_message_add_directory_terms (local, message);
 
     talloc_free (local);
 
-- 
1.8.5.3

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [Patch v3 02/15] lib: add support for path: prefix searches
  2014-03-08 21:19 v3 of boolean folder: patches David Bremner
  2014-03-08 21:19 ` [Patch v3 01/15] lib: refactor folder term update after filename removal David Bremner
@ 2014-03-08 21:19 ` David Bremner
  2014-03-08 21:19 ` [Patch v3 03/15] test: make insert test use the path: prefix David Bremner
                   ` (11 subsequent siblings)
  13 siblings, 0 replies; 23+ messages in thread
From: David Bremner @ 2014-03-08 21:19 UTC (permalink / raw)
  To: notmuch

From: Jani Nikula <jani@nikula.org>

The path: prefix is a literal boolean prefix matching the paths,
relative to the maildir root, of the message files.

path:foo matches all message files in foo (but not in foo/new or
foo/cur).

path:foo/new matches all message files in foo/new.

path:"" matches all message files in the top level maildir.

path:foo/** matches all message files in foo and recursively in all
subdirectories of foo.

path:** matches all message files recursively, i.e. all messages.
---
 lib/database.cc |  7 ++++---
 lib/message.cc  | 52 +++++++++++++++++++++++++++++++++++++++++++++-------
 2 files changed, 49 insertions(+), 10 deletions(-)

diff --git a/lib/database.cc b/lib/database.cc
index f395061..93cc7f5 100644
--- a/lib/database.cc
+++ b/lib/database.cc
@@ -100,8 +100,8 @@ typedef struct {
  * In addition, terms from the content of the message are added with
  * "from", "to", "attachment", and "subject" prefixes for use by the
  * user in searching. Similarly, terms from the path of the mail
- * message are added with a "folder" prefix. But the database doesn't
- * really care itself about any of these.
+ * message are added with "folder" and "path" prefixes. But the
+ * database doesn't really care itself about any of these.
  *
  * The data portion of a mail document is empty.
  *
@@ -208,7 +208,8 @@ static prefix_t BOOLEAN_PREFIX_EXTERNAL[] = {
     { "thread",			"G" },
     { "tag",			"K" },
     { "is",			"K" },
-    { "id",			"Q" }
+    { "id",			"Q" },
+    { "path",			"P" },
 };
 
 static prefix_t PROBABILISTIC_PREFIX[]= {
diff --git a/lib/message.cc b/lib/message.cc
index 7aff4ae..21abe8e 100644
--- a/lib/message.cc
+++ b/lib/message.cc
@@ -504,6 +504,40 @@ _notmuch_message_remove_terms (notmuch_message_t *message, const char *prefix)
     }
 }
 
+#define RECURSIVE_SUFFIX "/**"
+
+/* Add "path:" terms for directory. */
+static notmuch_status_t
+_notmuch_message_add_path_terms (notmuch_message_t *message,
+				 const char *directory)
+{
+    /* Add exact "path:" term. */
+    _notmuch_message_add_term (message, "path", directory);
+
+    if (strlen (directory)) {
+	char *path, *p;
+
+	path = talloc_asprintf (NULL, "%s%s", directory, RECURSIVE_SUFFIX);
+	if (! path)
+	    return NOTMUCH_STATUS_OUT_OF_MEMORY;
+
+	/* Add recursive "path:" terms for directory and all parents. */
+	for (p = path + strlen (path) - 1; p > path; p--) {
+	    if (*p == '/') {
+		strcpy (p, RECURSIVE_SUFFIX);
+		_notmuch_message_add_term (message, "path", path);
+	    }
+	}
+
+	talloc_free (path);
+    }
+
+    /* Recursive all-matching path:** for consistency. */
+    _notmuch_message_add_term (message, "path", "**");
+
+    return NOTMUCH_STATUS_SUCCESS;
+}
+
 /* Add directory based terms for all filenames of the message. */
 static notmuch_status_t
 _notmuch_message_add_directory_terms (void *ctx, notmuch_message_t *message)
@@ -538,6 +572,8 @@ _notmuch_message_add_directory_terms (void *ctx, notmuch_message_t *message)
 							  directory_id);
 	if (strlen (directory))
 	    _notmuch_message_gen_terms (message, "folder", directory);
+
+	_notmuch_message_add_path_terms (message, directory);
     }
 
     return status;
@@ -577,6 +613,8 @@ _notmuch_message_add_filename (notmuch_message_t *message,
     /* New terms allow user to search with folder: specification. */
     _notmuch_message_gen_terms (message, "folder", directory);
 
+    _notmuch_message_add_path_terms (message, directory);
+
     talloc_free (local);
 
     return NOTMUCH_STATUS_SUCCESS;
@@ -618,18 +656,18 @@ _notmuch_message_remove_filename (notmuch_message_t *message,
     if (status)
 	return status;
 
-    /* Re-synchronize "folder:" terms for this message. This requires:
-     *  1. removing all "folder:" terms
-     *  2. removing all "folder:" stemmed terms
-     *  3. adding back terms for all remaining filenames of the message. */
+    /* Re-synchronize "folder:" and "path:" terms for this message. */
 
-    /* 1. removing all "folder:" terms */
+    /* Remove all "folder:" terms. */
     _notmuch_message_remove_terms (message, folder_prefix);
 
-    /* 2. removing all "folder:" stemmed terms */
+    /* Remove all "folder:" stemmed terms. */
     _notmuch_message_remove_terms (message, zfolder_prefix);
 
-    /* 3. adding back terms for all remaining filenames of the message. */
+    /* Remove all "path:" terms. */
+    _notmuch_message_remove_terms (message, _find_prefix ("path"));
+
+    /* Add back terms for all remaining filenames of the message. */
     status = _notmuch_message_add_directory_terms (local, message);
 
     talloc_free (local);
-- 
1.8.5.3

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [Patch v3 03/15] test: make insert test use the path: prefix
  2014-03-08 21:19 v3 of boolean folder: patches David Bremner
  2014-03-08 21:19 ` [Patch v3 01/15] lib: refactor folder term update after filename removal David Bremner
  2014-03-08 21:19 ` [Patch v3 02/15] lib: add support for path: prefix searches David Bremner
@ 2014-03-08 21:19 ` David Bremner
  2014-03-08 21:19 ` [Patch v3 04/15] lib: make folder: prefix literal David Bremner
                   ` (10 subsequent siblings)
  13 siblings, 0 replies; 23+ messages in thread
From: David Bremner @ 2014-03-08 21:19 UTC (permalink / raw)
  To: notmuch

From: Jani Nikula <jani@nikula.org>

This makes the insert test stricter, since path: matches the exact
directory of the message file.
---
 test/T070-insert.sh | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/test/T070-insert.sh b/test/T070-insert.sh
index b77c5e1..ea9db07 100755
--- a/test/T070-insert.sh
+++ b/test/T070-insert.sh
@@ -126,14 +126,14 @@ test_expect_equal "$dirname" "$MAIL_DIR/new"
 test_begin_subtest "Insert message into folder"
 gen_insert_msg
 notmuch insert --folder=Drafts < "$gen_msg_filename"
-output=$(notmuch search --output=files folder:Drafts)
+output=$(notmuch search --output=files path:Drafts/new)
 dirname=$(dirname "$output")
 test_expect_equal "$dirname" "$MAIL_DIR/Drafts/new"
 
 test_begin_subtest "Insert message into folder, add/remove tags"
 gen_insert_msg
 notmuch insert --folder=Drafts +draft -unread < "$gen_msg_filename"
-output=$(notmuch search --output=messages folder:Drafts tag:draft NOT tag:unread)
+output=$(notmuch search --output=messages path:Drafts/cur tag:draft NOT tag:unread)
 test_expect_equal "$output" "id:$gen_msg_id"
 
 gen_insert_msg
@@ -143,21 +143,21 @@ test_expect_code 1 "Insert message into non-existent folder" \
 test_begin_subtest "Insert message, create folder"
 gen_insert_msg
 notmuch insert --folder=F --create-folder +folder < "$gen_msg_filename"
-output=$(notmuch search --output=files folder:F tag:folder)
+output=$(notmuch search --output=files path:F/new tag:folder)
 basename=$(basename "$output")
 test_expect_equal_file "$gen_msg_filename" "$MAIL_DIR/F/new/${basename}"
 
 test_begin_subtest "Insert message, create subfolder"
 gen_insert_msg
 notmuch insert --folder=F/G/H/I/J --create-folder +folder < "$gen_msg_filename"
-output=$(notmuch search --output=files folder:F/G/H/I/J tag:folder)
+output=$(notmuch search --output=files path:F/G/H/I/J/new tag:folder)
 basename=$(basename "$output")
 test_expect_equal_file "$gen_msg_filename" "${MAIL_DIR}/F/G/H/I/J/new/${basename}"
 
 test_begin_subtest "Insert message, create existing subfolder"
 gen_insert_msg
 notmuch insert --folder=F/G/H/I/J --create-folder +folder < "$gen_msg_filename"
-output=$(notmuch count folder:F/G/H/I/J tag:folder)
+output=$(notmuch count path:F/G/H/I/J/new tag:folder)
 test_expect_equal "$output" "2"
 
 gen_insert_msg
-- 
1.8.5.3

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [Patch v3 04/15] lib: make folder: prefix literal
  2014-03-08 21:19 v3 of boolean folder: patches David Bremner
                   ` (2 preceding siblings ...)
  2014-03-08 21:19 ` [Patch v3 03/15] test: make insert test use the path: prefix David Bremner
@ 2014-03-08 21:19 ` David Bremner
  2014-03-08 23:51   ` Austin Clements
  2014-03-08 21:19 ` [Patch v3 05/15] test: fix test for literal folder: search David Bremner
                   ` (9 subsequent siblings)
  13 siblings, 1 reply; 23+ messages in thread
From: David Bremner @ 2014-03-08 21:19 UTC (permalink / raw)
  To: notmuch

From: Jani Nikula <jani@nikula.org>

In Xapian terms, convert the folder: prefix from a probabilistic to a
boolean prefix, matching the paths, relative to the maildir root, of the
message files, ignoring the maildir new and cur leaf directories.

folder:foo matches all message files in foo, foo/new, and foo/cur.

folder:foo/new does *not* match message files in foo/new.

folder:"" matches all message files in the top level maildir and its
new and cur subdirectories.

This change constitutes a database change: bump the database version
and add database upgrade support for folder: terms. The upgrade also
adds path: terms.
---
 lib/database.cc       | 38 ++++++++++++++++++++++--
 lib/message.cc        | 80 ++++++++++++++++++++++++++++++++++++++++++++-------
 lib/notmuch-private.h |  3 ++
 3 files changed, 108 insertions(+), 13 deletions(-)

diff --git a/lib/database.cc b/lib/database.cc
index 93cc7f5..186e3a7 100644
--- a/lib/database.cc
+++ b/lib/database.cc
@@ -42,7 +42,7 @@ typedef struct {
     const char *prefix;
 } prefix_t;
 
-#define NOTMUCH_DATABASE_VERSION 1
+#define NOTMUCH_DATABASE_VERSION 2
 
 #define STRINGIFY(s) _SUB_STRINGIFY(s)
 #define _SUB_STRINGIFY(s) #s
@@ -210,6 +210,7 @@ static prefix_t BOOLEAN_PREFIX_EXTERNAL[] = {
     { "is",			"K" },
     { "id",			"Q" },
     { "path",			"P" },
+    { "folder",			"XFOLDER:" },
 };
 
 static prefix_t PROBABILISTIC_PREFIX[]= {
@@ -217,7 +218,6 @@ static prefix_t PROBABILISTIC_PREFIX[]= {
     { "to",			"XTO" },
     { "attachment",		"XATTACHMENT" },
     { "subject",		"XSUBJECT"},
-    { "folder",			"XFOLDER"}
 };
 
 const char *
@@ -1168,6 +1168,40 @@ notmuch_database_upgrade (notmuch_database_t *notmuch,
 	}
     }
 
+    /*
+     * Prior to version 2, the "folder:" prefix was probabilistic and
+     * stemmed. Change it to the current boolean prefix. Add "path:"
+     * prefixes while at it.
+     */
+    if (version < 2) {
+       notmuch_query_t *query = notmuch_query_create (notmuch, "");
+       notmuch_messages_t *messages;
+       notmuch_message_t *message;
+
+       count = 0;
+       total = notmuch_query_count_messages (query);
+
+       for (messages = notmuch_query_search_messages (query);
+            notmuch_messages_valid (messages);
+            notmuch_messages_move_to_next (messages)) {
+           if (do_progress_notify) {
+               progress_notify (closure, (double) count / total);
+               do_progress_notify = 0;
+           }
+
+           message = notmuch_messages_get (messages);
+
+           _notmuch_message_upgrade_folder (message);
+           _notmuch_message_sync (message);
+
+           notmuch_message_destroy (message);
+
+           count++;
+       }
+
+       notmuch_query_destroy (query);
+    }
+
     db->set_metadata ("version", STRINGIFY (NOTMUCH_DATABASE_VERSION));
     db->flush ();
 
diff --git a/lib/message.cc b/lib/message.cc
index 21abe8e..31cb9f1 100644
--- a/lib/message.cc
+++ b/lib/message.cc
@@ -504,6 +504,56 @@ _notmuch_message_remove_terms (notmuch_message_t *message, const char *prefix)
     }
 }
 
+/* Return true if p points at "new" or "cur". */
+static bool is_maildir (const char *p)
+{
+    return strcmp (p, "cur") == 0 || strcmp (p, "new") == 0;
+}
+
+/* Add "folder:" term for directory. */
+static notmuch_status_t
+_notmuch_message_add_folder_terms (notmuch_message_t *message,
+				   const char *directory)
+{
+    char *folder, *last;
+
+    folder = talloc_strdup (NULL, directory);
+    if (! folder)
+       return NOTMUCH_STATUS_OUT_OF_MEMORY;
+
+    /*
+     * If the message file is in a leaf directory named "new" or
+     * "cur", presume maildir and index the parent directory. Thus a
+     * "folder:" prefix search matches messages in the specified
+     * maildir folder, i.e. in the specified directory and its "new"
+     * and "cur" subdirectories.
+     *
+     * Note that this means the "folder:" prefix can't be used for
+     * distinguishing between message files in "new" or "cur". The
+     * "path:" prefix needs to be used for that.
+     *
+     * Note the deliberate difference to _filename_is_in_maildir(). We
+     * don't want to index different things depending on the existence
+     * or non-existence of all maildir sibling directories "new",
+     * "cur", and "tmp". Doing so would be surprising, and difficult
+     * for the user to fix in case all subdirectories were not in
+     * place during indexing.
+     */
+    last = strrchr (folder, '/');
+    if (last) {
+       if (is_maildir (last + 1))
+           *last = '\0';
+    } else if (is_maildir (folder)) {
+       *folder = '\0';
+    }
+
+    _notmuch_message_add_term (message, "folder", folder);
+
+    talloc_free (folder);
+
+    return NOTMUCH_STATUS_SUCCESS;
+}
+
 #define RECURSIVE_SUFFIX "/**"
 
 /* Add "path:" terms for directory. */
@@ -570,9 +620,8 @@ _notmuch_message_add_directory_terms (void *ctx, notmuch_message_t *message)
 	directory = _notmuch_database_get_directory_path (ctx,
 							  message->notmuch,
 							  directory_id);
-	if (strlen (directory))
-	    _notmuch_message_gen_terms (message, "folder", directory);
 
+	_notmuch_message_add_folder_terms (message, directory);
 	_notmuch_message_add_path_terms (message, directory);
     }
 
@@ -610,9 +659,7 @@ _notmuch_message_add_filename (notmuch_message_t *message,
      * notmuch_directory_get_child_files() . */
     _notmuch_message_add_term (message, "file-direntry", direntry);
 
-    /* New terms allow user to search with folder: specification. */
-    _notmuch_message_gen_terms (message, "folder", directory);
-
+    _notmuch_message_add_folder_terms (message, directory);
     _notmuch_message_add_path_terms (message, directory);
 
     talloc_free (local);
@@ -637,8 +684,6 @@ _notmuch_message_remove_filename (notmuch_message_t *message,
 				  const char *filename)
 {
     void *local = talloc_new (message);
-    const char *folder_prefix = _find_prefix ("folder");
-    char *zfolder_prefix = talloc_asprintf(local, "Z%s", folder_prefix);
     char *direntry;
     notmuch_private_status_t private_status;
     notmuch_status_t status;
@@ -659,10 +704,7 @@ _notmuch_message_remove_filename (notmuch_message_t *message,
     /* Re-synchronize "folder:" and "path:" terms for this message. */
 
     /* Remove all "folder:" terms. */
-    _notmuch_message_remove_terms (message, folder_prefix);
-
-    /* Remove all "folder:" stemmed terms. */
-    _notmuch_message_remove_terms (message, zfolder_prefix);
+    _notmuch_message_remove_terms (message, _find_prefix ("folder"));
 
     /* Remove all "path:" terms. */
     _notmuch_message_remove_terms (message, _find_prefix ("path"));
@@ -675,6 +717,22 @@ _notmuch_message_remove_filename (notmuch_message_t *message,
     return status;
 }
 
+/* Upgrade the "folder:" prefix from V1 to V2. */
+#define FOLDER_PREFIX_V1       "XFOLDER"
+#define ZFOLDER_PREFIX_V1      "Z" FOLDER_PREFIX_V1
+void
+_notmuch_message_upgrade_folder (notmuch_message_t *message)
+{
+    /* Remove all old "folder:" terms. */
+    _notmuch_message_remove_terms (message, FOLDER_PREFIX_V1);
+
+    /* Remove all old "folder:" stemmed terms. */
+    _notmuch_message_remove_terms (message, ZFOLDER_PREFIX_V1);
+
+    /* Add new boolean "folder:" and "path:" terms. */
+    _notmuch_message_add_directory_terms (message, message);
+}
+
 char *
 _notmuch_message_talloc_copy_data (notmuch_message_t *message)
 {
diff --git a/lib/notmuch-private.h b/lib/notmuch-private.h
index af185c7..59eb2bc 100644
--- a/lib/notmuch-private.h
+++ b/lib/notmuch-private.h
@@ -263,6 +263,9 @@ _notmuch_message_gen_terms (notmuch_message_t *message,
 void
 _notmuch_message_upgrade_filename_storage (notmuch_message_t *message);
 
+void
+_notmuch_message_upgrade_folder (notmuch_message_t *message);
+
 notmuch_status_t
 _notmuch_message_add_filename (notmuch_message_t *message,
 			       const char *filename);
-- 
1.8.5.3

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [Patch v3 05/15] test: fix test for literal folder: search
  2014-03-08 21:19 v3 of boolean folder: patches David Bremner
                   ` (3 preceding siblings ...)
  2014-03-08 21:19 ` [Patch v3 04/15] lib: make folder: prefix literal David Bremner
@ 2014-03-08 21:19 ` David Bremner
  2014-03-08 21:19 ` [Patch v3 08/15] test: add tests for the new boolean folder: and path: prefixes David Bremner
                   ` (8 subsequent siblings)
  13 siblings, 0 replies; 23+ messages in thread
From: David Bremner @ 2014-03-08 21:19 UTC (permalink / raw)
  To: notmuch

From: Jani Nikula <jani@nikula.org>

Some of the folder: matching capabilities are lost in the
probabilistic to boolean prefix change. Fix the affected tests to
match the new literal behavior.
---
 test/T100-search-by-folder.sh | 24 +++++++++++++++++++++---
 1 file changed, 21 insertions(+), 3 deletions(-)

diff --git a/test/T100-search-by-folder.sh b/test/T100-search-by-folder.sh
index 5cc2ca8..84ca438 100755
--- a/test/T100-search-by-folder.sh
+++ b/test/T100-search-by-folder.sh
@@ -3,6 +3,7 @@ test_description='"notmuch search" by folder: (with variations)'
 . ./test-lib.sh
 
 add_message '[dir]=bad' '[subject]="To the bone"'
+add_message '[dir]=.' '[subject]="Top level"'
 add_message '[dir]=bad/news' '[subject]="Bears"'
 mkdir -p "${MAIL_DIR}/duplicate/bad/news"
 cp "$gen_msg_filename" "${MAIL_DIR}/duplicate/bad/news"
@@ -12,29 +13,46 @@ add_message '[dir]=things/favorite' '[subject]="Raindrops, whiskers, kettles"'
 add_message '[dir]=things/bad' '[subject]="Bites, stings, sad feelings"'
 
 test_begin_subtest "Single-world folder: specification (multiple results)"
-output=$(notmuch search folder:bad | notmuch_search_sanitize)
+output=$(notmuch search folder:bad folder:bad/news folder:things/bad | notmuch_search_sanitize)
 test_expect_equal "$output" "thread:XXX   2001-01-05 [1/1] Notmuch Test Suite; To the bone (inbox unread)
 thread:XXX   2001-01-05 [1/1] Notmuch Test Suite; Bears (inbox unread)
 thread:XXX   2001-01-05 [1/1] Notmuch Test Suite; Bites, stings, sad feelings (inbox unread)"
 
+test_begin_subtest "Top level folder"
+output=$(notmuch search folder:'""' | notmuch_search_sanitize)
+test_expect_equal "$output" "thread:XXX   2001-01-05 [1/1] Notmuch Test Suite; Top level (inbox unread)"
+
 test_begin_subtest "Two-word path to narrow results to one"
 output=$(notmuch search folder:bad/news | notmuch_search_sanitize)
 test_expect_equal "$output" "thread:XXX   2001-01-05 [1/1] Notmuch Test Suite; Bears (inbox unread)"
 
+test_begin_subtest "Folder search with --output=files"
+output=$(notmuch search --output=files folder:bad/news | sed -e "s,$MAIL_DIR,MAIL_DIR,")
+test_expect_equal "$output" "MAIL_DIR/bad/news/msg-003
+MAIL_DIR/duplicate/bad/news/msg-003"
+
 test_begin_subtest "After removing duplicate instance of matching path"
 rm -r "${MAIL_DIR}/bad/news"
 notmuch new
 output=$(notmuch search folder:bad/news | notmuch_search_sanitize)
+test_expect_equal "$output" ""
+
+test_begin_subtest "Folder search with --output=files part #2"
+output=$(notmuch search --output=files folder:duplicate/bad/news | sed -e "s,$MAIL_DIR,MAIL_DIR,")
+test_expect_equal "$output" "MAIL_DIR/duplicate/bad/news/msg-003"
+
+test_begin_subtest "After removing duplicate instance of matching path part #2"
+output=$(notmuch search folder:duplicate/bad/news | notmuch_search_sanitize)
 test_expect_equal "$output" "thread:XXX   2001-01-05 [1/1] Notmuch Test Suite; Bears (inbox unread)"
 
 test_begin_subtest "After rename, old path returns nothing"
 mv "${MAIL_DIR}/duplicate/bad/news" "${MAIL_DIR}/duplicate/bad/olds"
 notmuch new
-output=$(notmuch search folder:bad/news | notmuch_search_sanitize)
+output=$(notmuch search folder:duplicate/bad/news | notmuch_search_sanitize)
 test_expect_equal "$output" ""
 
 test_begin_subtest "After rename, new path returns result"
-output=$(notmuch search folder:bad/olds | notmuch_search_sanitize)
+output=$(notmuch search folder:duplicate/bad/olds | notmuch_search_sanitize)
 test_expect_equal "$output" "thread:XXX   2001-01-05 [1/1] Notmuch Test Suite; Bears (inbox unread)"
 
 test_done
-- 
1.8.5.3

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [Patch v3 08/15] test: add tests for the new boolean folder: and path: prefixes
  2014-03-08 21:19 v3 of boolean folder: patches David Bremner
                   ` (4 preceding siblings ...)
  2014-03-08 21:19 ` [Patch v3 05/15] test: fix test for literal folder: search David Bremner
@ 2014-03-08 21:19 ` David Bremner
  2014-03-09  2:55   ` Austin Clements
  2014-03-08 21:19 ` [Patch v3 09/15] test: add database upgrade test from format version 1 to 2 David Bremner
                   ` (7 subsequent siblings)
  13 siblings, 1 reply; 23+ messages in thread
From: David Bremner @ 2014-03-08 21:19 UTC (permalink / raw)
  To: notmuch

From: Jani Nikula <jani@nikula.org>

Additional tests for the boolean folder: and path: prefixes using the
new corpus.
---
 test/T101-search-by-folder-and-path.sh | 83 ++++++++++++++++++++++++++++++++++
 1 file changed, 83 insertions(+)
 create mode 100755 test/T101-search-by-folder-and-path.sh

diff --git a/test/T101-search-by-folder-and-path.sh b/test/T101-search-by-folder-and-path.sh
new file mode 100755
index 0000000..9f809e4
--- /dev/null
+++ b/test/T101-search-by-folder-and-path.sh
@@ -0,0 +1,83 @@
+#!/usr/bin/env bash
+test_description='"notmuch search" by folder: and path:'
+. ./test-lib.sh
+
+add_email_corpus folders
+
+test_begin_subtest "folder: search"
+output=$(notmuch search --output=files folder:foo | sed -e "s,$MAIL_DIR,MAIL_DIR," | sort)
+# bar/baz/05:2, and new/03:2, are duplicates of foo/05:2, and
+# foo/new/03:2, respectively
+test_expect_equal "$output" "MAIL_DIR/bar/baz/05:2,
+MAIL_DIR/foo/05:2,
+MAIL_DIR/foo/06:2,
+MAIL_DIR/foo/cur/07:2,
+MAIL_DIR/foo/cur/08:2,
+MAIL_DIR/foo/new/03:2,
+MAIL_DIR/foo/new/09:2,
+MAIL_DIR/foo/new/10:2,
+MAIL_DIR/new/03:2,"
+
+test_begin_subtest "top level folder: search"
+output=$(notmuch search --output=files folder:'""' | sed -e "s,$MAIL_DIR,MAIL_DIR," | sort)
+# foo/new/03:2, is a duplicate of new/03:2,
+test_expect_equal "$output" "MAIL_DIR/01:2,
+MAIL_DIR/02:2,
+MAIL_DIR/cur/29:2,
+MAIL_DIR/cur/30:2,
+MAIL_DIR/cur/31:2,
+MAIL_DIR/cur/32:2,
+MAIL_DIR/cur/33:2,
+MAIL_DIR/cur/34:2,
+MAIL_DIR/cur/35:2,
+MAIL_DIR/cur/36:2,
+MAIL_DIR/cur/37:2,
+MAIL_DIR/cur/38:2,
+MAIL_DIR/cur/39:2,
+MAIL_DIR/cur/40:2,
+MAIL_DIR/cur/41:2,
+MAIL_DIR/cur/42:2,
+MAIL_DIR/cur/43:2,
+MAIL_DIR/cur/44:2,
+MAIL_DIR/cur/45:2,
+MAIL_DIR/cur/46:2,
+MAIL_DIR/cur/47:2,
+MAIL_DIR/cur/48:2,
+MAIL_DIR/cur/49:2,
+MAIL_DIR/cur/50:2,
+MAIL_DIR/cur/52:2,
+MAIL_DIR/cur/53:2,
+MAIL_DIR/foo/new/03:2,
+MAIL_DIR/new/03:2,
+MAIL_DIR/new/04:2,"
+
+test_begin_subtest "path: search"
+output=$(notmuch search --output=files path:"bar" | sed -e "s,$MAIL_DIR,MAIL_DIR," | sort)
+# foo/05:2, is a duplicate of bar/baz/05:2,
+test_expect_equal "$output" "MAIL_DIR/bar/17:2,
+MAIL_DIR/bar/18:2,"
+
+test_begin_subtest "top level path: search"
+output=$(notmuch search --output=files path:'""' | sed -e "s,$MAIL_DIR,MAIL_DIR," | sort)
+test_expect_equal "$output" "MAIL_DIR/01:2,
+MAIL_DIR/02:2,"
+
+test_begin_subtest "recursive path: search"
+output=$(notmuch search --output=files path:"bar/**" | sed -e "s,$MAIL_DIR,MAIL_DIR," | sort)
+# foo/05:2, is a duplicate of bar/baz/05:2,
+test_expect_equal "$output" "MAIL_DIR/bar/17:2,
+MAIL_DIR/bar/18:2,
+MAIL_DIR/bar/baz/05:2,
+MAIL_DIR/bar/baz/23:2,
+MAIL_DIR/bar/baz/24:2,
+MAIL_DIR/bar/baz/cur/25:2,
+MAIL_DIR/bar/baz/cur/26:2,
+MAIL_DIR/bar/baz/new/27:2,
+MAIL_DIR/bar/baz/new/28:2,
+MAIL_DIR/bar/cur/19:2,
+MAIL_DIR/bar/cur/20:2,
+MAIL_DIR/bar/new/21:2,
+MAIL_DIR/bar/new/22:2,
+MAIL_DIR/foo/05:2,"
+
+test_done
-- 
1.8.5.3

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [Patch v3 09/15] test: add database upgrade test from format version 1 to 2
  2014-03-08 21:19 v3 of boolean folder: patches David Bremner
                   ` (5 preceding siblings ...)
  2014-03-08 21:19 ` [Patch v3 08/15] test: add tests for the new boolean folder: and path: prefixes David Bremner
@ 2014-03-08 21:19 ` David Bremner
  2014-03-08 21:19 ` [Patch v3 10/15] man: update man pages for folder: and path: search terms David Bremner
                   ` (6 subsequent siblings)
  13 siblings, 0 replies; 23+ messages in thread
From: David Bremner @ 2014-03-08 21:19 UTC (permalink / raw)
  To: notmuch

From: Jani Nikula <jani@nikula.org>

Test the upgrade from probabilistic to boolean folder: terms, and
addition of path: terms.

The test depends on the pre-built test corpus and database tarball and
checksum file being in place. If they are not, the test is skipped. The
mechanism to fetch the test database will be added later.

At the time of writing, a working test database and checksum file is
available at

   http://notmuchmail.org/releases/test-databases/

It has been noted that some non-GNU environments may lack
sha256sum. We leave this portability issue for a followup patch.
---
 test/T530-upgrade.sh | 115 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 115 insertions(+)
 create mode 100755 test/T530-upgrade.sh

diff --git a/test/T530-upgrade.sh b/test/T530-upgrade.sh
new file mode 100755
index 0000000..a3c2ed1
--- /dev/null
+++ b/test/T530-upgrade.sh
@@ -0,0 +1,115 @@
+#!/usr/bin/env bash
+test_description="database upgrade"
+
+. ./test-lib.sh
+
+dbtarball=folders-v1.tar.xz
+
+# XXX: Accomplish the same with test lib helpers
+if [ ! -e ${TEST_DIRECTORY}/test-databases/${dbtarball} ]; then
+    test_subtest_missing_external_prereq_["${dbtarball}"]=t
+fi
+
+test_expect_success \
+    'database checksum' \
+    '( cd $TEST_DIRECTORY/test-databases &&
+       sha256sum --quiet --check --status ${dbtarball}.sha256 )'
+
+tar xf $TEST_DIRECTORY/test-databases/${dbtarball} -C ${MAIL_DIR} --strip-components=1
+
+test_begin_subtest "folder: search does not work with old database version"
+output=$(notmuch search folder:foo)
+test_expect_equal "$output" ""
+
+test_begin_subtest "path: search does not work with old database version"
+output=$(notmuch search path:foo)
+test_expect_equal "$output" ""
+
+test_begin_subtest "database upgrade from format version 1"
+output=$(notmuch new)
+test_expect_equal "$output" "\
+Welcome to a new version of notmuch! Your database will now be upgraded.
+Your notmuch database has now been upgraded to database format version 2.
+No new mail."
+
+test_begin_subtest "folder: no longer matches in the middle of path"
+output=$(notmuch search folder:baz)
+test_expect_equal "$output" ""
+
+test_begin_subtest "folder: search"
+output=$(notmuch search --output=files folder:foo | sed -e "s,$MAIL_DIR,MAIL_DIR," | sort)
+# bar/baz/05:2, and new/03:2, are duplicates of foo/05:2, and
+# foo/new/03:2, respectively
+test_expect_equal "$output" "MAIL_DIR/bar/baz/05:2,
+MAIL_DIR/foo/05:2,
+MAIL_DIR/foo/06:2,
+MAIL_DIR/foo/cur/07:2,
+MAIL_DIR/foo/cur/08:2,
+MAIL_DIR/foo/new/03:2,
+MAIL_DIR/foo/new/09:2,
+MAIL_DIR/foo/new/10:2,
+MAIL_DIR/new/03:2,"
+
+test_begin_subtest "top level folder: search"
+output=$(notmuch search --output=files folder:'""' | sed -e "s,$MAIL_DIR,MAIL_DIR," | sort)
+# foo/new/03:2, is a duplicate of new/03:2,
+test_expect_equal "$output" "MAIL_DIR/01:2,
+MAIL_DIR/02:2,
+MAIL_DIR/cur/29:2,
+MAIL_DIR/cur/30:2,
+MAIL_DIR/cur/31:2,
+MAIL_DIR/cur/32:2,
+MAIL_DIR/cur/33:2,
+MAIL_DIR/cur/34:2,
+MAIL_DIR/cur/35:2,
+MAIL_DIR/cur/36:2,
+MAIL_DIR/cur/37:2,
+MAIL_DIR/cur/38:2,
+MAIL_DIR/cur/39:2,
+MAIL_DIR/cur/40:2,
+MAIL_DIR/cur/41:2,
+MAIL_DIR/cur/42:2,
+MAIL_DIR/cur/43:2,
+MAIL_DIR/cur/44:2,
+MAIL_DIR/cur/45:2,
+MAIL_DIR/cur/46:2,
+MAIL_DIR/cur/47:2,
+MAIL_DIR/cur/48:2,
+MAIL_DIR/cur/49:2,
+MAIL_DIR/cur/50:2,
+MAIL_DIR/cur/52:2,
+MAIL_DIR/cur/53:2,
+MAIL_DIR/foo/new/03:2,
+MAIL_DIR/new/03:2,
+MAIL_DIR/new/04:2,"
+
+test_begin_subtest "path: search"
+output=$(notmuch search --output=files path:"bar" | sed -e "s,$MAIL_DIR,MAIL_DIR," | sort)
+# foo/05:2, is a duplicate of bar/baz/05:2,
+test_expect_equal "$output" "MAIL_DIR/bar/17:2,
+MAIL_DIR/bar/18:2,"
+
+test_begin_subtest "top level path: search"
+output=$(notmuch search --output=files path:'""' | sed -e "s,$MAIL_DIR,MAIL_DIR," | sort)
+test_expect_equal "$output" "MAIL_DIR/01:2,
+MAIL_DIR/02:2,"
+
+test_begin_subtest "recursive path: search"
+output=$(notmuch search --output=files path:"bar/**" | sed -e "s,$MAIL_DIR,MAIL_DIR," | sort)
+# foo/05:2, is a duplicate of bar/baz/05:2,
+test_expect_equal "$output" "MAIL_DIR/bar/17:2,
+MAIL_DIR/bar/18:2,
+MAIL_DIR/bar/baz/05:2,
+MAIL_DIR/bar/baz/23:2,
+MAIL_DIR/bar/baz/24:2,
+MAIL_DIR/bar/baz/cur/25:2,
+MAIL_DIR/bar/baz/cur/26:2,
+MAIL_DIR/bar/baz/new/27:2,
+MAIL_DIR/bar/baz/new/28:2,
+MAIL_DIR/bar/cur/19:2,
+MAIL_DIR/bar/cur/20:2,
+MAIL_DIR/bar/new/21:2,
+MAIL_DIR/bar/new/22:2,
+MAIL_DIR/foo/05:2,"
+
+test_done
-- 
1.8.5.3

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [Patch v3 10/15] man: update man pages for folder: and path: search terms
  2014-03-08 21:19 v3 of boolean folder: patches David Bremner
                   ` (6 preceding siblings ...)
  2014-03-08 21:19 ` [Patch v3 09/15] test: add database upgrade test from format version 1 to 2 David Bremner
@ 2014-03-08 21:19 ` David Bremner
  2014-03-09  3:52   ` Austin Clements
  2014-03-08 21:19 ` [Patch v3 11/15] man: try to clarify the folder: and path: vs. --output=files confusion David Bremner
                   ` (5 subsequent siblings)
  13 siblings, 1 reply; 23+ messages in thread
From: David Bremner @ 2014-03-08 21:19 UTC (permalink / raw)
  To: notmuch

From: Jani Nikula <jani@nikula.org>

---
 man/man7/notmuch-search-terms.7 | 28 ++++++++++++++++++++++------
 1 file changed, 22 insertions(+), 6 deletions(-)

diff --git a/man/man7/notmuch-search-terms.7 b/man/man7/notmuch-search-terms.7
index a768b63..907403d 100644
--- a/man/man7/notmuch-search-terms.7
+++ b/man/man7/notmuch-search-terms.7
@@ -54,6 +54,8 @@ terms to match against specific portions of an email, (where
 
 	folder:<directory-path>
 
+	path:<directory-path> or path:<directory-path>/**
+
 	date:<since>..<until>
 
 The
@@ -101,12 +103,26 @@ thread ID values can be seen in the first column of output from
 
 The
 .B folder:
-prefix can be used to search for email message files that are
-contained within particular directories within the mail store. If the
-same email message has multiple message files associated with it, it's
-sufficient for a match that at least one of the files is contained
-within a matching directory. Only the directory components below the
-top-level mail database path are available to be searched.
+and
+.B path:
+prefixes can be used to search for email message files that are
+contained within particular directories within the mail store. The
+directories are specified relative to the top-level mail database
+path, and thus only the directory components below that are available
+to be searched.
+
+The
+.B folder:
+prefix matches messages in the specified maildir folder, i.e. in the
+specified directory and its "new" and "cur" subdirectories. The
+.B path:
+prefix matches messages in the specified directory only, unless the
+"/**" suffix is used to denote the specified directory and all its
+subdirectories recursively. For both, the empty string "" matches the
+top level maildir folder or directory. If the same email message has
+multiple message files associated with it, it's sufficient for a match
+that at least one of the files is contained within a matching
+directory.
 
 The
 .B date:
-- 
1.8.5.3

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [Patch v3 11/15] man: try to clarify the folder: and path: vs. --output=files confusion
  2014-03-08 21:19 v3 of boolean folder: patches David Bremner
                   ` (7 preceding siblings ...)
  2014-03-08 21:19 ` [Patch v3 10/15] man: update man pages for folder: and path: search terms David Bremner
@ 2014-03-08 21:19 ` David Bremner
  2014-03-08 21:19 ` [Patch v3 12/15] test: don't use $(dir) in recipes David Bremner
                   ` (4 subsequent siblings)
  13 siblings, 0 replies; 23+ messages in thread
From: David Bremner @ 2014-03-08 21:19 UTC (permalink / raw)
  To: notmuch

From: Jani Nikula <jani@nikula.org>

---
 man/man1/notmuch-search.1 | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/man/man1/notmuch-search.1 b/man/man1/notmuch-search.1
index 55a81e7..a2b1ae4 100644
--- a/man/man1/notmuch-search.1
+++ b/man/man1/notmuch-search.1
@@ -82,8 +82,14 @@ one per line (\-\-format=text), separated by null characters
 S-Expression list (\-\-format=sexp).
 
 Note that each message may have multiple filenames associated with it.
-All of them are included in the output, unless limited with the
-\-\-duplicate=N option.
+All of them are included in the output (unless limited with the
+\-\-duplicate=N option). This may be particularly confusing for
+.B folder:
+or
+.B path:
+searches in a specified directory, as the messages may have duplicates
+in other directories that are included in the output, although these
+files alone would not match the search.
 .RE
 .RS 4
 .TP 4
-- 
1.8.5.3

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [Patch v3 12/15] test: don't use $(dir) in recipes.
  2014-03-08 21:19 v3 of boolean folder: patches David Bremner
                   ` (8 preceding siblings ...)
  2014-03-08 21:19 ` [Patch v3 11/15] man: try to clarify the folder: and path: vs. --output=files confusion David Bremner
@ 2014-03-08 21:19 ` David Bremner
  2014-03-08 21:19 ` [Patch v3 13/15] devel: add script to generate test databases David Bremner
                   ` (3 subsequent siblings)
  13 siblings, 0 replies; 23+ messages in thread
From: David Bremner @ 2014-03-08 21:19 UTC (permalink / raw)
  To: notmuch

According to the semantics of make, the expansion of $(dir) in recipes
uses dynamic scope, i.e. the value at the time the recipe is run. This
means if test/Makefile.local is not the last sub-makefile included,
all heck breaks loose.
---
 test/Makefile.local | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/test/Makefile.local b/test/Makefile.local
index 070c84b..40aa7d8 100644
--- a/test/Makefile.local
+++ b/test/Makefile.local
@@ -2,6 +2,8 @@
 
 dir := test
 
+# save against changes in $(dir)
+test_src_dir := $(dir)
 extra_cflags += -I.
 
 smtp_dummy_srcs =		\
@@ -45,7 +47,7 @@ TEST_BINARIES=$(dir)/arg-test \
 test-binaries: $(TEST_BINARIES)
 
 test:	all test-binaries
-	@${dir}/notmuch-test $(OPTIONS)
+	@${test_src_dir}/notmuch-test $(OPTIONS)
 
 check: test
 
-- 
1.8.5.3

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [Patch v3 13/15] devel: add script to generate test databases
  2014-03-08 21:19 v3 of boolean folder: patches David Bremner
                   ` (9 preceding siblings ...)
  2014-03-08 21:19 ` [Patch v3 12/15] test: don't use $(dir) in recipes David Bremner
@ 2014-03-08 21:19 ` David Bremner
  2014-03-08 21:19 ` [Patch v3 14/15] test: commit folders-v1.tar.xz checksum, ignore actual databases David Bremner
                   ` (2 subsequent siblings)
  13 siblings, 0 replies; 23+ messages in thread
From: David Bremner @ 2014-03-08 21:19 UTC (permalink / raw)
  To: notmuch

From: Jani Nikula <jani@nikula.org>

Add script to generate notmuch test databases using specified versions
of notmuch. This is useful for generating material for database
upgrade tests.

This reuses the test infrastructure to have a sandbox environment for
notmuch new etc.
---
 devel/gen-testdb.sh | 131 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 131 insertions(+)
 create mode 100755 devel/gen-testdb.sh

diff --git a/devel/gen-testdb.sh b/devel/gen-testdb.sh
new file mode 100755
index 0000000..621b31e
--- /dev/null
+++ b/devel/gen-testdb.sh
@@ -0,0 +1,131 @@
+#!/usr/bin/env bash
+#
+# NAME
+#	gen-testdb.sh - generate test databases
+#
+# SYNOPSIS
+#	gen-testdb.sh -v NOTMUCH-VERSION [-c CORPUS-PATH] [-s TAR-SUFFIX]
+#
+# DESCRIPTION
+#	Generate a tarball containing the specified test corpus and
+#	the corresponding notmuch database, indexed using a specific
+#	version of notmuch, resulting in a specific version of the
+#	database.
+#
+#	The specific version of notmuch will be built on the fly.
+#	Therefore the script must be run within a git repository to be
+#	able to build the old versions of notmuch.
+#
+#	This script reuses the test infrastructure, and the script
+#	must be run from within the test directory.
+#
+#	The output tarballs, named database-<TAR-SUFFIX>.tar.xz, are
+#	placed in the test/test-databases directory.
+#
+# OPTIONS
+#	-v NOTMUCH-VERSION
+#		Notmuch version in terms of a git tag or commit to use
+#		for generating the database. Required.
+#
+#	-c CORPUS-PATH
+#		Path to a corpus to use for generating the
+#		database. Due to CWD changes within the test
+#		infrastructure, use absolute paths. Defaults to the
+#		test corpus.
+#
+#	-s TAR-SUFFIX
+#		Suffix for the tarball basename. Empty by default.
+#
+# EXAMPLE
+#
+#	Generate a database indexed with notmuch 0.17. Use the default
+#	test corpus. Name the tarball database-v1.tar.xz to reflect
+#	the fact that notmuch 0.17 used database version 1.
+#
+#	$ cd test
+#	$ ../devel/gen-testdb.sh -v 0.17 -s v1
+#
+# CAVEATS
+#	Test infrastructure options won't work.
+#
+#	Any existing databases with the same name will be overwritten.
+#
+#	It may not be possible to build old versions of notmuch with
+#	the set of dependencies that satisfy building the current
+#	version of notmuch.
+#
+# AUTHOR
+#	Jani Nikula <jani@nikula.org>
+#
+# LICENSE
+#	Same as notmuch test infrastructure (GPLv2+).
+#
+
+test_description="database generation abusing test infrastructure"
+
+# immediate exit on subtest failure; see test_failure_ in test-lib.sh
+immediate=t
+
+VERSION=
+CORPUS=
+SUFFIX=
+
+while getopts v:c:s: opt; do
+    case "$opt" in
+	v) VERSION="$OPTARG";;
+	c) CORPUS="$OPTARG";;
+	s) SUFFIX="-$OPTARG";;
+    esac
+done
+shift `expr $OPTIND - 1`
+
+. ./test-lib.sh
+
+SHORT_CORPUS=$(basename ${CORPUS:-database})
+DBNAME=${SHORT_CORPUS}${SUFFIX}
+TARBALLNAME=${DBNAME}.tar.xz
+
+CORPUS=${CORPUS:-${TEST_DIRECTORY}/corpus}
+
+test_expect_code 0 "notmuch version specified on the command line" \
+    "test -n ${VERSION}"
+
+test_expect_code 0 "the specified version ${VERSION} refers to a commit" \
+    "git show ${VERSION} >/dev/null 2>&1"
+
+BUILD_DIR="notmuch-${VERSION}"
+test_expect_code 0 "generate snapshot of notmuch version ${VERSION}" \
+    "git -C $TEST_DIRECTORY/.. archive --prefix=${BUILD_DIR}/ --format=tar ${VERSION} | tar x"
+
+# force version string
+git describe --match '[0-9.]*' ${VERSION} > ${BUILD_DIR}/version
+
+test_expect_code 0 "configure and build notmuch version ${VERSION}" \
+    "make -C ${BUILD_DIR}"
+
+# use the newly built notmuch
+export PATH=./${BUILD_DIR}:$PATH
+
+test_begin_subtest "verify the newly built notmuch version"
+test_expect_equal "`notmuch --version`" "notmuch `cat ${BUILD_DIR}/version`"
+
+# replace the existing mails, if any, with the specified corpus
+rm -rf ${MAIL_DIR}
+cp -a ${CORPUS} ${MAIL_DIR}
+
+test_expect_code 0 "index the corpus" \
+    "notmuch new"
+
+# wrap the resulting mail store and database in a tarball
+
+cp -a ${MAIL_DIR} ${TMP_DIRECTORY}/${DBNAME}
+tar Jcf ${TMP_DIRECTORY}/${TARBALLNAME} -C ${TMP_DIRECTORY} ${DBNAME}
+mkdir -p  ${TEST_DIRECTORY}/test-databases
+cp -a ${TMP_DIRECTORY}/${TARBALLNAME} ${TEST_DIRECTORY}/test-databases
+test_expect_code 0 "create the output tarball ${TARBALLNAME}" \
+    "test -f ${TEST_DIRECTORY}/test-databases/${TARBALLNAME}"
+
+# generate a checksum file
+test_expect_code 0 "compute checksum" \
+    "(cd ${TEST_DIRECTORY}/test-databases/ && sha256sum ${TARBALLNAME} > ${TARBALLNAME}.sha256)"
+test_done
-- 
1.8.5.3

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [Patch v3 14/15] test: commit folders-v1.tar.xz checksum, ignore actual databases
  2014-03-08 21:19 v3 of boolean folder: patches David Bremner
                   ` (10 preceding siblings ...)
  2014-03-08 21:19 ` [Patch v3 13/15] devel: add script to generate test databases David Bremner
@ 2014-03-08 21:19 ` David Bremner
  2014-03-08 21:19 ` [Patch v3 15/15] test: add machinery to download and verify databases David Bremner
  2014-03-08 21:40 ` v3 of boolean folder: patches David Bremner
  13 siblings, 0 replies; 23+ messages in thread
From: David Bremner @ 2014-03-08 21:19 UTC (permalink / raw)
  To: notmuch

The checksum file is used by the test infrastructure to verify the downloaded
test database is the one we had in mind.  Note that this test is
rather strict, and the checksum file needs to be recommitted when
the database is regenerated.

Add a pattern to .gitignore to ignore the actual databases.
---
 test/test-databases/.gitignore               | 1 +
 test/test-databases/folders-v1.tar.xz.sha256 | 1 +
 2 files changed, 2 insertions(+)
 create mode 100644 test/test-databases/.gitignore
 create mode 100644 test/test-databases/folders-v1.tar.xz.sha256

diff --git a/test/test-databases/.gitignore b/test/test-databases/.gitignore
new file mode 100644
index 0000000..b5624b7
--- /dev/null
+++ b/test/test-databases/.gitignore
@@ -0,0 +1 @@
+*.tar.xz
diff --git a/test/test-databases/folders-v1.tar.xz.sha256 b/test/test-databases/folders-v1.tar.xz.sha256
new file mode 100644
index 0000000..01ad79d
--- /dev/null
+++ b/test/test-databases/folders-v1.tar.xz.sha256
@@ -0,0 +1 @@
+ace8a61216756b90a421e23d03910e1228bcb910e197c35e51e29f2cf57b37d9  folders-v1.tar.xz
-- 
1.8.5.3

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [Patch v3 15/15] test: add machinery to download and verify databases
  2014-03-08 21:19 v3 of boolean folder: patches David Bremner
                   ` (11 preceding siblings ...)
  2014-03-08 21:19 ` [Patch v3 14/15] test: commit folders-v1.tar.xz checksum, ignore actual databases David Bremner
@ 2014-03-08 21:19 ` David Bremner
  2014-03-08 21:40 ` v3 of boolean folder: patches David Bremner
  13 siblings, 0 replies; 23+ messages in thread
From: David Bremner @ 2014-03-08 21:19 UTC (permalink / raw)
  To: notmuch

Note that it is intentional that the checksum file is not
downloaded. The intent is to check those into git.
---
 Makefile                           |  3 ++-
 test/Makefile.local                |  2 +-
 test/README                        |  8 ++++++++
 test/T530-upgrade.sh               |  2 +-
 test/test-databases/Makefile       |  7 +++++++
 test/test-databases/Makefile.local | 14 ++++++++++++++
 6 files changed, 33 insertions(+), 3 deletions(-)
 create mode 100644 test/test-databases/Makefile
 create mode 100644 test/test-databases/Makefile.local

diff --git a/Makefile b/Makefile
index 0428160..97084b1 100644
--- a/Makefile
+++ b/Makefile
@@ -5,7 +5,8 @@ all:
 # List all subdirectories here. Each contains its own Makefile.local.
 # Use of '=', without '+=', seems to be required for out-of-tree
 # builds to work.
-subdirs = compat completion emacs lib man parse-time-string performance-test util test
+subdirs = compat completion emacs lib man parse-time-string performance-test util test \
+	test/test-databases
 
 # We make all targets depend on the Makefiles themselves.
 global_deps = Makefile Makefile.config Makefile.local \
diff --git a/test/Makefile.local b/test/Makefile.local
index 40aa7d8..bfabd15 100644
--- a/test/Makefile.local
+++ b/test/Makefile.local
@@ -46,7 +46,7 @@ TEST_BINARIES=$(dir)/arg-test \
 
 test-binaries: $(TEST_BINARIES)
 
-test:	all test-binaries
+test:	all test-binaries fetch-test-databases
 	@${test_src_dir}/notmuch-test $(OPTIONS)
 
 check: test
diff --git a/test/README b/test/README
index 79a9b1b..81a1c82 100644
--- a/test/README
+++ b/test/README
@@ -64,6 +64,14 @@ The following command-line options are available when running tests:
 	Pointing this argument at a tmpfs filesystem can improve the
 	speed of the test suite for some users.
 
+Certain tests require precomputed databases to complete. You can fetch these
+databases with
+
+	make download-test-databases
+
+If you do not download the test databases, the relevant tests will be
+skipped.
+
 When invoking the test suite via "make test" any of the above options
 can be specified as follows:
 
diff --git a/test/T530-upgrade.sh b/test/T530-upgrade.sh
index a3c2ed1..e1e8ac5 100755
--- a/test/T530-upgrade.sh
+++ b/test/T530-upgrade.sh
@@ -7,7 +7,7 @@ dbtarball=folders-v1.tar.xz
 
 # XXX: Accomplish the same with test lib helpers
 if [ ! -e ${TEST_DIRECTORY}/test-databases/${dbtarball} ]; then
-    test_subtest_missing_external_prereq_["${dbtarball}"]=t
+    test_subtest_missing_external_prereq_["${dbtarball} - fetch with 'make download-test-databases'"]=t
 fi
 
 test_expect_success \
diff --git a/test/test-databases/Makefile b/test/test-databases/Makefile
new file mode 100644
index 0000000..b250a8b
--- /dev/null
+++ b/test/test-databases/Makefile
@@ -0,0 +1,7 @@
+# See Makefile.local for the list of files to be compiled in this
+# directory.
+all:
+	$(MAKE) -C ../.. all
+
+.DEFAULT:
+	$(MAKE) -C ../.. $@
diff --git a/test/test-databases/Makefile.local b/test/test-databases/Makefile.local
new file mode 100644
index 0000000..e777ada
--- /dev/null
+++ b/test/test-databases/Makefile.local
@@ -0,0 +1,14 @@
+# -*- makefile -*-
+
+TEST_DATABASE_MIRROR=http://notmuchmail.org/releases/test-databases
+
+dir := test/test-databases
+
+test_databases := $(dir)/folders-v1.tar.xz
+
+%.tar.xz:
+	wget -nv -O $@ ${TEST_DATABASE_MIRROR}/$(notdir $@);
+
+download-test-databases: ${test_databases}
+
+DISTCLEAN := $(DISTCLEAN) ${test_databases}
-- 
1.8.5.3

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* Re: v3 of boolean folder: patches
  2014-03-08 21:19 v3 of boolean folder: patches David Bremner
                   ` (12 preceding siblings ...)
  2014-03-08 21:19 ` [Patch v3 15/15] test: add machinery to download and verify databases David Bremner
@ 2014-03-08 21:40 ` David Bremner
  2014-03-10 18:10   ` W. Trevor King
  13 siblings, 1 reply; 23+ messages in thread
From: David Bremner @ 2014-03-08 21:40 UTC (permalink / raw)
  To: notmuch

David Bremner <david@tethera.net> writes:

>
> I decided that the benefits of being to really apply and test the
> patch series outweighed the fact that one of patchs is about
> 150K. Apologies to those of you on GSM modems and the like.
>

Oops. I didn't realize the "reorganize the corpus" patch was so big. So
I guess if you want to test the series, you'll have to look at

    git://pivot.cs.unb.ca/notmuch.git
    branch: fetch-databases

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [Patch v3 04/15] lib: make folder: prefix literal
  2014-03-08 21:19 ` [Patch v3 04/15] lib: make folder: prefix literal David Bremner
@ 2014-03-08 23:51   ` Austin Clements
  2014-03-09  8:45     ` Jani Nikula
  0 siblings, 1 reply; 23+ messages in thread
From: Austin Clements @ 2014-03-08 23:51 UTC (permalink / raw)
  To: jani, David Bremner; +Cc: notmuch

Quoth David Bremner on Mar 08 at  5:19 pm:
> From: Jani Nikula <jani@nikula.org>
> 
> In xapian terms, convert folder: prefix from probabilistic to boolean
> prefix, matching the paths, relative form the maildir root, of the

s/form/from/

> message files, ignoring the maildir new and cur leaf directories.
> 
> folder:foo matches all message files in foo, foo/new, and foo/cur.
> 
> folder:foo/new does *not* match message files in foo/new.
> 
> folder:"" matches all message files in the top level maildir and its
> new and cur subdirectories.
> 
> This change constitutes a database change: bump the database version
> and add database upgrade support for folder: terms. The upgrade also
> adds path: terms.
> ---
>  lib/database.cc       | 38 ++++++++++++++++++++++--
>  lib/message.cc        | 80 ++++++++++++++++++++++++++++++++++++++++++++-------
>  lib/notmuch-private.h |  3 ++
>  3 files changed, 108 insertions(+), 13 deletions(-)
> 
> diff --git a/lib/database.cc b/lib/database.cc
> index 93cc7f5..186e3a7 100644
> --- a/lib/database.cc
> +++ b/lib/database.cc
> @@ -42,7 +42,7 @@ typedef struct {
>      const char *prefix;
>  } prefix_t;
>  
> -#define NOTMUCH_DATABASE_VERSION 1
> +#define NOTMUCH_DATABASE_VERSION 2
>  
>  #define STRINGIFY(s) _SUB_STRINGIFY(s)
>  #define _SUB_STRINGIFY(s) #s
> @@ -210,6 +210,7 @@ static prefix_t BOOLEAN_PREFIX_EXTERNAL[] = {
>      { "is",			"K" },
>      { "id",			"Q" },
>      { "path",			"P" },
> +    { "folder",			"XFOLDER:" },

It took me a while to figure out that the ":" here means that Xapian
will unconditionally use a ":" after the prefix, instead of only using
a ":" when the first letter following the prefix is upper-case ASCII.
Maybe I was only confused by this because I simultaneously knew too
much and not enough about Xapian, but it might be worth a comment.
Something like,

    /* Without the ":", since this is a multi-letter prefix, Xapian
     * will add a colon itself if the first letter of the path is
     * upper-case ASCII.  Including the ":" forces there to always be
     * a colon, which keeps our own logic simpler. */

>  };
>  
>  static prefix_t PROBABILISTIC_PREFIX[]= {
> @@ -217,7 +218,6 @@ static prefix_t PROBABILISTIC_PREFIX[]= {
>      { "to",			"XTO" },
>      { "attachment",		"XATTACHMENT" },
>      { "subject",		"XSUBJECT"},
> -    { "folder",			"XFOLDER"}
>  };
>  
>  const char *
> @@ -1168,6 +1168,40 @@ notmuch_database_upgrade (notmuch_database_t *notmuch,
>  	}
>      }
>  
> +    /*
> +     * Prior to version 2, the "folder:" prefix was probabilistic and
> +     * stemmed. Change it to the current boolean prefix. Add "path:"
> +     * prefixes while at it.
> +     */
> +    if (version < 2) {
> +       notmuch_query_t *query = notmuch_query_create (notmuch, "");

Three space indentation and no tabs?  (It looks like this was in
Jani's v2, also.  I'm guessing at some point there was a copy-paste
from a diff with tabs converted to spaces?)

> +       notmuch_messages_t *messages;
> +       notmuch_message_t *message;
> +
> +       count = 0;
> +       total = notmuch_query_count_messages (query);
> +
> +       for (messages = notmuch_query_search_messages (query);
> +            notmuch_messages_valid (messages);
> +            notmuch_messages_move_to_next (messages)) {
> +           if (do_progress_notify) {
> +               progress_notify (closure, (double) count / total);
> +               do_progress_notify = 0;
> +           }
> +
> +           message = notmuch_messages_get (messages);
> +
> +           _notmuch_message_upgrade_folder (message);
> +           _notmuch_message_sync (message);
> +
> +           notmuch_message_destroy (message);
> +
> +           count++;
> +       }
> +
> +       notmuch_query_destroy (query);
> +    }
> +
>      db->set_metadata ("version", STRINGIFY (NOTMUCH_DATABASE_VERSION));
>      db->flush ();
>  
> diff --git a/lib/message.cc b/lib/message.cc
> index 21abe8e..31cb9f1 100644
> --- a/lib/message.cc
> +++ b/lib/message.cc
> @@ -504,6 +504,56 @@ _notmuch_message_remove_terms (notmuch_message_t *message, const char *prefix)
>      }
>  }
>  
> +/* Return true if p points at "new" or "cur". */
> +static bool is_maildir (const char *p)
> +{
> +    return strcmp (p, "cur") == 0 || strcmp (p, "new") == 0;
> +}
> +
> +/* Add "folder:" term for directory. */
> +static notmuch_status_t
> +_notmuch_message_add_folder_terms (notmuch_message_t *message,
> +				   const char *directory)
> +{
> +    char *folder, *last;
> +
> +    folder = talloc_strdup (NULL, directory);
> +    if (! folder)
> +       return NOTMUCH_STATUS_OUT_OF_MEMORY;

Same formatting problem in this chunk.

> +
> +    /*
> +     * If the message file is in a leaf directory named "new" or
> +     * "cur", presume maildir and index the parent directory. Thus a
> +     * "folder:" prefix search matches messages in the specified
> +     * maildir folder, i.e. in the specified directory and its "new"
> +     * and "cur" subdirectories.
> +     *
> +     * Note that this means the "folder:" prefix can't be used for
> +     * distinguishing between message files in "new" or "cur". The
> +     * "path:" prefix needs to be used for that.
> +     *
> +     * Note the deliberate difference to _filename_is_in_maildir(). We
> +     * don't want to index different things depending on the existence
> +     * or non-existence of all maildir sibling directories "new",
> +     * "cur", and "tmp". Doing so would be surprising, and difficult
> +     * for the user to fix in case all subdirectories were not in
> +     * place during indexing.
> +     */
> +    last = strrchr (folder, '/');
> +    if (last) {
> +       if (is_maildir (last + 1))
> +           *last = '\0';
> +    } else if (is_maildir (folder)) {
> +       *folder = '\0';
> +    }
> +
> +    _notmuch_message_add_term (message, "folder", folder);
> +
> +    talloc_free (folder);
> +
> +    return NOTMUCH_STATUS_SUCCESS;
> +}
> +
>  #define RECURSIVE_SUFFIX "/**"
>  
>  /* Add "path:" terms for directory. */
> @@ -570,9 +620,8 @@ _notmuch_message_add_directory_terms (void *ctx, notmuch_message_t *message)
>  	directory = _notmuch_database_get_directory_path (ctx,
>  							  message->notmuch,
>  							  directory_id);
> -	if (strlen (directory))
> -	    _notmuch_message_gen_terms (message, "folder", directory);
>  
> +	_notmuch_message_add_folder_terms (message, directory);
>  	_notmuch_message_add_path_terms (message, directory);
>      }
>  
> @@ -610,9 +659,7 @@ _notmuch_message_add_filename (notmuch_message_t *message,
>       * notmuch_directory_get_child_files() . */
>      _notmuch_message_add_term (message, "file-direntry", direntry);
>  
> -    /* New terms allow user to search with folder: specification. */
> -    _notmuch_message_gen_terms (message, "folder", directory);
> -
> +    _notmuch_message_add_folder_terms (message, directory);
>      _notmuch_message_add_path_terms (message, directory);
>  
>      talloc_free (local);
> @@ -637,8 +684,6 @@ _notmuch_message_remove_filename (notmuch_message_t *message,
>  				  const char *filename)
>  {
>      void *local = talloc_new (message);
> -    const char *folder_prefix = _find_prefix ("folder");
> -    char *zfolder_prefix = talloc_asprintf(local, "Z%s", folder_prefix);
>      char *direntry;
>      notmuch_private_status_t private_status;
>      notmuch_status_t status;
> @@ -659,10 +704,7 @@ _notmuch_message_remove_filename (notmuch_message_t *message,
>      /* Re-synchronize "folder:" and "path:" terms for this message. */
>  
>      /* Remove all "folder:" terms. */
> -    _notmuch_message_remove_terms (message, folder_prefix);
> -
> -    /* Remove all "folder:" stemmed terms. */
> -    _notmuch_message_remove_terms (message, zfolder_prefix);
> +    _notmuch_message_remove_terms (message, _find_prefix ("folder"));
>  
>      /* Remove all "path:" terms. */
>      _notmuch_message_remove_terms (message, _find_prefix ("path"));
> @@ -675,6 +717,22 @@ _notmuch_message_remove_filename (notmuch_message_t *message,
>      return status;
>  }
>  
> +/* Upgrade the "folder:" prefix from V1 to V2. */
> +#define FOLDER_PREFIX_V1       "XFOLDER"
> +#define ZFOLDER_PREFIX_V1      "Z" FOLDER_PREFIX_V1
> +void
> +_notmuch_message_upgrade_folder (notmuch_message_t *message)
> +{
> +    /* Remove all old "folder:" terms. */
> +    _notmuch_message_remove_terms (message, FOLDER_PREFIX_V1);
> +
> +    /* Remove all old "folder:" stemmed terms. */
> +    _notmuch_message_remove_terms (message, ZFOLDER_PREFIX_V1);
> +
> +    /* Add new boolean "folder:" and "path:" terms. */
> +    _notmuch_message_add_directory_terms (message, message);
> +}
> +
>  char *
>  _notmuch_message_talloc_copy_data (notmuch_message_t *message)
>  {
> diff --git a/lib/notmuch-private.h b/lib/notmuch-private.h
> index af185c7..59eb2bc 100644
> --- a/lib/notmuch-private.h
> +++ b/lib/notmuch-private.h
> @@ -263,6 +263,9 @@ _notmuch_message_gen_terms (notmuch_message_t *message,
>  void
>  _notmuch_message_upgrade_filename_storage (notmuch_message_t *message);
>  
> +void
> +_notmuch_message_upgrade_folder (notmuch_message_t *message);
> +
>  notmuch_status_t
>  _notmuch_message_add_filename (notmuch_message_t *message,
>  			       const char *filename);

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [Patch v3 08/15] test: add tests for the new boolean folder: and path: prefixes
  2014-03-08 21:19 ` [Patch v3 08/15] test: add tests for the new boolean folder: and path: prefixes David Bremner
@ 2014-03-09  2:55   ` Austin Clements
  0 siblings, 0 replies; 23+ messages in thread
From: Austin Clements @ 2014-03-09  2:55 UTC (permalink / raw)
  To: jani, David Bremner; +Cc: notmuch

Quoth David Bremner on Mar 08 at  5:19 pm:
> From: Jani Nikula <jani@nikula.org>
> 
> Additional tests for the boolean folder: and path: prefixes using the
> new corpus.
> ---
>  test/T101-search-by-folder-and-path.sh | 83 ++++++++++++++++++++++++++++++++++

Would it be possible to fold these in to (or simply improve)
T100-search-by-folder.sh?  Maybe just concatenate the two and blow
away the database in the middle.  There's already a lot of ambiguity
between the test scripts (do I put Emacs show tests in T310-emacs
where most of them are, or in T450-emacs-show?) and it would be
unfortunate to introduce more.

>  1 file changed, 83 insertions(+)
>  create mode 100755 test/T101-search-by-folder-and-path.sh
> 
> diff --git a/test/T101-search-by-folder-and-path.sh b/test/T101-search-by-folder-and-path.sh
> new file mode 100755
> index 0000000..9f809e4
> --- /dev/null
> +++ b/test/T101-search-by-folder-and-path.sh
> @@ -0,0 +1,83 @@
> +#!/usr/bin/env bash
> +test_description='"notmuch search" by folder: and path:'
> +. ./test-lib.sh
> +
> +add_email_corpus folders
> +
> +test_begin_subtest "folder: search"
> +output=$(notmuch search --output=files folder:foo | sed -e "s,$MAIL_DIR,MAIL_DIR," | sort)

No need to do this in this series, but it would be nice if this
canonicalization were put into a function in test-lib, since we do it
in a lot of places.

> +# bar/baz/05:2, and new/03:2, are duplicates of foo/05:2, and
> +# foo/new/03:2, respectively
> +test_expect_equal "$output" "MAIL_DIR/bar/baz/05:2,
> +MAIL_DIR/foo/05:2,
> +MAIL_DIR/foo/06:2,
> +MAIL_DIR/foo/cur/07:2,
> +MAIL_DIR/foo/cur/08:2,
> +MAIL_DIR/foo/new/03:2,
> +MAIL_DIR/foo/new/09:2,
> +MAIL_DIR/foo/new/10:2,
> +MAIL_DIR/new/03:2,"
> +
> +test_begin_subtest "top level folder: search"
> +output=$(notmuch search --output=files folder:'""' | sed -e "s,$MAIL_DIR,MAIL_DIR," | sort)
> +# foo/new/03:2, is a duplicate of new/03:2,
> +test_expect_equal "$output" "MAIL_DIR/01:2,
> +MAIL_DIR/02:2,
> +MAIL_DIR/cur/29:2,
> +MAIL_DIR/cur/30:2,
> +MAIL_DIR/cur/31:2,
> +MAIL_DIR/cur/32:2,
> +MAIL_DIR/cur/33:2,
> +MAIL_DIR/cur/34:2,
> +MAIL_DIR/cur/35:2,
> +MAIL_DIR/cur/36:2,
> +MAIL_DIR/cur/37:2,
> +MAIL_DIR/cur/38:2,
> +MAIL_DIR/cur/39:2,
> +MAIL_DIR/cur/40:2,
> +MAIL_DIR/cur/41:2,
> +MAIL_DIR/cur/42:2,
> +MAIL_DIR/cur/43:2,
> +MAIL_DIR/cur/44:2,
> +MAIL_DIR/cur/45:2,
> +MAIL_DIR/cur/46:2,
> +MAIL_DIR/cur/47:2,
> +MAIL_DIR/cur/48:2,
> +MAIL_DIR/cur/49:2,
> +MAIL_DIR/cur/50:2,
> +MAIL_DIR/cur/52:2,
> +MAIL_DIR/cur/53:2,
> +MAIL_DIR/foo/new/03:2,
> +MAIL_DIR/new/03:2,
> +MAIL_DIR/new/04:2,"
> +
> +test_begin_subtest "path: search"
> +output=$(notmuch search --output=files path:"bar" | sed -e "s,$MAIL_DIR,MAIL_DIR," | sort)
> +# foo/05:2, is a duplicate of bar/baz/05:2,
> +test_expect_equal "$output" "MAIL_DIR/bar/17:2,
> +MAIL_DIR/bar/18:2,"
> +
> +test_begin_subtest "top level path: search"
> +output=$(notmuch search --output=files path:'""' | sed -e "s,$MAIL_DIR,MAIL_DIR," | sort)
> +test_expect_equal "$output" "MAIL_DIR/01:2,
> +MAIL_DIR/02:2,"
> +
> +test_begin_subtest "recursive path: search"
> +output=$(notmuch search --output=files path:"bar/**" | sed -e "s,$MAIL_DIR,MAIL_DIR," | sort)
> +# foo/05:2, is a duplicate of bar/baz/05:2,
> +test_expect_equal "$output" "MAIL_DIR/bar/17:2,
> +MAIL_DIR/bar/18:2,
> +MAIL_DIR/bar/baz/05:2,
> +MAIL_DIR/bar/baz/23:2,
> +MAIL_DIR/bar/baz/24:2,
> +MAIL_DIR/bar/baz/cur/25:2,
> +MAIL_DIR/bar/baz/cur/26:2,
> +MAIL_DIR/bar/baz/new/27:2,
> +MAIL_DIR/bar/baz/new/28:2,
> +MAIL_DIR/bar/cur/19:2,
> +MAIL_DIR/bar/cur/20:2,
> +MAIL_DIR/bar/new/21:2,
> +MAIL_DIR/bar/new/22:2,
> +MAIL_DIR/foo/05:2,"
> +
> +test_done

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [Patch v3 10/15] man: update man pages for folder: and path: search terms
  2014-03-08 21:19 ` [Patch v3 10/15] man: update man pages for folder: and path: search terms David Bremner
@ 2014-03-09  3:52   ` Austin Clements
  0 siblings, 0 replies; 23+ messages in thread
From: Austin Clements @ 2014-03-09  3:52 UTC (permalink / raw)
  To: jani, David Bremner; +Cc: notmuch

Quoth David Bremner on Mar 08 at  5:19 pm:
> From: Jani Nikula <jani@nikula.org>
> 
> ---
>  man/man7/notmuch-search-terms.7 | 28 ++++++++++++++++++++++------
>  1 file changed, 22 insertions(+), 6 deletions(-)
> 
> diff --git a/man/man7/notmuch-search-terms.7 b/man/man7/notmuch-search-terms.7
> index a768b63..907403d 100644
> --- a/man/man7/notmuch-search-terms.7
> +++ b/man/man7/notmuch-search-terms.7
> @@ -54,6 +54,8 @@ terms to match against specific portions of an email, (where
>  
>  	folder:<directory-path>

Maybe change to folder:<maildir-folder>?

>  
> +	path:<directory-path> or path:<directory-path>/**
> +
>  	date:<since>..<until>
>  
>  The
> @@ -101,12 +103,26 @@ thread ID values can be seen in the first column of output from
>  
>  The
>  .B folder:
> -prefix can be used to search for email message files that are
> -contained within particular directories within the mail store. If the
> -same email message has multiple message files associated with it, it's
> -sufficient for a match that at least one of the files is contained
> -within a matching directory. Only the directory components below the
> -top-level mail database path are available to be searched.
> +and
> +.B path:
> +prefixes can be used to search for email message files that are
> +contained within particular directories within the mail store. The
> +directories are specified relative from the top-level mail database
> +path, and thus only the directory components below that are available
> +to be searched.
> +
> +The
> +.B folder:
> +prefix matches messages in the specified maildir folder, i.e. in the
> +specified directory and its "new" and "cur" subdirectories. The
> +.B path:
> +prefix matches messages in the specified directory only, unless the
> +"/**" suffix is used to denote the specified directory and all its
> +subdirectories recursively. For both, the empty string "" matches the
> +top level maildir folder or directory. If the same email message has
> +multiple message files associated with it, it's sufficient for a match
> +that at least one of the files is contained within a matching
> +directory.

This isn't bad, but I think it could be more from a user's perspective
without losing the operational details.  What about something like the
following?  This follows much of what the above says, but is
structured somewhat differently and works in some examples.

The
.B path:
prefix searches for email messages that are in particular directories
within the mail store.  The directory must be specified relative to
the top-level maildir (and without the leading slash).  By default,
.B path:
matches messages in the specified directory only.  The "/**" suffix
can be used to match messages in the specified directory and all its
subdirectories recursively.
.B path:""
matches messages in the root of the mail store and, likewise,
.B path:**
matches all messages.

The
.B folder:
prefix searches for email messages by maildir or MH folder.  For
MH-style folders, this is equivalent to \fBpath:\fR.  For maildir,
this includes messages in the "new" and "cur" subdirectories.  The
exact syntax for maildir folders depends on your mail configuration.
For maildir++, \fBfolder:""\fR matches the inbox folder (which is the
root in maildir++), other folder names always start with ".", and
nested folders are separated by "."s, such as
\fBfolder:.classes.topology\fR.  For "file system" maildir, the inbox
is typically \fBfolder:INBOX\fR and nested folders are separated by
slashes, such as \fBfolder:classes/topology\fR.

Both
.B path:
and
.B folder:
will find a message if \fIany\fR copy of that message is in the
specified directory/folder.

>  
>  The
>  .B date:

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [Patch v3 04/15] lib: make folder: prefix literal
  2014-03-08 23:51   ` Austin Clements
@ 2014-03-09  8:45     ` Jani Nikula
  2014-03-09 16:15       ` Austin Clements
  0 siblings, 1 reply; 23+ messages in thread
From: Jani Nikula @ 2014-03-09  8:45 UTC (permalink / raw)
  To: Austin Clements, David Bremner; +Cc: notmuch

On Sun, 09 Mar 2014, Austin Clements <amdragon@MIT.EDU> wrote:
> Quoth David Bremner on Mar 08 at  5:19 pm:
>> From: Jani Nikula <jani@nikula.org>
>> 
>> In xapian terms, convert folder: prefix from probabilistic to boolean
>> prefix, matching the paths, relative form the maildir root, of the
>
> s/form/from/
>
>> message files, ignoring the maildir new and cur leaf directories.
>> 
>> folder:foo matches all message files in foo, foo/new, and foo/cur.
>> 
>> folder:foo/new does *not* match message files in foo/new.
>> 
>> folder:"" matches all message files in the top level maildir and its
>> new and cur subdirectories.
>> 
>> This change constitutes a database change: bump the database version
>> and add database upgrade support for folder: terms. The upgrade also
>> adds path: terms.
>> ---
>>  lib/database.cc       | 38 ++++++++++++++++++++++--
>>  lib/message.cc        | 80 ++++++++++++++++++++++++++++++++++++++++++++-------
>>  lib/notmuch-private.h |  3 ++
>>  3 files changed, 108 insertions(+), 13 deletions(-)
>> 
>> diff --git a/lib/database.cc b/lib/database.cc
>> index 93cc7f5..186e3a7 100644
>> --- a/lib/database.cc
>> +++ b/lib/database.cc
>> @@ -42,7 +42,7 @@ typedef struct {
>>      const char *prefix;
>>  } prefix_t;
>>  
>> -#define NOTMUCH_DATABASE_VERSION 1
>> +#define NOTMUCH_DATABASE_VERSION 2
>>  
>>  #define STRINGIFY(s) _SUB_STRINGIFY(s)
>>  #define _SUB_STRINGIFY(s) #s
>> @@ -210,6 +210,7 @@ static prefix_t BOOLEAN_PREFIX_EXTERNAL[] = {
>>      { "is",			"K" },
>>      { "id",			"Q" },
>>      { "path",			"P" },
>> +    { "folder",			"XFOLDER:" },
>
> It took me a while to figure out that the ":" here means that Xapian
> will unconditionally use a ":" after the prefix, instead of only using
> a ":" when the first letter following the prefix is upper-case ASCII.
> Maybe I was only confused by this because I simultaneously knew too
> much and not enough about Xapian, but it might be worth a comment.
> Something like,
>
>     /* Without the ":", since this is a multi-letter prefix, Xapian
>      * will add a colon itself if the first letter of the path is
>      * upper-case ASCII.  Including the ":" forces there to always be
>      * a colon, which keeps our own logic simpler. */

Do you mean "... first letter of the _prefix_ is ..."?

Jani.

>
>>  };
>>  
>>  static prefix_t PROBABILISTIC_PREFIX[]= {
>> @@ -217,7 +218,6 @@ static prefix_t PROBABILISTIC_PREFIX[]= {
>>      { "to",			"XTO" },
>>      { "attachment",		"XATTACHMENT" },
>>      { "subject",		"XSUBJECT"},
>> -    { "folder",			"XFOLDER"}
>>  };
>>  
>>  const char *
>> @@ -1168,6 +1168,40 @@ notmuch_database_upgrade (notmuch_database_t *notmuch,
>>  	}
>>      }
>>  
>> +    /*
>> +     * Prior to version 2, the "folder:" prefix was probabilistic and
>> +     * stemmed. Change it to the current boolean prefix. Add "path:"
>> +     * prefixes while at it.
>> +     */
>> +    if (version < 2) {
>> +       notmuch_query_t *query = notmuch_query_create (notmuch, "");
>
> Three space indentation and no tabs?  (It looks like this was in
> Jani's v2, also.  I'm guessing at some point there was a copy-paste
> from a diff with tabs converted to spaces?)
>
>> +       notmuch_messages_t *messages;
>> +       notmuch_message_t *message;
>> +
>> +       count = 0;
>> +       total = notmuch_query_count_messages (query);
>> +
>> +       for (messages = notmuch_query_search_messages (query);
>> +            notmuch_messages_valid (messages);
>> +            notmuch_messages_move_to_next (messages)) {
>> +           if (do_progress_notify) {
>> +               progress_notify (closure, (double) count / total);
>> +               do_progress_notify = 0;
>> +           }
>> +
>> +           message = notmuch_messages_get (messages);
>> +
>> +           _notmuch_message_upgrade_folder (message);
>> +           _notmuch_message_sync (message);
>> +
>> +           notmuch_message_destroy (message);
>> +
>> +           count++;
>> +       }
>> +
>> +       notmuch_query_destroy (query);
>> +    }
>> +
>>      db->set_metadata ("version", STRINGIFY (NOTMUCH_DATABASE_VERSION));
>>      db->flush ();
>>  
>> diff --git a/lib/message.cc b/lib/message.cc
>> index 21abe8e..31cb9f1 100644
>> --- a/lib/message.cc
>> +++ b/lib/message.cc
>> @@ -504,6 +504,56 @@ _notmuch_message_remove_terms (notmuch_message_t *message, const char *prefix)
>>      }
>>  }
>>  
>> +/* Return true if p points at "new" or "cur". */
>> +static bool is_maildir (const char *p)
>> +{
>> +    return strcmp (p, "cur") == 0 || strcmp (p, "new") == 0;
>> +}
>> +
>> +/* Add "folder:" term for directory. */
>> +static notmuch_status_t
>> +_notmuch_message_add_folder_terms (notmuch_message_t *message,
>> +				   const char *directory)
>> +{
>> +    char *folder, *last;
>> +
>> +    folder = talloc_strdup (NULL, directory);
>> +    if (! folder)
>> +       return NOTMUCH_STATUS_OUT_OF_MEMORY;
>
> Same formatting problem in this chunk.
>
>> +
>> +    /*
>> +     * If the message file is in a leaf directory named "new" or
>> +     * "cur", presume maildir and index the parent directory. Thus a
>> +     * "folder:" prefix search matches messages in the specified
>> +     * maildir folder, i.e. in the specified directory and its "new"
>> +     * and "cur" subdirectories.
>> +     *
>> +     * Note that this means the "folder:" prefix can't be used for
>> +     * distinguishing between message files in "new" or "cur". The
>> +     * "path:" prefix needs to be used for that.
>> +     *
>> +     * Note the deliberate difference to _filename_is_in_maildir(). We
>> +     * don't want to index different things depending on the existence
>> +     * or non-existence of all maildir sibling directories "new",
>> +     * "cur", and "tmp". Doing so would be surprising, and difficult
>> +     * for the user to fix in case all subdirectories were not in
>> +     * place during indexing.
>> +     */
>> +    last = strrchr (folder, '/');
>> +    if (last) {
>> +       if (is_maildir (last + 1))
>> +           *last = '\0';
>> +    } else if (is_maildir (folder)) {
>> +       *folder = '\0';
>> +    }
>> +
>> +    _notmuch_message_add_term (message, "folder", folder);
>> +
>> +    talloc_free (folder);
>> +
>> +    return NOTMUCH_STATUS_SUCCESS;
>> +}
>> +
>>  #define RECURSIVE_SUFFIX "/**"
>>  
>>  /* Add "path:" terms for directory. */
>> @@ -570,9 +620,8 @@ _notmuch_message_add_directory_terms (void *ctx, notmuch_message_t *message)
>>  	directory = _notmuch_database_get_directory_path (ctx,
>>  							  message->notmuch,
>>  							  directory_id);
>> -	if (strlen (directory))
>> -	    _notmuch_message_gen_terms (message, "folder", directory);
>>  
>> +	_notmuch_message_add_folder_terms (message, directory);
>>  	_notmuch_message_add_path_terms (message, directory);
>>      }
>>  
>> @@ -610,9 +659,7 @@ _notmuch_message_add_filename (notmuch_message_t *message,
>>       * notmuch_directory_get_child_files() . */
>>      _notmuch_message_add_term (message, "file-direntry", direntry);
>>  
>> -    /* New terms allow user to search with folder: specification. */
>> -    _notmuch_message_gen_terms (message, "folder", directory);
>> -
>> +    _notmuch_message_add_folder_terms (message, directory);
>>      _notmuch_message_add_path_terms (message, directory);
>>  
>>      talloc_free (local);
>> @@ -637,8 +684,6 @@ _notmuch_message_remove_filename (notmuch_message_t *message,
>>  				  const char *filename)
>>  {
>>      void *local = talloc_new (message);
>> -    const char *folder_prefix = _find_prefix ("folder");
>> -    char *zfolder_prefix = talloc_asprintf(local, "Z%s", folder_prefix);
>>      char *direntry;
>>      notmuch_private_status_t private_status;
>>      notmuch_status_t status;
>> @@ -659,10 +704,7 @@ _notmuch_message_remove_filename (notmuch_message_t *message,
>>      /* Re-synchronize "folder:" and "path:" terms for this message. */
>>  
>>      /* Remove all "folder:" terms. */
>> -    _notmuch_message_remove_terms (message, folder_prefix);
>> -
>> -    /* Remove all "folder:" stemmed terms. */
>> -    _notmuch_message_remove_terms (message, zfolder_prefix);
>> +    _notmuch_message_remove_terms (message, _find_prefix ("folder"));
>>  
>>      /* Remove all "path:" terms. */
>>      _notmuch_message_remove_terms (message, _find_prefix ("path"));
>> @@ -675,6 +717,22 @@ _notmuch_message_remove_filename (notmuch_message_t *message,
>>      return status;
>>  }
>>  
>> +/* Upgrade the "folder:" prefix from V1 to V2. */
>> +#define FOLDER_PREFIX_V1       "XFOLDER"
>> +#define ZFOLDER_PREFIX_V1      "Z" FOLDER_PREFIX_V1
>> +void
>> +_notmuch_message_upgrade_folder (notmuch_message_t *message)
>> +{
>> +    /* Remove all old "folder:" terms. */
>> +    _notmuch_message_remove_terms (message, FOLDER_PREFIX_V1);
>> +
>> +    /* Remove all old "folder:" stemmed terms. */
>> +    _notmuch_message_remove_terms (message, ZFOLDER_PREFIX_V1);
>> +
>> +    /* Add new boolean "folder:" and "path:" terms. */
>> +    _notmuch_message_add_directory_terms (message, message);
>> +}
>> +
>>  char *
>>  _notmuch_message_talloc_copy_data (notmuch_message_t *message)
>>  {
>> diff --git a/lib/notmuch-private.h b/lib/notmuch-private.h
>> index af185c7..59eb2bc 100644
>> --- a/lib/notmuch-private.h
>> +++ b/lib/notmuch-private.h
>> @@ -263,6 +263,9 @@ _notmuch_message_gen_terms (notmuch_message_t *message,
>>  void
>>  _notmuch_message_upgrade_filename_storage (notmuch_message_t *message);
>>  
>> +void
>> +_notmuch_message_upgrade_folder (notmuch_message_t *message);
>> +
>>  notmuch_status_t
>>  _notmuch_message_add_filename (notmuch_message_t *message,
>>  			       const char *filename);

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [Patch v3 04/15] lib: make folder: prefix literal
  2014-03-09  8:45     ` Jani Nikula
@ 2014-03-09 16:15       ` Austin Clements
  0 siblings, 0 replies; 23+ messages in thread
From: Austin Clements @ 2014-03-09 16:15 UTC (permalink / raw)
  To: Jani Nikula; +Cc: notmuch

Quoth Jani Nikula on Mar 09 at 10:45 am:
> On Sun, 09 Mar 2014, Austin Clements <amdragon@MIT.EDU> wrote:
> > Quoth David Bremner on Mar 08 at  5:19 pm:
> >> From: Jani Nikula <jani@nikula.org>
> >> 
> >> In xapian terms, convert folder: prefix from probabilistic to boolean
> >> prefix, matching the paths, relative form the maildir root, of the
> >
> > s/form/from/
> >
> >> message files, ignoring the maildir new and cur leaf directories.
> >> 
> >> folder:foo matches all message files in foo, foo/new, and foo/cur.
> >> 
> >> folder:foo/new does *not* match message files in foo/new.
> >> 
> >> folder:"" matches all message files in the top level maildir and its
> >> new and cur subdirectories.
> >> 
> >> This change constitutes a database change: bump the database version
> >> and add database upgrade support for folder: terms. The upgrade also
> >> adds path: terms.
> >> ---
> >>  lib/database.cc       | 38 ++++++++++++++++++++++--
> >>  lib/message.cc        | 80 ++++++++++++++++++++++++++++++++++++++++++++-------
> >>  lib/notmuch-private.h |  3 ++
> >>  3 files changed, 108 insertions(+), 13 deletions(-)
> >> 
> >> diff --git a/lib/database.cc b/lib/database.cc
> >> index 93cc7f5..186e3a7 100644
> >> --- a/lib/database.cc
> >> +++ b/lib/database.cc
> >> @@ -42,7 +42,7 @@ typedef struct {
> >>      const char *prefix;
> >>  } prefix_t;
> >>  
> >> -#define NOTMUCH_DATABASE_VERSION 1
> >> +#define NOTMUCH_DATABASE_VERSION 2
> >>  
> >>  #define STRINGIFY(s) _SUB_STRINGIFY(s)
> >>  #define _SUB_STRINGIFY(s) #s
> >> @@ -210,6 +210,7 @@ static prefix_t BOOLEAN_PREFIX_EXTERNAL[] = {
> >>      { "is",			"K" },
> >>      { "id",			"Q" },
> >>      { "path",			"P" },
> >> +    { "folder",			"XFOLDER:" },
> >
> > It took me a while to figure out that the ":" here means that Xapian
> > will unconditionally use a ":" after the prefix, instead of only using
> > a ":" when the first letter following the prefix is upper-case ASCII.
> > Maybe I was only confused by this because I simultaneously knew too
> > much and not enough about Xapian, but it might be worth a comment.
> > Something like,
> >
> >     /* Without the ":", since this is a multi-letter prefix, Xapian
> >      * will add a colon itself if the first letter of the path is
> >      * upper-case ASCII.  Including the ":" forces there to always be
> >      * a colon, which keeps our own logic simpler. */
> 
> Do you mean "... first letter of the _prefix_ is ..."?

I did mean the path.  If the folder prefix were just "XFOLDER", then
Xapian::QueryParser would translate the query folder:foo into the term
XFOLDERfoo like you'd expect, but it would translate the query
folder:Foo into the term XFOLDER:Foo.  We'd have to account for this
when constructing terms and (arguably) when removing terms.  But
"XFOLDER:" suppresses the colon-adding logic, so these two queries
simply map to XFOLDER:foo and XFOLDER:Foo.

> Jani.
> 
> >
> >>  };
> >>  
> >>  static prefix_t PROBABILISTIC_PREFIX[]= {
> >> @@ -217,7 +218,6 @@ static prefix_t PROBABILISTIC_PREFIX[]= {
> >>      { "to",			"XTO" },
> >>      { "attachment",		"XATTACHMENT" },
> >>      { "subject",		"XSUBJECT"},
> >> -    { "folder",			"XFOLDER"}
> >>  };
> >>  
> >>  const char *
> >> @@ -1168,6 +1168,40 @@ notmuch_database_upgrade (notmuch_database_t *notmuch,
> >>  	}
> >>      }
> >>  
> >> +    /*
> >> +     * Prior to version 2, the "folder:" prefix was probabilistic and
> >> +     * stemmed. Change it to the current boolean prefix. Add "path:"
> >> +     * prefixes while at it.
> >> +     */
> >> +    if (version < 2) {
> >> +       notmuch_query_t *query = notmuch_query_create (notmuch, "");
> >
> > Three space indentation and no tabs?  (It looks like this was in
> > Jani's v2, also.  I'm guessing at some point there was a copy-paste
> > from a diff with tabs converted to spaces?)
> >
> >> +       notmuch_messages_t *messages;
> >> +       notmuch_message_t *message;
> >> +
> >> +       count = 0;
> >> +       total = notmuch_query_count_messages (query);
> >> +
> >> +       for (messages = notmuch_query_search_messages (query);
> >> +            notmuch_messages_valid (messages);
> >> +            notmuch_messages_move_to_next (messages)) {
> >> +           if (do_progress_notify) {
> >> +               progress_notify (closure, (double) count / total);
> >> +               do_progress_notify = 0;
> >> +           }
> >> +
> >> +           message = notmuch_messages_get (messages);
> >> +
> >> +           _notmuch_message_upgrade_folder (message);
> >> +           _notmuch_message_sync (message);
> >> +
> >> +           notmuch_message_destroy (message);
> >> +
> >> +           count++;
> >> +       }
> >> +
> >> +       notmuch_query_destroy (query);
> >> +    }
> >> +
> >>      db->set_metadata ("version", STRINGIFY (NOTMUCH_DATABASE_VERSION));
> >>      db->flush ();
> >>  
> >> diff --git a/lib/message.cc b/lib/message.cc
> >> index 21abe8e..31cb9f1 100644
> >> --- a/lib/message.cc
> >> +++ b/lib/message.cc
> >> @@ -504,6 +504,56 @@ _notmuch_message_remove_terms (notmuch_message_t *message, const char *prefix)
> >>      }
> >>  }
> >>  
> >> +/* Return true if p points at "new" or "cur". */
> >> +static bool is_maildir (const char *p)
> >> +{
> >> +    return strcmp (p, "cur") == 0 || strcmp (p, "new") == 0;
> >> +}
> >> +
> >> +/* Add "folder:" term for directory. */
> >> +static notmuch_status_t
> >> +_notmuch_message_add_folder_terms (notmuch_message_t *message,
> >> +				   const char *directory)
> >> +{
> >> +    char *folder, *last;
> >> +
> >> +    folder = talloc_strdup (NULL, directory);
> >> +    if (! folder)
> >> +       return NOTMUCH_STATUS_OUT_OF_MEMORY;
> >
> > Same formatting problem in this chunk.
> >
> >> +
> >> +    /*
> >> +     * If the message file is in a leaf directory named "new" or
> >> +     * "cur", presume maildir and index the parent directory. Thus a
> >> +     * "folder:" prefix search matches messages in the specified
> >> +     * maildir folder, i.e. in the specified directory and its "new"
> >> +     * and "cur" subdirectories.
> >> +     *
> >> +     * Note that this means the "folder:" prefix can't be used for
> >> +     * distinguishing between message files in "new" or "cur". The
> >> +     * "path:" prefix needs to be used for that.
> >> +     *
> >> +     * Note the deliberate difference to _filename_is_in_maildir(). We
> >> +     * don't want to index different things depending on the existence
> >> +     * or non-existence of all maildir sibling directories "new",
> >> +     * "cur", and "tmp". Doing so would be surprising, and difficult
> >> +     * for the user to fix in case all subdirectories were not in
> >> +     * place during indexing.
> >> +     */
> >> +    last = strrchr (folder, '/');
> >> +    if (last) {
> >> +       if (is_maildir (last + 1))
> >> +           *last = '\0';
> >> +    } else if (is_maildir (folder)) {
> >> +       *folder = '\0';
> >> +    }
> >> +
> >> +    _notmuch_message_add_term (message, "folder", folder);
> >> +
> >> +    talloc_free (folder);
> >> +
> >> +    return NOTMUCH_STATUS_SUCCESS;
> >> +}
> >> +
> >>  #define RECURSIVE_SUFFIX "/**"
> >>  
> >>  /* Add "path:" terms for directory. */
> >> @@ -570,9 +620,8 @@ _notmuch_message_add_directory_terms (void *ctx, notmuch_message_t *message)
> >>  	directory = _notmuch_database_get_directory_path (ctx,
> >>  							  message->notmuch,
> >>  							  directory_id);
> >> -	if (strlen (directory))
> >> -	    _notmuch_message_gen_terms (message, "folder", directory);
> >>  
> >> +	_notmuch_message_add_folder_terms (message, directory);
> >>  	_notmuch_message_add_path_terms (message, directory);
> >>      }
> >>  
> >> @@ -610,9 +659,7 @@ _notmuch_message_add_filename (notmuch_message_t *message,
> >>       * notmuch_directory_get_child_files() . */
> >>      _notmuch_message_add_term (message, "file-direntry", direntry);
> >>  
> >> -    /* New terms allow user to search with folder: specification. */
> >> -    _notmuch_message_gen_terms (message, "folder", directory);
> >> -
> >> +    _notmuch_message_add_folder_terms (message, directory);
> >>      _notmuch_message_add_path_terms (message, directory);
> >>  
> >>      talloc_free (local);
> >> @@ -637,8 +684,6 @@ _notmuch_message_remove_filename (notmuch_message_t *message,
> >>  				  const char *filename)
> >>  {
> >>      void *local = talloc_new (message);
> >> -    const char *folder_prefix = _find_prefix ("folder");
> >> -    char *zfolder_prefix = talloc_asprintf(local, "Z%s", folder_prefix);
> >>      char *direntry;
> >>      notmuch_private_status_t private_status;
> >>      notmuch_status_t status;
> >> @@ -659,10 +704,7 @@ _notmuch_message_remove_filename (notmuch_message_t *message,
> >>      /* Re-synchronize "folder:" and "path:" terms for this message. */
> >>  
> >>      /* Remove all "folder:" terms. */
> >> -    _notmuch_message_remove_terms (message, folder_prefix);
> >> -
> >> -    /* Remove all "folder:" stemmed terms. */
> >> -    _notmuch_message_remove_terms (message, zfolder_prefix);
> >> +    _notmuch_message_remove_terms (message, _find_prefix ("folder"));
> >>  
> >>      /* Remove all "path:" terms. */
> >>      _notmuch_message_remove_terms (message, _find_prefix ("path"));
> >> @@ -675,6 +717,22 @@ _notmuch_message_remove_filename (notmuch_message_t *message,
> >>      return status;
> >>  }
> >>  
> >> +/* Upgrade the "folder:" prefix from V1 to V2. */
> >> +#define FOLDER_PREFIX_V1       "XFOLDER"
> >> +#define ZFOLDER_PREFIX_V1      "Z" FOLDER_PREFIX_V1
> >> +void
> >> +_notmuch_message_upgrade_folder (notmuch_message_t *message)
> >> +{
> >> +    /* Remove all old "folder:" terms. */
> >> +    _notmuch_message_remove_terms (message, FOLDER_PREFIX_V1);
> >> +
> >> +    /* Remove all old "folder:" stemmed terms. */
> >> +    _notmuch_message_remove_terms (message, ZFOLDER_PREFIX_V1);
> >> +
> >> +    /* Add new boolean "folder:" and "path:" terms. */
> >> +    _notmuch_message_add_directory_terms (message, message);
> >> +}
> >> +
> >>  char *
> >>  _notmuch_message_talloc_copy_data (notmuch_message_t *message)
> >>  {
> >> diff --git a/lib/notmuch-private.h b/lib/notmuch-private.h
> >> index af185c7..59eb2bc 100644
> >> --- a/lib/notmuch-private.h
> >> +++ b/lib/notmuch-private.h
> >> @@ -263,6 +263,9 @@ _notmuch_message_gen_terms (notmuch_message_t *message,
> >>  void
> >>  _notmuch_message_upgrade_filename_storage (notmuch_message_t *message);
> >>  
> >> +void
> >> +_notmuch_message_upgrade_folder (notmuch_message_t *message);
> >> +
> >>  notmuch_status_t
> >>  _notmuch_message_add_filename (notmuch_message_t *message,
> >>  			       const char *filename);

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: v3 of boolean folder: patches
  2014-03-08 21:40 ` v3 of boolean folder: patches David Bremner
@ 2014-03-10 18:10   ` W. Trevor King
  2014-03-10 18:24     ` Jani Nikula
  0 siblings, 1 reply; 23+ messages in thread
From: W. Trevor King @ 2014-03-10 18:10 UTC (permalink / raw)
  To: David Bremner; +Cc: notmuch

[-- Attachment #1: Type: text/plain, Size: 1174 bytes --]

On Sat, Mar 08, 2014 at 05:40:45PM -0400, David Bremner wrote:
> David Bremner <david@tethera.net> writes:
> 
> >
> > I decided that the benefits of being able to really apply and test the
> > patch series outweighed the fact that one of the patches is about
> > 150K. Apologies to those of you on GSM modems and the like.
> >
> 
> Oops. I didn't realize the "reorganize the corpus" patch was so big. So
> I guess if you want to test the series, you'll have to look at
> 
>     git://pivot.cs.unb.ca/notmuch.git
>     branch: fetch-databases

Since patches 06/15 and 07/15 never made it to the list [1] (or gmane
[2]), I think we need to untag 06/15 in nmbug.  It was tagged in
b3eb5c4 (2014-03-09), but without the messages in my local archive, I
get:

  $ nmbug status
  U       1394313585-28422-7-git-send-email-david@tethera.net     obsolete

Cheers,
Trevor

[1]: http://notmuchmail.org/pipermail/notmuch/2014/thread.html
[2]: http://news.gmane.org/group/gmane.mail.notmuch.general/thread=17412

-- 
This email may be signed or encrypted with GnuPG (http://www.gnupg.org).
For more information, see http://en.wikipedia.org/wiki/Pretty_Good_Privacy

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 836 bytes --]

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: v3 of boolean folder: patches
  2014-03-10 18:10   ` W. Trevor King
@ 2014-03-10 18:24     ` Jani Nikula
  2014-03-10 18:31       ` W. Trevor King
  0 siblings, 1 reply; 23+ messages in thread
From: Jani Nikula @ 2014-03-10 18:24 UTC (permalink / raw)
  To: W. Trevor King, David Bremner; +Cc: notmuch

On Mon, 10 Mar 2014, "W. Trevor King" <wking@tremily.us> wrote:
> Since patches 06/15 and 07/15 never made it to the list [1] (or gmane
> [2]), I think we need to untag 06/15 in nmbug.  It was tagged in
> b3eb5c4 (2014-03-09), but without the messages in my local archive, I
> get:

I untagged it earlier today. Do you still see the problem?

BR,
Jani.

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: v3 of boolean folder: patches
  2014-03-10 18:24     ` Jani Nikula
@ 2014-03-10 18:31       ` W. Trevor King
  0 siblings, 0 replies; 23+ messages in thread
From: W. Trevor King @ 2014-03-10 18:31 UTC (permalink / raw)
  To: Jani Nikula; +Cc: notmuch

[-- Attachment #1: Type: text/plain, Size: 668 bytes --]

On Mon, Mar 10, 2014 at 08:24:50PM +0200, Jani Nikula wrote:
> On Mon, 10 Mar 2014, "W. Trevor King" <wking@tremily.us> wrote:
> > Since patches 06/15 and 07/15 never made it to the list [1] (or gmane
> > [2]), I think we need to untag 06/15 in nmbug.  It was tagged in
> > b3eb5c4 (2014-03-09), but without the messages in my local archive, I
> > get:
> 
> I untagged it earlier today. Do you still see the problem?

Nope, all better :).  Sorry I didn't re-pull before posting :p.

Thanks,
Trevor

-- 
This email may be signed or encrypted with GnuPG (http://www.gnupg.org).
For more information, see http://en.wikipedia.org/wiki/Pretty_Good_Privacy

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 836 bytes --]

^ permalink raw reply	[flat|nested] 23+ messages in thread

end of thread, other threads:[~2014-03-10 18:31 UTC | newest]

Thread overview: 23+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-03-08 21:19 v3 of boolean folder: patches David Bremner
2014-03-08 21:19 ` [Patch v3 01/15] lib: refactor folder term update after filename removal David Bremner
2014-03-08 21:19 ` [Patch v3 02/15] lib: add support for path: prefix searches David Bremner
2014-03-08 21:19 ` [Patch v3 03/15] test: make insert test use the path: prefix David Bremner
2014-03-08 21:19 ` [Patch v3 04/15] lib: make folder: prefix literal David Bremner
2014-03-08 23:51   ` Austin Clements
2014-03-09  8:45     ` Jani Nikula
2014-03-09 16:15       ` Austin Clements
2014-03-08 21:19 ` [Patch v3 05/15] test: fix test for literal folder: search David Bremner
2014-03-08 21:19 ` [Patch v3 08/15] test: add tests for the new boolean folder: and path: prefixes David Bremner
2014-03-09  2:55   ` Austin Clements
2014-03-08 21:19 ` [Patch v3 09/15] test: add database upgrade test from format version 1 to 2 David Bremner
2014-03-08 21:19 ` [Patch v3 10/15] man: update man pages for folder: and path: search terms David Bremner
2014-03-09  3:52   ` Austin Clements
2014-03-08 21:19 ` [Patch v3 11/15] man: try to clarify the folder: and path: vs. --output=files confusion David Bremner
2014-03-08 21:19 ` [Patch v3 12/15] test: don't use $(dir) in recipes David Bremner
2014-03-08 21:19 ` [Patch v3 13/15] devel: add script to generate test databases David Bremner
2014-03-08 21:19 ` [Patch v3 14/15] test: commit folders-v1.tar.xz checksum, ignore actual databases David Bremner
2014-03-08 21:19 ` [Patch v3 15/15] test: add machinery to download and verify databases David Bremner
2014-03-08 21:40 ` v3 of boolean folder: patches David Bremner
2014-03-10 18:10   ` W. Trevor King
2014-03-10 18:24     ` Jani Nikula
2014-03-10 18:31       ` W. Trevor King

Code repositories for project(s) associated with this public inbox

	https://yhetil.org/notmuch.git/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).