unofficial mirror of notmuch@notmuchmail.org
 help / color / mirror / code / Atom feed
* [Patch v8 01/18] parse_tag_line: use enum for return value.
@ 2012-12-21 13:08 david
  2012-12-21 13:08 ` [Patch v8 02/18] tag-util: factor out rules for illegal tags, use in parse_tag_line david
                   ` (18 more replies)
  0 siblings, 19 replies; 25+ messages in thread
From: david @ 2012-12-21 13:08 UTC (permalink / raw)
  To: notmuch; +Cc: David Bremner

From: David Bremner <bremner@debian.org>

This is essentially cosmetic, since success=0 is promised by
the comments in tag-utils.h.
---
 tag-util.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tag-util.c b/tag-util.c
index eab482f..3d588d3 100644
--- a/tag-util.c
+++ b/tag-util.c
@@ -40,7 +40,7 @@ parse_tag_line (void *ctx, char *line,
     char *tok = line;
     size_t tok_len = 0;
     char *line_for_error;
-    int ret = 0;
+    tag_parse_status_t ret = TAG_PARSE_SUCCESS;
 
     chomp_newline (line);
 
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [Patch v8 02/18] tag-util: factor out rules for illegal tags, use in parse_tag_line
  2012-12-21 13:08 [Patch v8 01/18] parse_tag_line: use enum for return value david
@ 2012-12-21 13:08 ` david
  2012-12-21 13:08 ` [Patch v8 03/18] notmuch-tag.c: convert to use tag-utils david
                   ` (17 subsequent siblings)
  18 siblings, 0 replies; 25+ messages in thread
From: david @ 2012-12-21 13:08 UTC (permalink / raw)
  To: notmuch; +Cc: David Bremner

From: David Bremner <bremner@debian.org>

This will allow us to be consistent between batch tagging and command
line tagging as far as what is an illegal tag.
---
 tag-util.c |   36 +++++++++++++++++++++++++++++++-----
 1 file changed, 31 insertions(+), 5 deletions(-)

diff --git a/tag-util.c b/tag-util.c
index 3d588d3..13d9035 100644
--- a/tag-util.c
+++ b/tag-util.c
@@ -31,6 +31,30 @@ line_error (tag_parse_status_t status,
     return status;
 }
 
+/*
+ * Test tags for some forbidden cases.
+ *
+ * return: NULL if OK,
+ *	   explanatory message otherwise.
+ */
+
+static const char *
+illegal_tag (const char *tag, notmuch_bool_t remove)
+{
+
+    if (*tag == '\0' && ! remove)
+	return "empty tag forbidden";
+
+    /* This disallows adding the non-removable tag "-" and
+     * enables notmuch tag to take long options more easily.
+     */
+
+    if (*tag == '-' && ! remove)
+	return "tag starting with '-' forbidden";
+
+    return NULL;
+}
+
 tag_parse_status_t
 parse_tag_line (void *ctx, char *line,
 		tag_op_flag_t flags,
@@ -95,11 +119,13 @@ parse_tag_line (void *ctx, char *line,
 	remove = (*tok == '-');
 	tag = tok + 1;
 
-	/* Maybe refuse empty tags. */
-	if (! (flags & TAG_FLAG_BE_GENEROUS) && *tag == '\0') {
-	    ret = line_error (TAG_PARSE_INVALID, line_for_error,
-			      "empty tag");
-	    goto DONE;
+	/* Maybe refuse illegal tags. */
+	if (! (flags & TAG_FLAG_BE_GENEROUS)) {
+	    const char *msg = illegal_tag (tag, remove);
+	    if (msg) {
+		ret = line_error (TAG_PARSE_INVALID, line_for_error, msg);
+		goto DONE;
+	    }
 	}
 
 	/* Decode tag. */
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [Patch v8 03/18] notmuch-tag.c: convert to use tag-utils
  2012-12-21 13:08 [Patch v8 01/18] parse_tag_line: use enum for return value david
  2012-12-21 13:08 ` [Patch v8 02/18] tag-util: factor out rules for illegal tags, use in parse_tag_line david
@ 2012-12-21 13:08 ` david
  2012-12-21 13:08 ` [Patch v8 04/18] notmuch-tag: factor out double quoting routine david
                   ` (16 subsequent siblings)
  18 siblings, 0 replies; 25+ messages in thread
From: david @ 2012-12-21 13:08 UTC (permalink / raw)
  To: notmuch; +Cc: David Bremner

From: David Bremner <bremner@debian.org>

Command line parsing is factored out into a function
parse_tag_command_line in tag-utils.c.

There is some duplicated code eliminated in tag_query, and a bunch of
translation from using the bare tag_op structs to using that tag-utils
API.
---
 notmuch-tag.c |  109 +++++++++++++--------------------------------------------
 tag-util.c    |   51 +++++++++++++++++++++++++--
 tag-util.h    |   15 ++++++++
 3 files changed, 89 insertions(+), 86 deletions(-)

diff --git a/notmuch-tag.c b/notmuch-tag.c
index 88d559b..0965ee7 100644
--- a/notmuch-tag.c
+++ b/notmuch-tag.c
@@ -19,6 +19,7 @@
  */
 
 #include "notmuch-client.h"
+#include "tag-util.h"
 
 static volatile sig_atomic_t interrupted;
 
@@ -54,14 +55,9 @@ _escape_tag (char *buf, const char *tag)
     return buf;
 }
 
-typedef struct {
-    const char *tag;
-    notmuch_bool_t remove;
-} tag_operation_t;
-
 static char *
 _optimize_tag_query (void *ctx, const char *orig_query_string,
-		     const tag_operation_t *tag_ops)
+		     const tag_op_list_t *list)
 {
     /* This is subtler than it looks.  Xapian ignores the '-' operator
      * at the beginning both queries and parenthesized groups and,
@@ -73,19 +69,20 @@ _optimize_tag_query (void *ctx, const char *orig_query_string,
 
     char *escaped, *query_string;
     const char *join = "";
-    int i;
+    size_t i;
     unsigned int max_tag_len = 0;
 
     /* Don't optimize if there are no tag changes. */
-    if (tag_ops[0].tag == NULL)
+    if (tag_op_list_size (list) == 0)
 	return talloc_strdup (ctx, orig_query_string);
 
     /* Allocate a buffer for escaping tags.  This is large enough to
      * hold a fully escaped tag with every character doubled plus
      * enclosing quotes and a NUL. */
-    for (i = 0; tag_ops[i].tag; i++)
-	if (strlen (tag_ops[i].tag) > max_tag_len)
-	    max_tag_len = strlen (tag_ops[i].tag);
+    for (i = 0; i < tag_op_list_size (list); i++)
+	if (strlen (tag_op_list_tag (list, i)) > max_tag_len)
+	    max_tag_len = strlen (tag_op_list_tag (list, i));
+
     escaped = talloc_array (ctx, char, max_tag_len * 2 + 3);
     if (! escaped)
 	return NULL;
@@ -96,11 +93,11 @@ _optimize_tag_query (void *ctx, const char *orig_query_string,
     else
 	query_string = talloc_asprintf (ctx, "( %s ) and (", orig_query_string);
 
-    for (i = 0; tag_ops[i].tag && query_string; i++) {
+    for (i = 0; i < tag_op_list_size (list) && query_string; i++) {
 	query_string = talloc_asprintf_append_buffer (
 	    query_string, "%s%stag:%s", join,
-	    tag_ops[i].remove ? "" : "not ",
-	    _escape_tag (escaped, tag_ops[i].tag));
+	    tag_op_list_isremove (list, i) ? "" : "not ",
+	    _escape_tag (escaped, tag_op_list_tag (list, i)));
 	join = " or ";
     }
 
@@ -111,17 +108,15 @@ _optimize_tag_query (void *ctx, const char *orig_query_string,
     return query_string;
 }
 
-/* Tag messages matching 'query_string' according to 'tag_ops', which
- * must be an array of tagging operations terminated with an empty
- * element. */
+/* Tag messages matching 'query_string' according to 'tag_ops'
+ */
 static int
 tag_query (void *ctx, notmuch_database_t *notmuch, const char *query_string,
-	   tag_operation_t *tag_ops, notmuch_bool_t synchronize_flags)
+	   tag_op_list_t *tag_ops, tag_op_flag_t flags)
 {
     notmuch_query_t *query;
     notmuch_messages_t *messages;
     notmuch_message_t *message;
-    int i;
 
     /* Optimize the query so it excludes messages that already have
      * the specified set of tags. */
@@ -144,21 +139,7 @@ tag_query (void *ctx, notmuch_database_t *notmuch, const char *query_string,
 	 notmuch_messages_valid (messages) && ! interrupted;
 	 notmuch_messages_move_to_next (messages)) {
 	message = notmuch_messages_get (messages);
-
-	notmuch_message_freeze (message);
-
-	for (i = 0; tag_ops[i].tag; i++) {
-	    if (tag_ops[i].remove)
-		notmuch_message_remove_tag (message, tag_ops[i].tag);
-	    else
-		notmuch_message_add_tag (message, tag_ops[i].tag);
-	}
-
-	notmuch_message_thaw (message);
-
-	if (synchronize_flags)
-	    notmuch_message_tags_to_maildir_flags (message);
-
+	tag_op_list_apply (message, tag_ops, flags | TAG_FLAG_PRE_OPTIMIZED);
 	notmuch_message_destroy (message);
     }
 
@@ -170,15 +151,13 @@ tag_query (void *ctx, notmuch_database_t *notmuch, const char *query_string,
 int
 notmuch_tag_command (void *ctx, int argc, char *argv[])
 {
-    tag_operation_t *tag_ops;
-    int tag_ops_count = 0;
-    char *query_string;
+    tag_op_list_t *tag_ops = NULL;
+    char *query_string = NULL;
     notmuch_config_t *config;
     notmuch_database_t *notmuch;
     struct sigaction action;
-    notmuch_bool_t synchronize_flags;
-    int i;
-    int ret;
+    tag_op_flag_t tag_flags = TAG_FLAG_NONE;
+    int ret = 0;
 
     /* Setup our handler for SIGINT */
     memset (&action, 0, sizeof (struct sigaction));
@@ -187,54 +166,15 @@ notmuch_tag_command (void *ctx, int argc, char *argv[])
     action.sa_flags = SA_RESTART;
     sigaction (SIGINT, &action, NULL);
 
-    argc--; argv++; /* skip subcommand argument */
-
-    /* Array of tagging operations (add or remove), terminated with an
-     * empty element. */
-    tag_ops = talloc_array (ctx, tag_operation_t, argc + 1);
+    tag_ops = tag_op_list_create (ctx);
     if (tag_ops == NULL) {
 	fprintf (stderr, "Out of memory.\n");
 	return 1;
     }
 
-    for (i = 0; i < argc; i++) {
-	if (strcmp (argv[i], "--") == 0) {
-	    i++;
-	    break;
-	}
-	if (argv[i][0] == '+' || argv[i][0] == '-') {
-	    if (argv[i][0] == '+' && argv[i][1] == '\0') {
-		fprintf (stderr, "Error: tag names cannot be empty.\n");
-		return 1;
-	    }
-	    if (argv[i][0] == '+' && argv[i][1] == '-') {
-		/* This disallows adding the non-removable tag "-" and
-		 * enables notmuch tag to take long options in the
-		 * future. */
-		fprintf (stderr, "Error: tag names must not start with '-'.\n");
-		return 1;
-	    }
-	    tag_ops[tag_ops_count].tag = argv[i] + 1;
-	    tag_ops[tag_ops_count].remove = (argv[i][0] == '-');
-	    tag_ops_count++;
-	} else {
-	    break;
-	}
-    }
-
-    tag_ops[tag_ops_count].tag = NULL;
-
-    if (tag_ops_count == 0) {
-	fprintf (stderr, "Error: 'notmuch tag' requires at least one tag to add or remove.\n");
-	return 1;
-    }
-
-    query_string = query_string_from_args (ctx, argc - i, &argv[i]);
-
-    if (*query_string == '\0') {
-	fprintf (stderr, "Error: notmuch tag requires at least one search term.\n");
+    if (parse_tag_command_line (ctx, argc - 1, argv + 1,
+				&query_string, tag_ops))
 	return 1;
-    }
 
     config = notmuch_config_open (ctx, NULL, NULL);
     if (config == NULL)
@@ -244,9 +184,10 @@ notmuch_tag_command (void *ctx, int argc, char *argv[])
 			       NOTMUCH_DATABASE_MODE_READ_WRITE, &notmuch))
 	return 1;
 
-    synchronize_flags = notmuch_config_get_maildir_synchronize_flags (config);
+    if (notmuch_config_get_maildir_synchronize_flags (config))
+	tag_flags |= TAG_FLAG_MAILDIR_SYNC;
 
-    ret = tag_query (ctx, notmuch, query_string, tag_ops, synchronize_flags);
+    ret = tag_query (ctx, notmuch, query_string, tag_ops, tag_flags);
 
     notmuch_database_destroy (notmuch);
 
diff --git a/tag-util.c b/tag-util.c
index 13d9035..f89669a 100644
--- a/tag-util.c
+++ b/tag-util.c
@@ -45,8 +45,9 @@ illegal_tag (const char *tag, notmuch_bool_t remove)
     if (*tag == '\0' && ! remove)
 	return "empty tag forbidden";
 
-    /* This disallows adding the non-removable tag "-" and
-     * enables notmuch tag to take long options more easily.
+    /* This disallows adding tags starting with "-", in particular the
+     * non-removable tag "-" and enables notmuch tag to take long
+     * options more easily.
      */
 
     if (*tag == '-' && ! remove)
@@ -163,6 +164,52 @@ parse_tag_line (void *ctx, char *line,
     return ret;
 }
 
+tag_parse_status_t
+parse_tag_command_line (void *ctx, int argc, char **argv,
+			char **query_str, tag_op_list_t *tag_ops)
+{
+
+    int i;
+
+    tag_op_list_reset (tag_ops);
+
+    for (i = 0; i < argc; i++) {
+	if (strcmp (argv[i], "--") == 0) {
+	    i++;
+	    break;
+	}
+
+	if (argv[i][0] != '+' && argv[i][0] != '-')
+	    break;
+
+	notmuch_bool_t is_remove = argv[i][0] == '-';
+	const char *msg;
+
+	msg = illegal_tag (argv[i] + 1, is_remove);
+	if (msg) {
+	    fprintf (stderr, "Error: %s", msg);
+	    return TAG_PARSE_INVALID;
+	}
+
+	tag_op_list_append (ctx, tag_ops, argv[i] + 1, is_remove);
+    }
+
+    if (tag_op_list_size (tag_ops) == 0) {
+	fprintf (stderr, "Error: 'notmuch tag' requires at least one tag to add or remove.\n");
+	return TAG_PARSE_INVALID;
+    }
+
+    *query_str = query_string_from_args (ctx, argc - i, &argv[i]);
+
+    if (*query_str == NULL || **query_str == '\0') {
+	fprintf (stderr, "Error: notmuch tag requires at least one search term.\n");
+	return TAG_PARSE_INVALID;
+    }
+
+    return TAG_PARSE_SUCCESS;
+}
+
+
 static inline void
 message_error (notmuch_message_t *message,
 	       notmuch_status_t status,
diff --git a/tag-util.h b/tag-util.h
index 99b0fa0..2889736 100644
--- a/tag-util.h
+++ b/tag-util.h
@@ -72,6 +72,21 @@ parse_tag_line (void *ctx, char *line,
 		tag_op_flag_t flags,
 		char **query_str, tag_op_list_t *ops);
 
+
+
+/* Parse a command line of the following format:
+ *
+ * +<tag>|-<tag> [...] [--] <search-terms>
+ *
+ * Output Parameters:
+ *	ops	contains a list of tag operations
+ *	query_str the search terms.
+ */
+
+tag_parse_status_t
+parse_tag_command_line (void *ctx, int argc, char **argv,
+			char **query_str, tag_op_list_t *ops);
+
 /*
  * Create an empty list of tag operations
  *
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [Patch v8 04/18] notmuch-tag: factor out double quoting routine
  2012-12-21 13:08 [Patch v8 01/18] parse_tag_line: use enum for return value david
  2012-12-21 13:08 ` [Patch v8 02/18] tag-util: factor out rules for illegal tags, use in parse_tag_line david
  2012-12-21 13:08 ` [Patch v8 03/18] notmuch-tag.c: convert to use tag-utils david
@ 2012-12-21 13:08 ` david
  2012-12-21 13:08 ` [Patch v8 05/18] util/string-util: add strnspn david
                   ` (15 subsequent siblings)
  18 siblings, 0 replies; 25+ messages in thread
From: david @ 2012-12-21 13:08 UTC (permalink / raw)
  To: notmuch; +Cc: David Bremner

From: David Bremner <bremner@debian.org>

This could live in tag-util as well, but it is really nothing specific
to tags (although the conventions are arguable specific to Xapian).

The API is changed from "caller-allocates" to "readline-like". The scan for
max tag length is pushed down into the double quoting routine.
---
 notmuch-tag.c      |   52 ++++++++++++++++++----------------------------------
 util/string-util.c |   37 +++++++++++++++++++++++++++++++++++++
 util/string-util.h |    8 ++++++++
 3 files changed, 63 insertions(+), 34 deletions(-)

diff --git a/notmuch-tag.c b/notmuch-tag.c
index 0965ee7..a480215 100644
--- a/notmuch-tag.c
+++ b/notmuch-tag.c
@@ -20,6 +20,7 @@
 
 #include "notmuch-client.h"
 #include "tag-util.h"
+#include "string-util.h"
 
 static volatile sig_atomic_t interrupted;
 
@@ -37,25 +38,6 @@ handle_sigint (unused (int sig))
 }
 
 static char *
-_escape_tag (char *buf, const char *tag)
-{
-    const char *in = tag;
-    char *out = buf;
-
-    /* Boolean terms surrounded by double quotes can contain any
-     * character.  Double quotes are quoted by doubling them. */
-    *out++ = '"';
-    while (*in) {
-	if (*in == '"')
-	    *out++ = '"';
-	*out++ = *in++;
-    }
-    *out++ = '"';
-    *out = 0;
-    return buf;
-}
-
-static char *
 _optimize_tag_query (void *ctx, const char *orig_query_string,
 		     const tag_op_list_t *list)
 {
@@ -67,44 +49,46 @@ _optimize_tag_query (void *ctx, const char *orig_query_string,
      * parenthesize and the exclusion part of the query must not use
      * the '-' operator (though the NOT operator is fine). */
 
-    char *escaped, *query_string;
+    char *escaped = NULL;
+    size_t escaped_len = 0;
+    char *query_string;
     const char *join = "";
     size_t i;
-    unsigned int max_tag_len = 0;
 
     /* Don't optimize if there are no tag changes. */
     if (tag_op_list_size (list) == 0)
 	return talloc_strdup (ctx, orig_query_string);
 
-    /* Allocate a buffer for escaping tags.  This is large enough to
-     * hold a fully escaped tag with every character doubled plus
-     * enclosing quotes and a NUL. */
-    for (i = 0; i < tag_op_list_size (list); i++)
-	if (strlen (tag_op_list_tag (list, i)) > max_tag_len)
-	    max_tag_len = strlen (tag_op_list_tag (list, i));
-
-    escaped = talloc_array (ctx, char, max_tag_len * 2 + 3);
-    if (! escaped)
-	return NULL;
-
     /* Build the new query string */
     if (strcmp (orig_query_string, "*") == 0)
 	query_string = talloc_strdup (ctx, "(");
     else
 	query_string = talloc_asprintf (ctx, "( %s ) and (", orig_query_string);
 
+    /* Boolean terms surrounded by double quotes can contain any
+     * character.  Double quotes are quoted by doubling them. */
+
     for (i = 0; i < tag_op_list_size (list) && query_string; i++) {
+	/* XXX in case of OOM, query_string will be deallocated when
+	 * ctx is, which might be at shutdown */
+	if (double_quote_str (ctx,
+			      tag_op_list_tag (list, i),
+			      &escaped, &escaped_len))
+	    return NULL;
+
 	query_string = talloc_asprintf_append_buffer (
 	    query_string, "%s%stag:%s", join,
 	    tag_op_list_isremove (list, i) ? "" : "not ",
-	    _escape_tag (escaped, tag_op_list_tag (list, i)));
+	    escaped);
 	join = " or ";
     }
 
     if (query_string)
 	query_string = talloc_strdup_append_buffer (query_string, ")");
 
-    talloc_free (escaped);
+    if (escaped)
+	talloc_free (escaped);
+
     return query_string;
 }
 
diff --git a/util/string-util.c b/util/string-util.c
index 44f8cd3..b9039f4 100644
--- a/util/string-util.c
+++ b/util/string-util.c
@@ -20,6 +20,7 @@
 
 
 #include "string-util.h"
+#include "talloc.h"
 
 char *
 strtok_len (char *s, const char *delim, size_t *len)
@@ -32,3 +33,39 @@ strtok_len (char *s, const char *delim, size_t *len)
 
     return *len ? s : NULL;
 }
+
+
+int
+double_quote_str (void *ctx, const char *str,
+		  char **buf, size_t *len)
+{
+    const char *in;
+    char *out;
+    size_t needed = 3;
+
+    for (in = str; *in; in++)
+	needed += (*in == '"') ? 2 : 1;
+
+    if ((*buf == NULL) || (needed > *len)) {
+	*len = 2 * needed;
+	*buf = talloc_realloc (ctx, *buf, char, *len);
+    }
+
+
+    if (! *buf)
+	return 1;
+
+    out = *buf;
+
+    *out++ = '"';
+    in = str;
+    while (*in) {
+	if (*in == '"')
+	    *out++ = '"';
+	*out++ = *in++;
+    }
+    *out++ = '"';
+    *out = '\0';
+
+    return 0;
+}
diff --git a/util/string-util.h b/util/string-util.h
index ac7676c..4fc7942 100644
--- a/util/string-util.h
+++ b/util/string-util.h
@@ -19,4 +19,12 @@
 
 char *strtok_len (char *s, const char *delim, size_t *len);
 
+/* Copy str to dest, surrounding with double quotes.
+ * Any internal double-quotes are doubled, i.e. a"b -> "a""b"
+ *
+ * Output is into buf; it may be talloc_realloced
+ * Return: 0 on success, non-zero on memory allocation failure.
+ */
+int double_quote_str (void *talloc_ctx, const char *str,
+		      char **buf, size_t *len);
 #endif
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [Patch v8 05/18] util/string-util: add strnspn
  2012-12-21 13:08 [Patch v8 01/18] parse_tag_line: use enum for return value david
                   ` (2 preceding siblings ...)
  2012-12-21 13:08 ` [Patch v8 04/18] notmuch-tag: factor out double quoting routine david
@ 2012-12-21 13:08 ` david
  2012-12-21 13:08 ` [Patch v8 06/18] util/hex-escape: add a function to check strings unchanged by hex encoding david
                   ` (14 subsequent siblings)
  18 siblings, 0 replies; 25+ messages in thread
From: david @ 2012-12-21 13:08 UTC (permalink / raw)
  To: notmuch; +Cc: David Bremner

From: David Bremner <bremner@debian.org>

this is like the standard strspn, but with a limit on how much of a
prefix is tested.
---
 util/string-util.c |   12 ++++++++++++
 util/string-util.h |    5 +++++
 2 files changed, 17 insertions(+)

diff --git a/util/string-util.c b/util/string-util.c
index b9039f4..7d5deaf 100644
--- a/util/string-util.c
+++ b/util/string-util.c
@@ -35,6 +35,18 @@ strtok_len (char *s, const char *delim, size_t *len)
 }
 
 
+size_t
+strnspn (const char *str, size_t len,  const char *accept)
+{
+    size_t count;
+
+    for (count = 0; count < len && str[count] != '\0'; count++) {
+	if (strchr (accept, str[count]) == NULL)
+	    break;
+    }
+    return count;
+}
+
 int
 double_quote_str (void *ctx, const char *str,
 		  char **buf, size_t *len)
diff --git a/util/string-util.h b/util/string-util.h
index 4fc7942..b46f981 100644
--- a/util/string-util.h
+++ b/util/string-util.h
@@ -19,6 +19,11 @@
 
 char *strtok_len (char *s, const char *delim, size_t *len);
 
+/* like strspn, but consider at most `n' characters of 's'. `accept'
+ * must be null terminated; `s' may be null terminated.
+ */
+size_t strnspn (const char *s, size_t n, const char *accept);
+
 /* Copy str to dest, surrounding with double quotes.
  * Any internal double-quotes are doubled, i.e. a"b -> "a""b"
  *
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [Patch v8 06/18] util/hex-escape: add a function to check strings unchanged by hex encoding.
  2012-12-21 13:08 [Patch v8 01/18] parse_tag_line: use enum for return value david
                   ` (3 preceding siblings ...)
  2012-12-21 13:08 ` [Patch v8 05/18] util/string-util: add strnspn david
@ 2012-12-21 13:08 ` david
  2012-12-21 13:08 ` [Patch v8 07/18] unhex_and_quote: new function to quote hex-decoded queries david
                   ` (13 subsequent siblings)
  18 siblings, 0 replies; 25+ messages in thread
From: david @ 2012-12-21 13:08 UTC (permalink / raw)
  To: notmuch; +Cc: David Bremner

From: David Bremner <bremner@debian.org>

Initial use case is to recognize input tokens for batch tagging that
need quoting.
---
 util/hex-escape.c |    7 +++++++
 util/hex-escape.h |    8 ++++++++
 2 files changed, 15 insertions(+)

diff --git a/util/hex-escape.c b/util/hex-escape.c
index b7e2e07..b601a33 100644
--- a/util/hex-escape.c
+++ b/util/hex-escape.c
@@ -24,6 +24,7 @@
 #include <ctype.h>
 #include "error_util.h"
 #include "hex-escape.h"
+#include "string-util.h"
 
 static const size_t default_buf_size = 1024;
 
@@ -38,6 +39,12 @@ is_output (char c)
     return (strchr (output_charset, c) != NULL);
 }
 
+size_t
+hex_invariant (const char *str, size_t len)
+{
+    return strnspn (str, len, output_charset);
+}
+
 static int
 maybe_realloc (void *ctx, size_t needed, char **out, size_t *out_size)
 {
diff --git a/util/hex-escape.h b/util/hex-escape.h
index 5182042..e7d0b31 100644
--- a/util/hex-escape.h
+++ b/util/hex-escape.h
@@ -38,4 +38,12 @@ hex_decode (void *talloc_ctx, const char *in, char **out,
  */
 hex_status_t
 hex_decode_inplace (char *s);
+
+/*
+ * Calculate the length of the initial segment of str that would not
+ * be changed by hex encoding. Consider at most `len' characters
+ * (bytes) before stopping.
+ */
+size_t
+hex_invariant (const char *str, size_t len);
 #endif
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [Patch v8 07/18] unhex_and_quote: new function to quote hex-decoded queries
  2012-12-21 13:08 [Patch v8 01/18] parse_tag_line: use enum for return value david
                   ` (4 preceding siblings ...)
  2012-12-21 13:08 ` [Patch v8 06/18] util/hex-escape: add a function to check strings unchanged by hex encoding david
@ 2012-12-21 13:08 ` david
  2012-12-22 23:36   ` Jani Nikula
  2012-12-21 13:08 ` [Patch v8 08/18] notmuch-restore: move query handling for batch restore to parser david
                   ` (12 subsequent siblings)
  18 siblings, 1 reply; 25+ messages in thread
From: david @ 2012-12-21 13:08 UTC (permalink / raw)
  To: notmuch; +Cc: David Bremner

From: David Bremner <bremner@debian.org>

Space delimited tokens are hex decoded and then quoted according to
Xapian rules. Prefixes and '*' are passed through unquoted, as is
anything that hex-decoding would not change.
---
 tag-util.c |   81 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 81 insertions(+)

diff --git a/tag-util.c b/tag-util.c
index f89669a..46aab4e 100644
--- a/tag-util.c
+++ b/tag-util.c
@@ -56,6 +56,87 @@ illegal_tag (const char *tag, notmuch_bool_t remove)
     return NULL;
 }
 
+/* Input is a hex encoded string, presumed to be a query for Xapian.
+ *
+ * Space delimited tokens are decoded and quoted, with '*' and prefixes
+ * of the form "foo:" passed through unquoted.
+ */
+static tag_parse_status_t
+unhex_and_quote (void *ctx, char *encoded, const char *line_for_error,
+		 char **query_string)
+{
+    char *tok = encoded;
+    size_t tok_len = 0;
+    char *buf = NULL;
+    size_t buf_len = 0;
+    tag_parse_status_t ret = TAG_PARSE_SUCCESS;
+
+    *query_string = talloc_strdup (ctx, "");
+
+    while ((tok = strtok_len (tok + tok_len, " ", &tok_len)) != NULL) {
+
+	size_t prefix_len;
+	char delim = *(tok + tok_len);
+
+	*(tok + tok_len++) = '\0';
+
+	prefix_len = hex_invariant (tok, tok_len);
+
+	if ((strcmp (tok, "*") == 0) || prefix_len >= tok_len - 1) {
+
+	    /* pass some things through without quoting or decoding.
+	     * Note for '*' this is mandatory.
+	     */
+
+	    if (! (*query_string = talloc_asprintf_append_buffer (
+		       *query_string, "%s%c", tok, delim))) {
+
+		ret = line_error (TAG_PARSE_OUT_OF_MEMORY,
+				  line_for_error, "aborting");
+		goto DONE;
+	    }
+
+	} else {
+	    /* potential prefix: one for ':', then something after */
+	    if ((tok_len - prefix_len > 2) && *(tok + prefix_len) == ':') {
+		if (! (*query_string = talloc_strndup_append (*query_string,
+							      tok,
+							      prefix_len + 1))) {
+		    ret = line_error (TAG_PARSE_OUT_OF_MEMORY,
+				      line_for_error, "aborting");
+		    goto DONE;
+		}
+		tok += prefix_len + 1;
+		tok_len -= prefix_len + 1;
+	    }
+
+	    if (hex_decode_inplace (tok) != HEX_SUCCESS) {
+		ret = line_error (TAG_PARSE_INVALID, line_for_error,
+				  "hex decoding of token '%s' failed", tok);
+		goto DONE;
+	    }
+
+	    if (double_quote_str (ctx, tok, &buf, &buf_len)) {
+		ret = line_error (TAG_PARSE_OUT_OF_MEMORY,
+				  line_for_error, "aborting");
+		goto DONE;
+	    }
+
+	    if (! (*query_string = talloc_asprintf_append_buffer (
+		       *query_string, "%s%c", buf, delim))) {
+		ret = line_error (TAG_PARSE_OUT_OF_MEMORY,
+				  line_for_error, "aborting");
+		goto DONE;
+	    }
+	}
+    }
+
+  DONE:
+    if (ret != TAG_PARSE_SUCCESS && *query_string)
+	talloc_free (*query_string);
+    return ret;
+}
+
 tag_parse_status_t
 parse_tag_line (void *ctx, char *line,
 		tag_op_flag_t flags,
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [Patch v8 08/18] notmuch-restore: move query handling for batch restore to parser
  2012-12-21 13:08 [Patch v8 01/18] parse_tag_line: use enum for return value david
                   ` (5 preceding siblings ...)
  2012-12-21 13:08 ` [Patch v8 07/18] unhex_and_quote: new function to quote hex-decoded queries david
@ 2012-12-21 13:08 ` david
  2012-12-21 13:08 ` [Patch v8 09/18] cli: add support for batch tagging operations to "notmuch tag" david
                   ` (11 subsequent siblings)
  18 siblings, 0 replies; 25+ messages in thread
From: david @ 2012-12-21 13:08 UTC (permalink / raw)
  To: notmuch; +Cc: David Bremner

From: David Bremner <bremner@debian.org>

We are able to detect more errors by looking at the string before it
is hex-decoded. We also need this to avoid the query quoting for more
general queries (to be written) that will mess up raw message-ids.
---
 notmuch-restore.c |   18 +-----------------
 tag-util.c        |   26 ++++++++++++++++++++------
 tag-util.h        |    7 ++++++-
 test/dump-restore |    5 ++---
 4 files changed, 29 insertions(+), 27 deletions(-)

diff --git a/notmuch-restore.c b/notmuch-restore.c
index 40596a8..1b66e76 100644
--- a/notmuch-restore.c
+++ b/notmuch-restore.c
@@ -208,24 +208,8 @@ notmuch_restore_command (unused (void *ctx), int argc, char *argv[])
 	if (input_format == DUMP_FORMAT_SUP) {
 	    ret = parse_sup_line (ctx, line, &query_string, tag_ops);
 	} else {
-	    ret = parse_tag_line (ctx, line, TAG_FLAG_BE_GENEROUS,
+	    ret = parse_tag_line (ctx, line, TAG_FLAG_BE_GENEROUS | TAG_FLAG_ID_DIRECT,
 				  &query_string, tag_ops);
-
-	    if (ret == 0) {
-		if (strncmp ("id:", query_string, 3) != 0) {
-		    fprintf (stderr, "Warning: unsupported query: %s\n", query_string);
-		    continue;
-		}
-		/* delete id: from front of string; tag_message
-		 * expects a raw message-id.
-		 *
-		 * XXX: Note that query string id:foo and bar will be
-		 * interpreted as a message id "foo and bar". This
-		 * should eventually be fixed to give a better error
-		 * message.
-		 */
-		query_string = query_string + 3;
-	    }
 	}
 
 	if (ret > 0)
diff --git a/tag-util.c b/tag-util.c
index 46aab4e..b0a846b 100644
--- a/tag-util.c
+++ b/tag-util.c
@@ -232,14 +232,28 @@ parse_tag_line (void *ctx, char *line,
     }
 
     /* tok now points to the query string */
-    if (hex_decode_inplace (tok) != HEX_SUCCESS) {
-	ret = line_error (TAG_PARSE_INVALID, line_for_error,
-			  "hex decoding of query %s failed", tok);
-	goto DONE;
+    if (flags & TAG_FLAG_ID_DIRECT) {
+	/* this is under the assumption that any whitespace in the
+	 * message-id must be hex-encoded. The check is probably not
+	 * perfect for exotic unicode whitespace; as fallback the
+	 * search for strange message-ids will fail */
+	if ((strncmp ("id:", tok, 3) != 0) ||
+	    (strcspn (tok, " \t") < strlen (tok))) {
+	    ret = line_error (TAG_PARSE_INVALID, line_for_error,
+			      "query '%s' is not 'id:<message-id>'", tok);
+	    goto DONE;
+	}
+	if (hex_decode_inplace (tok) != HEX_SUCCESS) {
+	    ret = line_error (TAG_PARSE_INVALID, line_for_error,
+			      "hex decoding of query %s failed", tok);
+	    goto DONE;
+	}
+	/* skip 'id:' */
+	*query_string = tok + 3;
+    } else {
+	ret = unhex_and_quote (ctx, tok, line_for_error, query_string);
     }
 
-    *query_string = tok;
-
   DONE:
     talloc_free (line_for_error);
     return ret;
diff --git a/tag-util.h b/tag-util.h
index 2889736..eec00cf 100644
--- a/tag-util.h
+++ b/tag-util.h
@@ -26,7 +26,12 @@ typedef enum {
     /* Accept strange tags that might be user error;
      * intended for use by notmuch-restore.
      */
-    TAG_FLAG_BE_GENEROUS = (1 << 3)
+    TAG_FLAG_BE_GENEROUS = (1 << 3),
+
+    /* Directly look up messages by hex-decoded message-id, rather
+     * than parsing a general query. The query MUST be of the form
+     * id:$message-id. */
+    TAG_FLAG_ID_DIRECT = (1 << 4)
 
 } tag_op_flag_t;
 
diff --git a/test/dump-restore b/test/dump-restore
index 6a989b6..eb7933a 100755
--- a/test/dump-restore
+++ b/test/dump-restore
@@ -199,19 +199,18 @@ a
 # the next non-comment line should report an an empty tag error for
 # batch tagging, but not for restore
 + +e -- id:20091117232137.GA7669@griffis1.net
-# highlight the sketchy id parsing; this should be last
 +g -- id:foo and bar
 EOF
 
 cat <<EOF > EXPECTED
-Warning: unsupported query: a
+Warning: query 'a' is not 'id:<message-id>' [a]
 Warning: no query string [+0]
 Warning: no query string [+a +b]
 Warning: missing query string [+a +b ]
 Warning: no query string after -- [+c +d --]
 Warning: hex decoding of tag %zz failed [+%zz -- id:whatever]
 Warning: hex decoding of query id:%yy failed [+e +f id:%yy]
-Warning: cannot apply tags to missing message: foo and bar
+Warning: query 'id:foo and bar' is not 'id:<message-id>' [+g -- id:foo and bar]
 EOF
 
 test_expect_equal_file EXPECTED OUTPUT
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [Patch v8 09/18] cli: add support for batch tagging operations to "notmuch tag"
  2012-12-21 13:08 [Patch v8 01/18] parse_tag_line: use enum for return value david
                   ` (6 preceding siblings ...)
  2012-12-21 13:08 ` [Patch v8 08/18] notmuch-restore: move query handling for batch restore to parser david
@ 2012-12-21 13:08 ` david
  2012-12-22 22:51   ` Jani Nikula
  2012-12-21 13:08 ` [Patch v8 10/18] test/tagging: add test for error messages of tag --batch david
                   ` (10 subsequent siblings)
  18 siblings, 1 reply; 25+ messages in thread
From: david @ 2012-12-21 13:08 UTC (permalink / raw)
  To: notmuch

From: Jani Nikula <jani@nikula.org>

Add support for batch tagging operations through stdin to "notmuch
tag". This can be enabled with the new --batch command line option to
"notmuch tag". The input must consist of lines of the format:

+<tag>|-<tag> [...] [--] <search-term> [...]

Each line is interpreted similarly to "notmuch tag" command line
arguments. The delimiter is one or more spaces ' '. Any characters in
<tag> and <search-term> MAY be hex encoded with %NN where NN is the
hexadecimal value of the character. Any ' ' and '%' characters in
<tag> and <search-term> MUST be hex encoded (using %20 and %25,
respectively). Any characters that are not part of <tag> or
<search-term> MUST NOT be hex encoded.

Leading and trailing space ' ' is ignored. Empty lines and lines
beginning with '#' are ignored.

Signed-off-by: Jani Nikula <jani@nikula.org>

Hacked-like-crazy-by: David Bremner <david@tethera.net>
---
 notmuch-tag.c |   91 ++++++++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 83 insertions(+), 8 deletions(-)

diff --git a/notmuch-tag.c b/notmuch-tag.c
index a480215..5c8bad4 100644
--- a/notmuch-tag.c
+++ b/notmuch-tag.c
@@ -132,6 +132,46 @@ tag_query (void *ctx, notmuch_database_t *notmuch, const char *query_string,
     return interrupted;
 }
 
+static int
+tag_file (void *ctx, notmuch_database_t *notmuch, tag_op_flag_t flags,
+	  FILE *input)
+{
+    char *line = NULL;
+    char *query_string = NULL;
+    size_t line_size = 0;
+    ssize_t line_len;
+    int ret = 0;
+    tag_op_list_t *tag_ops;
+
+    tag_ops = tag_op_list_create (ctx);
+    if (tag_ops == NULL) {
+	fprintf (stderr, "Out of memory.\n");
+	return 1;
+    }
+
+    while ((line_len = getline (&line, &line_size, input)) != -1 &&
+	   ! interrupted) {
+
+	ret = parse_tag_line (ctx, line, TAG_FLAG_NONE,
+			      &query_string, tag_ops);
+
+	if (ret > 0)
+	    continue;
+
+	if (ret < 0)
+	    break;
+
+	ret = tag_query (ctx, notmuch, query_string, tag_ops, flags);
+	if (ret)
+	    break;
+    }
+
+    if (line)
+	free (line);
+
+    return ret;
+}
+
 int
 notmuch_tag_command (void *ctx, int argc, char *argv[])
 {
@@ -141,6 +181,10 @@ notmuch_tag_command (void *ctx, int argc, char *argv[])
     notmuch_database_t *notmuch;
     struct sigaction action;
     tag_op_flag_t tag_flags = TAG_FLAG_NONE;
+    notmuch_bool_t batch = FALSE;
+    FILE *input = stdin;
+    char *input_file_name = NULL;
+    int opt_index;
     int ret = 0;
 
     /* Setup our handler for SIGINT */
@@ -150,15 +194,43 @@ notmuch_tag_command (void *ctx, int argc, char *argv[])
     action.sa_flags = SA_RESTART;
     sigaction (SIGINT, &action, NULL);
 
-    tag_ops = tag_op_list_create (ctx);
-    if (tag_ops == NULL) {
-	fprintf (stderr, "Out of memory.\n");
+    notmuch_opt_desc_t options[] = {
+	{ NOTMUCH_OPT_BOOLEAN, &batch, "batch", 0, 0 },
+	{ NOTMUCH_OPT_STRING, &input_file_name, "input", 'i', 0 },
+	{ 0, 0, 0, 0, 0 }
+    };
+
+    opt_index = parse_arguments (argc, argv, options, 1);
+    if (opt_index < 0)
 	return 1;
+
+    if (input_file_name) {
+	batch = TRUE;
+	input = fopen (input_file_name, "r");
+	if (input == NULL) {
+	    fprintf (stderr, "Error opening %s for reading: %s\n",
+		     input_file_name, strerror (errno));
+	    return 1;
+	}
     }
 
-    if (parse_tag_command_line (ctx, argc - 1, argv + 1,
-				&query_string, tag_ops))
-	return 1;
+    if (batch) {
+	if (opt_index != argc) {
+	    fprintf (stderr, "Can't specify both cmdline and stdin!\n");
+	    return 1;
+	}
+    } else {
+
+	tag_ops = tag_op_list_create (ctx);
+	if (tag_ops == NULL) {
+	    fprintf (stderr, "Out of memory.\n");
+	    return 1;
+	}
+
+	if (parse_tag_command_line (ctx, argc - opt_index, argv + opt_index,
+				    &query_string, tag_ops))
+	    return 1;
+    }
 
     config = notmuch_config_open (ctx, NULL, NULL);
     if (config == NULL)
@@ -171,9 +243,12 @@ notmuch_tag_command (void *ctx, int argc, char *argv[])
     if (notmuch_config_get_maildir_synchronize_flags (config))
 	tag_flags |= TAG_FLAG_MAILDIR_SYNC;
 
-    ret = tag_query (ctx, notmuch, query_string, tag_ops, tag_flags);
+    if (batch)
+	ret = tag_file (ctx, notmuch, tag_flags, input);
+    else
+	ret = tag_query (ctx, notmuch, query_string, tag_ops, tag_flags);
 
     notmuch_database_destroy (notmuch);
 
-    return ret;
+    return ret || interrupted;
 }
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [Patch v8 10/18] test/tagging: add test for error messages of tag --batch
  2012-12-21 13:08 [Patch v8 01/18] parse_tag_line: use enum for return value david
                   ` (7 preceding siblings ...)
  2012-12-21 13:08 ` [Patch v8 09/18] cli: add support for batch tagging operations to "notmuch tag" david
@ 2012-12-21 13:08 ` david
  2012-12-21 13:08 ` [Patch v8 11/18] test/tagging: add basic tests for batch tagging functionality david
                   ` (9 subsequent siblings)
  18 siblings, 0 replies; 25+ messages in thread
From: david @ 2012-12-21 13:08 UTC (permalink / raw)
  To: notmuch; +Cc: David Bremner

From: David Bremner <bremner@debian.org>

This is based on the similar test for notmuch restore, but the parser
in batch tagging mode is less tolerant of a few cases, in particular
those tested by illegal_tag.
---
 test/tagging |   37 +++++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

diff --git a/test/tagging b/test/tagging
index 980ff92..30cec48 100755
--- a/test/tagging
+++ b/test/tagging
@@ -46,6 +46,43 @@ test_expect_equal "$output" "\
 thread:XXX   2001-01-05 [1/1] Notmuch Test Suite; One (:\"  inbox tag1 unread)
 thread:XXX   2001-01-05 [1/1] Notmuch Test Suite; Two (inbox tag1 tag4 unread)"
 
+test_begin_subtest '--batch: checking error messages'
+notmuch dump --format=batch-tag > BACKUP
+notmuch tag --batch <<EOF 2>OUTPUT
+# the next line has a space
+ 
+# this line has no tag operations, but this is permitted in batch format.
+a
++0
++a +b
+# trailing whitespace
++a +b 
++c +d --
+# this is a harmless comment, do not yell about it.
+
+# the previous line was blank; also no yelling please
++%zz -- id:whatever
++e +f id:%yy
+# the next non-comment line should report an an empty tag error for
+# batch tagging, but not for restore
++ +e -- id:foo
++- -- id:foo
+EOF
+
+cat <<EOF > EXPECTED
+Warning: no query string [+0]
+Warning: no query string [+a +b]
+Warning: missing query string [+a +b ]
+Warning: no query string after -- [+c +d --]
+Warning: hex decoding of tag %zz failed [+%zz -- id:whatever]
+Warning: hex decoding of token '%yy' failed [+e +f id:%yy]
+Warning: empty tag forbidden [+ +e -- id:foo]
+Warning: tag starting with '-' forbidden [+- -- id:foo]
+EOF
+
+notmuch restore --format=batch-tag < BACKUP
+test_expect_equal_file EXPECTED OUTPUT
+
 test_expect_code 1 "Empty tag names" 'notmuch tag + One'
 
 test_expect_code 1 "Tag name beginning with -" 'notmuch tag +- One'
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [Patch v8 11/18] test/tagging: add basic tests for batch tagging functionality
  2012-12-21 13:08 [Patch v8 01/18] parse_tag_line: use enum for return value david
                   ` (8 preceding siblings ...)
  2012-12-21 13:08 ` [Patch v8 10/18] test/tagging: add test for error messages of tag --batch david
@ 2012-12-21 13:08 ` david
  2012-12-21 13:08 ` [Patch v8 12/18] test/tagging: add tests for exotic tags david
                   ` (8 subsequent siblings)
  18 siblings, 0 replies; 25+ messages in thread
From: david @ 2012-12-21 13:08 UTC (permalink / raw)
  To: notmuch; +Cc: David Bremner

From: David Bremner <bremner@debian.org>

This tests argument parsing, blank lines and comments, and basic hex
decoding functionality.
---
 test/tagging |   51 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 51 insertions(+)

diff --git a/test/tagging b/test/tagging
index 30cec48..b454d38 100755
--- a/test/tagging
+++ b/test/tagging
@@ -46,6 +46,57 @@ test_expect_equal "$output" "\
 thread:XXX   2001-01-05 [1/1] Notmuch Test Suite; One (:\"  inbox tag1 unread)
 thread:XXX   2001-01-05 [1/1] Notmuch Test Suite; Two (inbox tag1 tag4 unread)"
 
+test_begin_subtest "--batch"
+notmuch tag --batch <<EOF
+# %20 is a space in tag
+-:"%20 -tag1 +tag5 +tag6 -- One
++tag1 -tag1 -tag4 +tag4 -- Two
+-tag6 One
++tag5 Two
+EOF
+output=$(notmuch search \* | notmuch_search_sanitize)
+test_expect_equal "$output" "\
+thread:XXX   2001-01-05 [1/1] Notmuch Test Suite; One (inbox tag5 unread)
+thread:XXX   2001-01-05 [1/1] Notmuch Test Suite; Two (inbox tag4 tag5 unread)"
+
+# generate a common input file for the next several tests.
+cat > batch.in  <<EOF
+# %40 is an @ in tag
++%40 -tag5 +tag6 -- One
++tag1 -tag1 -tag4 +tag4 -- Two
+-tag5 +tag6 Two
+EOF
+
+cat > batch.expected <<EOF
+thread:XXX   2001-01-05 [1/1] Notmuch Test Suite; One (@ inbox tag6 unread)
+thread:XXX   2001-01-05 [1/1] Notmuch Test Suite; Two (inbox tag4 tag6 unread)
+EOF
+
+test_begin_subtest "--input"
+notmuch dump --format=batch-tag > backup.tags
+notmuch tag --input=batch.in
+notmuch search \* | notmuch_search_sanitize > OUTPUT
+notmuch restore --format=batch-tag < backup.tags
+test_expect_equal_file batch.expected OUTPUT
+
+test_begin_subtest "--batch --input"
+notmuch dump --format=batch-tag > backup.tags
+notmuch tag --batch --input=batch.in
+notmuch search \* | notmuch_search_sanitize > OUTPUT
+notmuch restore --format=batch-tag < backup.tags
+test_expect_equal_file batch.expected OUTPUT
+
+test_begin_subtest "--batch, blank lines and comments"
+notmuch dump | sort > EXPECTED
+notmuch tag --batch <<EOF
+# this line is a comment; the next has only white space
+ 	 
+
+# the previous line is empty
+EOF
+notmuch dump | sort > OUTPUT
+test_expect_equal_file EXPECTED OUTPUT
+
 test_begin_subtest '--batch: checking error messages'
 notmuch dump --format=batch-tag > BACKUP
 notmuch tag --batch <<EOF 2>OUTPUT
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [Patch v8 12/18] test/tagging: add tests for exotic tags
  2012-12-21 13:08 [Patch v8 01/18] parse_tag_line: use enum for return value david
                   ` (9 preceding siblings ...)
  2012-12-21 13:08 ` [Patch v8 11/18] test/tagging: add basic tests for batch tagging functionality david
@ 2012-12-21 13:08 ` david
  2012-12-21 13:08 ` [Patch v8 13/18] test/tagging: add test for exotic message-ids and batch tagging david
                   ` (7 subsequent siblings)
  18 siblings, 0 replies; 25+ messages in thread
From: david @ 2012-12-21 13:08 UTC (permalink / raw)
  To: notmuch; +Cc: David Bremner

From: David Bremner <bremner@debian.org>

We test quotes seperately because they matter to the query escaper.
---
 test/tagging |   66 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 66 insertions(+)

diff --git a/test/tagging b/test/tagging
index b454d38..18f532e 100755
--- a/test/tagging
+++ b/test/tagging
@@ -134,6 +134,72 @@ EOF
 notmuch restore --format=batch-tag < BACKUP
 test_expect_equal_file EXPECTED OUTPUT
 
+test_begin_subtest '--batch: tags with quotes'
+notmuch dump --format=batch-tag > BACKUP
+
+notmuch tag --batch <<EOF
++%22%27%22%27%22%22%27%27 -- One
+-%22%27%22%27%22%22%27%27 -- One
++%22%27%22%22%22%27 -- One
++%22%27%22%27%22%22%27%27 -- Two
+EOF
+
+cat <<EOF > EXPECTED
++%22%27%22%22%22%27 +inbox +tag5 +unread -- id:msg-001@notmuch-test-suite
++%22%27%22%27%22%22%27%27 +inbox +tag4 +tag5 +unread -- id:msg-002@notmuch-test-suite
+EOF
+
+notmuch dump --format=batch-tag | sort > OUTPUT
+notmuch restore --format=batch-tag < BACKUP
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest '--batch: tags with punctuation and space'
+notmuch dump --format=batch-tag > BACKUP
+
+notmuch tag --batch <<EOF
++%21@%23%24%25%5e%26%2a%29-_=+%5b%7b%5c%20%7c%3b%3a%27%20%22,.%3c%60%7e -- One
+-%21@%23%24%25%5e%26%2a%29-_=+%5b%7b%5c%20%7c%3b%3a%27%20%22,.%3c%60%7e -- One
++%21@%23%24%25%5e%26%2a%29-_=+%5b%7b%5c%20%7c%3b%3a%27%20%22,.%3c%20%60%7e -- Two
+-%21@%23%24%25%5e%26%2a%29-_=+%5b%7b%5c%20%7c%3b%3a%27%20%22,.%3c%20%60%7e -- Two
++%21@%23%20%24%25%5e%26%2a%29-_=+%5b%7b%5c%20%7c%3b%3a%27%20%22,.%3c%60%7e -- One
++%21@%23%20%24%25%5e%26%2a%29-_=+%5b%7b%5c%20%7c%3b%3a%27%20%22,.%3c%60%7e -- Two
+EOF
+
+cat <<EOF > EXPECTED
++%21@%23%20%24%25%5e%26%2a%29-_=+%5b%7b%5c%20%7c%3b%3a%27%20%22,.%3c%60%7e +inbox +tag4 +tag5 +unread -- id:msg-002@notmuch-test-suite
++%21@%23%20%24%25%5e%26%2a%29-_=+%5b%7b%5c%20%7c%3b%3a%27%20%22,.%3c%60%7e +inbox +tag5 +unread -- id:msg-001@notmuch-test-suite
+EOF
+
+notmuch dump --format=batch-tag | sort > OUTPUT
+notmuch restore --format=batch-tag < BACKUP
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest '--batch: unicode tags'
+notmuch dump --format=batch-tag > BACKUP
+
+notmuch tag --batch <<EOF
++%2a@%7d%cf%b5%f4%85%80%adO3%da%a7 -- One
++=%e0%ac%95%c8%b3+%ef%aa%95%c8%a64w%c7%9d%c9%a2%cf%b3%d6%82%24B%c4%a9%c5%a1UX%ee%99%b0%27E7%ca%a4%d0%8b%5d -- One
++A%e1%a0%bc%de%8b%d5%b2V%d9%9b%f3%b5%a2%a3M%d8%a1u@%f0%a0%ac%948%7e%f0%ab%86%af%27 -- One
++R -- One
++%da%88=f%cc%b9I%ce%af%7b%c9%97%e3%b9%8bH%cb%92X%d2%8c6 -- One
++%dc%9crh%d2%86B%e5%97%a2%22t%ed%99%82d -- One
++L%df%85%ef%a1%a5m@%d3%96%c2%ab%d4%9f%ca%b8%f3%b3%a2%bf%c7%b1_u%d7%b4%c7%b1 -- One
++P%c4%98%2f -- One
++%7e%d1%8b%25%ec%a0%ae%d1%a0M%3b%e3%b6%b7%e9%a4%87%3c%db%9a%cc%a8%e1%96%9d -- One
++%c4%bf7%c7%ab9H%c4%99k%ea%91%bd%c3%8ck%e2%b3%8dk%c5%952V%e4%99%b2%d9%b3%e4%8b%bda%5b%24%c7%9b -- One
++%2a@%7d%cf%b5%f4%85%80%adO3%da%a7  +=%e0%ac%95%c8%b3+%ef%aa%95%c8%a64w%c7%9d%c9%a2%cf%b3%d6%82%24B%c4%a9%c5%a1UX%ee%99%b0%27E7%ca%a4%d0%8b%5d  +A%e1%a0%bc%de%8b%d5%b2V%d9%9b%f3%b5%a2%a3M%d8%a1u@%f0%a0%ac%948%7e%f0%ab%86%af%27  +R  +%da%88=f%cc%b9I%ce%af%7b%c9%97%e3%b9%8bH%cb%92X%d2%8c6  +%dc%9crh%d2%86B%e5%97%a2%22t%ed%99%82d  +L%df%85%ef%a1%a5m@%d3%96%c2%ab%d4%9f%ca%b8%f3%b3%a2%bf%c7%b1_u%d7%b4%c7%b1  +P%c4%98%2f  +%7e%d1%8b%25%ec%a0%ae%d1%a0M%3b%e3%b6%b7%e9%a4%87%3c%db%9a%cc%a8%e1%96%9d  +%c4%bf7%c7%ab9H%c4%99k%ea%91%bd%c3%8ck%e2%b3%8dk%c5%952V%e4%99%b2%d9%b3%e4%8b%bda%5b%24%c7%9b -- Two
+EOF
+
+cat <<EOF > EXPECTED
++%2a@%7d%cf%b5%f4%85%80%adO3%da%a7 +=%e0%ac%95%c8%b3+%ef%aa%95%c8%a64w%c7%9d%c9%a2%cf%b3%d6%82%24B%c4%a9%c5%a1UX%ee%99%b0%27E7%ca%a4%d0%8b%5d +A%e1%a0%bc%de%8b%d5%b2V%d9%9b%f3%b5%a2%a3M%d8%a1u@%f0%a0%ac%948%7e%f0%ab%86%af%27 +L%df%85%ef%a1%a5m@%d3%96%c2%ab%d4%9f%ca%b8%f3%b3%a2%bf%c7%b1_u%d7%b4%c7%b1 +P%c4%98%2f +R +inbox +tag4 +tag5 +unread +%7e%d1%8b%25%ec%a0%ae%d1%a0M%3b%e3%b6%b7%e9%a4%87%3c%db%9a%cc%a8%e1%96%9d +%c4%bf7%c7%ab9H%c4%99k%ea%91%bd%c3%8ck%e2%b3%8dk%c5%952V%e4%99%b2%d9%b3%e4%8b%bda%5b%24%c7%9b +%da%88=f%cc%b9I%ce%af%7b%c9%97%e3%b9%8bH%cb%92X%d2%8c6 +%dc%9crh%d2%86B%e5%97%a2%22t%ed%99%82d -- id:msg-002@notmuch-test-suite
++%2a@%7d%cf%b5%f4%85%80%adO3%da%a7 +=%e0%ac%95%c8%b3+%ef%aa%95%c8%a64w%c7%9d%c9%a2%cf%b3%d6%82%24B%c4%a9%c5%a1UX%ee%99%b0%27E7%ca%a4%d0%8b%5d +A%e1%a0%bc%de%8b%d5%b2V%d9%9b%f3%b5%a2%a3M%d8%a1u@%f0%a0%ac%948%7e%f0%ab%86%af%27 +L%df%85%ef%a1%a5m@%d3%96%c2%ab%d4%9f%ca%b8%f3%b3%a2%bf%c7%b1_u%d7%b4%c7%b1 +P%c4%98%2f +R +inbox +tag5 +unread +%7e%d1%8b%25%ec%a0%ae%d1%a0M%3b%e3%b6%b7%e9%a4%87%3c%db%9a%cc%a8%e1%96%9d +%c4%bf7%c7%ab9H%c4%99k%ea%91%bd%c3%8ck%e2%b3%8dk%c5%952V%e4%99%b2%d9%b3%e4%8b%bda%5b%24%c7%9b +%da%88=f%cc%b9I%ce%af%7b%c9%97%e3%b9%8bH%cb%92X%d2%8c6 +%dc%9crh%d2%86B%e5%97%a2%22t%ed%99%82d -- id:msg-001@notmuch-test-suite
+EOF
+
+notmuch dump --format=batch-tag | sort > OUTPUT
+notmuch restore --format=batch-tag < BACKUP
+test_expect_equal_file EXPECTED OUTPUT
+
 test_expect_code 1 "Empty tag names" 'notmuch tag + One'
 
 test_expect_code 1 "Tag name beginning with -" 'notmuch tag +- One'
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [Patch v8 13/18] test/tagging: add test for exotic message-ids and batch tagging
  2012-12-21 13:08 [Patch v8 01/18] parse_tag_line: use enum for return value david
                   ` (10 preceding siblings ...)
  2012-12-21 13:08 ` [Patch v8 12/18] test/tagging: add tests for exotic tags david
@ 2012-12-21 13:08 ` david
  2012-12-21 13:08 ` [Patch v8 14/18] test/tagging: add test for compound queries with " david
                   ` (6 subsequent siblings)
  18 siblings, 0 replies; 25+ messages in thread
From: david @ 2012-12-21 13:08 UTC (permalink / raw)
  To: notmuch; +Cc: David Bremner

From: David Bremner <bremner@debian.org>

The (now fixed) bug that this test revealed is that unquoted
message-ids with whitespace or other control characters in them are
split into several tokens by the Xapian query parser.
---
 test/tagging |   18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/test/tagging b/test/tagging
index 18f532e..1d59af0 100755
--- a/test/tagging
+++ b/test/tagging
@@ -200,6 +200,24 @@ notmuch dump --format=batch-tag | sort > OUTPUT
 notmuch restore --format=batch-tag < BACKUP
 test_expect_equal_file EXPECTED OUTPUT
 
+test_begin_subtest '--batch: unicode message-ids'
+
+${TEST_DIRECTORY}/random-corpus --config-path=${NOTMUCH_CONFIG} \
+     --num-messages=100
+
+notmuch dump --format=batch-tag | sed 's/^.* -- /+common_tag -- /' | \
+    sort > EXPECTED
+
+notmuch dump --format=batch-tag | sed 's/^.* -- /  -- /' | \
+    notmuch restore --format=batch-tag
+
+notmuch tag --batch < EXPECTED
+
+notmuch dump --format=batch-tag| \
+    sort > OUTPUT
+
+test_expect_equal_file EXPECTED OUTPUT
+
 test_expect_code 1 "Empty tag names" 'notmuch tag + One'
 
 test_expect_code 1 "Tag name beginning with -" 'notmuch tag +- One'
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [Patch v8 14/18] test/tagging: add test for compound queries with batch tagging
  2012-12-21 13:08 [Patch v8 01/18] parse_tag_line: use enum for return value david
                   ` (11 preceding siblings ...)
  2012-12-21 13:08 ` [Patch v8 13/18] test/tagging: add test for exotic message-ids and batch tagging david
@ 2012-12-21 13:08 ` david
  2012-12-21 13:08 ` [Patch v8 15/18] notmuch-tag.1: tidy synopsis formatting, reference david
                   ` (5 subsequent siblings)
  18 siblings, 0 replies; 25+ messages in thread
From: david @ 2012-12-21 13:08 UTC (permalink / raw)
  To: notmuch; +Cc: David Bremner

From: David Bremner <bremner@debian.org>

This is to watch for errors in quoting the query.
---
 test/tagging |   28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/test/tagging b/test/tagging
index 1d59af0..88cd18b 100755
--- a/test/tagging
+++ b/test/tagging
@@ -174,6 +174,34 @@ notmuch dump --format=batch-tag | sort > OUTPUT
 notmuch restore --format=batch-tag < BACKUP
 test_expect_equal_file EXPECTED OUTPUT
 
+test_begin_subtest '--batch: compound queries'
+notmuch dump --format=batch-tag > BACKUP
+
+notmuch tag --batch <<EOF
+# match nothing
++compound1 -- One and Two
+# match everything
++compound2 -- One or Two
++tag%20with%20spaces -- *
+# redundant compound query
++compound3 -- id:msg-002@notmuch-test-suite and Two
+# check for prefix in second token
++compound4 -- Two and id:msg-002@notmuch-test-suite
+# both have prefixes
+-unread -- tag:tag%20with%20spaces and id:msg-002@notmuch-test-suite
+# remove from only msg-001
+-tag%20with%20spaces -- tag:unread and tag:compound2
+EOF
+
+cat <<EOF > EXPECTED
++compound2 +compound3 +compound4 +inbox +tag%20with%20spaces +tag4 +tag5 -- id:msg-002@notmuch-test-suite
++compound2 +inbox +tag5 +unread -- id:msg-001@notmuch-test-suite
+EOF
+
+notmuch dump --format=batch-tag | sort > OUTPUT
+notmuch restore --format=batch-tag < BACKUP
+test_expect_equal_file EXPECTED OUTPUT
+
 test_begin_subtest '--batch: unicode tags'
 notmuch dump --format=batch-tag > BACKUP
 
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [Patch v8 15/18] notmuch-tag.1: tidy synopsis formatting, reference
  2012-12-21 13:08 [Patch v8 01/18] parse_tag_line: use enum for return value david
                   ` (12 preceding siblings ...)
  2012-12-21 13:08 ` [Patch v8 14/18] test/tagging: add test for compound queries with " david
@ 2012-12-21 13:08 ` david
  2012-12-21 13:08 ` [Patch v8 16/18] man: document notmuch tag --batch, --input options david
                   ` (4 subsequent siblings)
  18 siblings, 0 replies; 25+ messages in thread
From: david @ 2012-12-21 13:08 UTC (permalink / raw)
  To: notmuch; +Cc: David Bremner

From: David Bremner <bremner@debian.org>

Consistently use [...]; one less space. Use singular <search-term>
---
 man/man1/notmuch-tag.1 |    7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/man/man1/notmuch-tag.1 b/man/man1/notmuch-tag.1
index 0f86582..9444aa4 100644
--- a/man/man1/notmuch-tag.1
+++ b/man/man1/notmuch-tag.1
@@ -4,20 +4,21 @@ notmuch-tag \- add/remove tags for all messages matching the search terms
 
 .SH SYNOPSIS
 .B notmuch tag
-.RI  "+<" tag> "|\-<" tag "> [...] [\-\-] <" search-term ">..."
+.RI "+<" tag ">|\-<" tag "> [...] [\-\-] <" search-term "> [...]"
 
 .SH DESCRIPTION
 
 Add/remove tags for all messages matching the search terms.
 
 See \fBnotmuch-search-terms\fR(7)
-for details of the supported syntax for <search-terms>.
+for details of the supported syntax for
+.RI < search-term >.
 
 Tags prefixed by '+' are added while those prefixed by '\-' are
 removed. For each message, tag removal is performed before tag
 addition.
 
-The beginning of <search-terms> is recognized by the first
+The beginning of the search terms is recognized by the first
 argument that begins with neither '+' nor '\-'. Support for
 an initial search term beginning with '+' or '\-' is provided
 by allowing the user to specify a "\-\-" argument to separate
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [Patch v8 16/18] man: document notmuch tag --batch, --input options
  2012-12-21 13:08 [Patch v8 01/18] parse_tag_line: use enum for return value david
                   ` (13 preceding siblings ...)
  2012-12-21 13:08 ` [Patch v8 15/18] notmuch-tag.1: tidy synopsis formatting, reference david
@ 2012-12-21 13:08 ` david
  2012-12-21 13:08 ` [Patch v8 17/18] test/tagging: add test for naked punctuation in tags; compare with quoting spaces david
                   ` (3 subsequent siblings)
  18 siblings, 0 replies; 25+ messages in thread
From: david @ 2012-12-21 13:08 UTC (permalink / raw)
  To: notmuch

From: Jani Nikula <jani@nikula.org>

---
 man/man1/notmuch-tag.1 |   76 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 76 insertions(+)

diff --git a/man/man1/notmuch-tag.1 b/man/man1/notmuch-tag.1
index 9444aa4..2459740 100644
--- a/man/man1/notmuch-tag.1
+++ b/man/man1/notmuch-tag.1
@@ -6,6 +6,11 @@ notmuch-tag \- add/remove tags for all messages matching the search terms
 .B notmuch tag
 .RI "+<" tag ">|\-<" tag "> [...] [\-\-] <" search-term "> [...]"
 
+.B notmuch tag
+.RI "--batch"
+.RI "[ --input=<" filename "> ]"
+
+
 .SH DESCRIPTION
 
 Add/remove tags for all messages matching the search terms.
@@ -30,6 +35,77 @@ updates the maildir flags according to tag changes if the
 configuration option is enabled. See \fBnotmuch-config\fR(1) for
 details.
 
+Supported options for
+.B tag
+include
+.RS 4
+.TP 4
+.BR \-\-batch
+
+Read batch tagging operations from a file (stdin by default). This is more
+efficient than repeated
+.B notmuch tag
+invocations. See
+.B TAG FILE FORMAT
+below for the input format. This option is not compatible with
+specifying tagging on the command line.
+.RE
+
+.RS 4
+.TP 4
+.BR "\-\-input=" <filename>
+
+Read input from given file, instead of from stdin. Implies
+.BR --batch .
+
+.SH TAG FILE FORMAT
+
+The input must consist of lines of the format:
+
+.RI "+<" tag ">|\-<" tag "> [...] [\-\-] <" search-term "> [...]"
+
+Each line is interpreted similarly to
+.B notmuch tag
+command line arguments. The delimiter is one or more spaces ' '. Any
+characters in
+.RI < tag >
+and
+.RI < search-term >
+.B may
+be hex-encoded with %NN where NN is the hexadecimal value of the
+character. To hex-encode a character with a multi-byte UTF-8 encoding,
+hex-encode each byte.
+Any spaces in <tag> and <search-term>
+.B must
+be hex-encoded as %20.  The ':' indicating a prefix like 'id:' or 'tag:',
+the '*' wildcard for all messages, and any characters that are not
+part of
+.RI  < tag >
+or
+.RI < search-term >
+.B must not
+be hex-encoded.
+
+Leading and trailing space ' ' is ignored. Empty lines and lines
+beginning with '#' are ignored.
+
+.SS EXAMPLE
+
+The following shows a valid input to batch tagging.
+
+.RS
+.nf
++winner *
++foo::bar -- One
++found::it -- tag:foo::bar
+# ignore this line and the next
+
++space%20in%20tags -- Two
++crazy( +tags\ +&are +#possible -- tag:space%20in%20tags
++match*crazy -- tag:crazy(
+.fi
+.RE
+
 .SH SEE ALSO
 
 \fBnotmuch\fR(1), \fBnotmuch-config\fR(1), \fBnotmuch-count\fR(1),
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [Patch v8 17/18] test/tagging: add test for naked punctuation in tags; compare with quoting spaces.
  2012-12-21 13:08 [Patch v8 01/18] parse_tag_line: use enum for return value david
                   ` (14 preceding siblings ...)
  2012-12-21 13:08 ` [Patch v8 16/18] man: document notmuch tag --batch, --input options david
@ 2012-12-21 13:08 ` david
  2012-12-21 13:08 ` [Patch v8 18/18] more man fixup david
                   ` (2 subsequent siblings)
  18 siblings, 0 replies; 25+ messages in thread
From: david @ 2012-12-21 13:08 UTC (permalink / raw)
  To: notmuch; +Cc: David Bremner

From: David Bremner <bremner@debian.org>

This test also serves as documentation of the quoting
requirements. The comment lines are so that it exactly matches the man
page. Nothing more embarrassing than having an example in the man page
fail.
---
 test/tagging |   23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/test/tagging b/test/tagging
index 88cd18b..e1aefdc 100755
--- a/test/tagging
+++ b/test/tagging
@@ -228,6 +228,29 @@ notmuch dump --format=batch-tag | sort > OUTPUT
 notmuch restore --format=batch-tag < BACKUP
 test_expect_equal_file EXPECTED OUTPUT
 
+test_begin_subtest "--batch: only space _needs_ to be quoted"
+notmuch dump --format=batch-tag > BACKUP
+
+notmuch tag --batch <<EOF
++winner *
++foo::bar -- One
++found::it -- tag:foo::bar
+# ignore this line and the next
+
++space%20in%20tags -- Two
++crazy( +tags\ +&are +#possible -- tag:space%20in%20tags
++match*crazy -- tag:crazy(
+EOF
+
+cat <<EOF > EXPECTED
++%23possible +%26are +crazy%28 +inbox +match%2acrazy +space%20in%20tags +tag4 +tag5 +tags%5c +unread +winner -- id:msg-002@notmuch-test-suite
++foo%3a%3abar +found%3a%3ait +inbox +tag5 +unread +winner -- id:msg-001@notmuch-test-suite
+EOF
+
+notmuch dump --format=batch-tag | sort > OUTPUT
+notmuch restore --format=batch-tag < BACKUP
+test_expect_equal_file EXPECTED OUTPUT
+
 test_begin_subtest '--batch: unicode message-ids'
 
 ${TEST_DIRECTORY}/random-corpus --config-path=${NOTMUCH_CONFIG} \
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [Patch v8 18/18] more man fixup.
  2012-12-21 13:08 [Patch v8 01/18] parse_tag_line: use enum for return value david
                   ` (15 preceding siblings ...)
  2012-12-21 13:08 ` [Patch v8 17/18] test/tagging: add test for naked punctuation in tags; compare with quoting spaces david
@ 2012-12-21 13:08 ` david
  2012-12-21 13:29 ` [Patch v8 01/18] parse_tag_line: use enum for return value David Bremner
  2012-12-22 21:48 ` Jani Nikula
  18 siblings, 0 replies; 25+ messages in thread
From: david @ 2012-12-21 13:08 UTC (permalink / raw)
  To: notmuch; +Cc: David Bremner

From: David Bremner <bremner@debian.org>

---
 man/man1/notmuch-tag.1 |    3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/man/man1/notmuch-tag.1 b/man/man1/notmuch-tag.1
index 2459740..d14eeef 100644
--- a/man/man1/notmuch-tag.1
+++ b/man/man1/notmuch-tag.1
@@ -91,7 +91,8 @@ beginning with '#' are ignored.
 
 .SS EXAMPLE
 
-The following shows a valid input to batch tagging.
+The following shows a valid input to batch tagging. Note that only the
+isolated '*' acts as a wildcard.
 
 .RS
 .nf
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* Re: [Patch v8 01/18] parse_tag_line: use enum for return value.
  2012-12-21 13:08 [Patch v8 01/18] parse_tag_line: use enum for return value david
                   ` (16 preceding siblings ...)
  2012-12-21 13:08 ` [Patch v8 18/18] more man fixup david
@ 2012-12-21 13:29 ` David Bremner
  2012-12-22 23:47   ` Jani Nikula
  2012-12-22 21:48 ` Jani Nikula
  18 siblings, 1 reply; 25+ messages in thread
From: David Bremner @ 2012-12-21 13:29 UTC (permalink / raw)
  To: notmuch

david@tethera.net writes:

> From: David Bremner <bremner@debian.org>
>
> This is essentially cosmetic, since success=0 is promised by
> the comments in tag-utils.h.

In an amazing display of skill and style, in attempting to abort a git
send-email run so that I could rebase that last fixup away, I managed to
send the series without the cover letter. So, no point sending the whole
thing again just for that.

This ever-growing series obsoletes 

     id:1355492062-7546-1-git-send-email-david@tethera.net

I think I decided to ignore 

      id:871uettxln.fsf@qmul.ac.uk

Perhaps that should be documented, I'm not sure. Since the batch tagging
interface also allows you to remove strangely named tags, I think it is
ok.

Hopefully the new man page clears up what is and isn't allowed for
unencoded characters. Since spaces are the only thing used as
(top-level) delimiters now, they are the only thing "compressed" by
strtok_len.

For id:87vcc2q5n2.fsf@nikula.org, there is a seperate series
id:1355716788-2940-1-git-send-email-david@tethera.net to fix the memory
leak/allocation confusion. This series needs to be rebased onto the
final version of that one. I decided _not_ to relax the requirement that
the ':' marking a prefix be un-encoded, but rather update the
documentation.

A summary of changes to the series follows; I left out the interdiff for
the notmuch-tag man page, and things that got their own new commit.

----------------------------------------------------------------------
commit 30adcab7678296b22d86da06d472c3920c336747
Author: David Bremner <bremner@debian.org>
Date:   Sat Dec 15 15:17:40 2012 -0400

    fixup: clarify TAG_FLAG_ID_ONLY comments and name

diff --git a/notmuch-restore.c b/notmuch-restore.c
index 112f2f3..1b66e76 100644
--- a/notmuch-restore.c
+++ b/notmuch-restore.c
@@ -208,7 +208,7 @@ notmuch_restore_command (unused (void *ctx), int argc, char *argv[])
 	if (input_format == DUMP_FORMAT_SUP) {
 	    ret = parse_sup_line (ctx, line, &query_string, tag_ops);
 	} else {
-	    ret = parse_tag_line (ctx, line, TAG_FLAG_BE_GENEROUS | TAG_FLAG_ID_ONLY,
+	    ret = parse_tag_line (ctx, line, TAG_FLAG_BE_GENEROUS | TAG_FLAG_ID_DIRECT,
 				  &query_string, tag_ops);
 	}
 
diff --git a/tag-util.c b/tag-util.c
index 8fea76c..37bffd5 100644
--- a/tag-util.c
+++ b/tag-util.c
@@ -201,7 +201,7 @@ parse_tag_line (void *ctx, char *line,
     }
 
     /* tok now points to the query string */
-    if (flags & TAG_FLAG_ID_ONLY) {
+    if (flags & TAG_FLAG_ID_DIRECT) {
 	/* this is under the assumption that any whitespace in the
 	 * message-id must be hex-encoded. The check is probably not
 	 * perfect for exotic unicode whitespace; as fallback the
diff --git a/tag-util.h b/tag-util.h
index 7674051..eec00cf 100644
--- a/tag-util.h
+++ b/tag-util.h
@@ -28,8 +28,10 @@ typedef enum {
      */
     TAG_FLAG_BE_GENEROUS = (1 << 3),
 
-    /* Query consists of a single id:$message-id */
-    TAG_FLAG_ID_ONLY = (1 << 4)
+    /* Directly look up messages by hex-decoded message-id, rather
+     * than parsing a general query. The query MUST be of the form
+     * id:$message-id. */
+    TAG_FLAG_ID_DIRECT = (1 << 4)
 
 } tag_op_flag_t;
 

commit 256ec05a347b949e6b2bda0d2b6902ed8ab3fab3
Author: David Bremner <bremner@debian.org>
Date:   Sat Dec 15 19:54:05 2012 -0400

    fixup for id:87sj778ajb.fsf@qmul.ac.uk and id:87zk1fot39.fsf@nikula.org

diff --git a/notmuch-tag.c b/notmuch-tag.c
index 44b5bb4..5c8bad4 100644
--- a/notmuch-tag.c
+++ b/notmuch-tag.c
@@ -65,14 +65,16 @@ _optimize_tag_query (void *ctx, const char *orig_query_string,
     else
 	query_string = talloc_asprintf (ctx, "( %s ) and (", orig_query_string);
 
-
     /* Boolean terms surrounded by double quotes can contain any
      * character.  Double quotes are quoted by doubling them. */
 
     for (i = 0; i < tag_op_list_size (list) && query_string; i++) {
-	double_quote_str (ctx,
-			  tag_op_list_tag (list, i),
-			  &escaped, &escaped_len);
+	/* XXX in case of OOM, query_string will be deallocated when
+	 * ctx is, which might be at shutdown */
+	if (double_quote_str (ctx,
+			      tag_op_list_tag (list, i),
+			      &escaped, &escaped_len))
+	    return NULL;
 
 	query_string = talloc_asprintf_append_buffer (
 	    query_string, "%s%stag:%s", join,
diff --git a/util/string-util.c b/util/string-util.c
index ea7c25b..b9039f4 100644
--- a/util/string-util.c
+++ b/util/string-util.c
@@ -46,8 +46,11 @@ double_quote_str (void *ctx, const char *str,
     for (in = str; *in; in++)
 	needed += (*in == '"') ? 2 : 1;
 
-    if (needed > *len)
-	*buf = talloc_realloc (ctx, *buf, char, 2*needed);
+    if ((*buf == NULL) || (needed > *len)) {
+	*len = 2 * needed;
+	*buf = talloc_realloc (ctx, *buf, char, *len);
+    }
+
 
     if (! *buf)
 	return 1;
@@ -62,7 +65,7 @@ double_quote_str (void *ctx, const char *str,
 	*out++ = *in++;
     }
     *out++ = '"';
-    *out = 0;
+    *out = '\0';
 
     return 0;
 }
diff --git a/util/string-util.h b/util/string-util.h
index b593bc7..4fc7942 100644
--- a/util/string-util.h
+++ b/util/string-util.h
@@ -23,7 +23,7 @@ char *strtok_len (char *s, const char *delim, size_t *len);
  * Any internal double-quotes are doubled, i.e. a"b -> "a""b"
  *
  * Output is into buf; it may be talloc_realloced
- * return 0 on success, non-zero on failure.
+ * Return: 0 on success, non-zero on memory allocation failure.
  */
 int double_quote_str (void *talloc_ctx, const char *str,
 		      char **buf, size_t *len);

commit 4b26ac6f99c74cced64cfae317e6ac4b6a8f706f
Author: David Bremner <bremner@debian.org>
Date:   Mon Dec 17 23:36:30 2012 -0400

    fixup: rewrite decode+quote function for queries.
    
    Rather than splitting on ':' at the top level, we examine each space
    delimited token for a prefix.

diff --git a/tag-util.c b/tag-util.c
index 37bffd5..b0a846b 100644
--- a/tag-util.c
+++ b/tag-util.c
@@ -56,9 +56,14 @@ illegal_tag (const char *tag, notmuch_bool_t remove)
     return NULL;
 }
 
+/* Input is a hex encoded string, presumed to be a query for Xapian.
+ *
+ * Space delimited tokens are decoded and quoted, with '*' and prefixes
+ * of the form "foo:" passed through unquoted.
+ */
 static tag_parse_status_t
-quote_and_decode_query (void *ctx, char *encoded, const char *line_for_error,
-			char **query_string)
+unhex_and_quote (void *ctx, char *encoded, const char *line_for_error,
+		 char **query_string)
 {
     char *tok = encoded;
     size_t tok_len = 0;
@@ -68,14 +73,43 @@ quote_and_decode_query (void *ctx, char *encoded, const char *line_for_error,
 
     *query_string = talloc_strdup (ctx, "");
 
-    while (*query_string &&
-	   (tok = strtok_len (tok + tok_len, ": ", &tok_len)) != NULL) {
-	char delim = tok[tok_len];
+    while ((tok = strtok_len (tok + tok_len, " ", &tok_len)) != NULL) {
+
+	size_t prefix_len;
+	char delim = *(tok + tok_len);
 
 	*(tok + tok_len++) = '\0';
 
-	if (strcspn (tok, "%") < tok_len - 1) {
-	    /* something to decode */
+	prefix_len = hex_invariant (tok, tok_len);
+
+	if ((strcmp (tok, "*") == 0) || prefix_len >= tok_len - 1) {
+
+	    /* pass some things through without quoting or decoding.
+	     * Note for '*' this is mandatory.
+	     */
+
+	    if (! (*query_string = talloc_asprintf_append_buffer (
+		       *query_string, "%s%c", tok, delim))) {
+
+		ret = line_error (TAG_PARSE_OUT_OF_MEMORY,
+				  line_for_error, "aborting");
+		goto DONE;
+	    }
+
+	} else {
+	    /* potential prefix: one for ':', then something after */
+	    if ((tok_len - prefix_len > 2) && *(tok + prefix_len) == ':') {
+		if (! (*query_string = talloc_strndup_append (*query_string,
+							      tok,
+							      prefix_len + 1))) {
+		    ret = line_error (TAG_PARSE_OUT_OF_MEMORY,
+				      line_for_error, "aborting");
+		    goto DONE;
+		}
+		tok += prefix_len + 1;
+		tok_len -= prefix_len + 1;
+	    }
+
 	    if (hex_decode_inplace (tok) != HEX_SUCCESS) {
 		ret = line_error (TAG_PARSE_INVALID, line_for_error,
 				  "hex decoding of token '%s' failed", tok);
@@ -87,17 +121,14 @@ quote_and_decode_query (void *ctx, char *encoded, const char *line_for_error,
 				  line_for_error, "aborting");
 		goto DONE;
 	    }
-	    *query_string = talloc_asprintf_append_buffer (
-		*query_string, "%s%c", buf, delim);
 
-	} else {
-	    /* This is not just an optimization, but used to preserve
-	     * prefixes like id:, which cannot be quoted.
-	     */
-	    *query_string = talloc_asprintf_append_buffer (
-		*query_string, "%s%c", tok, delim);
+	    if (! (*query_string = talloc_asprintf_append_buffer (
+		       *query_string, "%s%c", buf, delim))) {
+		ret = line_error (TAG_PARSE_OUT_OF_MEMORY,
+				  line_for_error, "aborting");
+		goto DONE;
+	    }
 	}
-
     }
 
   DONE:
@@ -220,7 +251,7 @@ parse_tag_line (void *ctx, char *line,
 	/* skip 'id:' */
 	*query_string = tok + 3;
     } else {
-	ret = quote_and_decode_query (ctx, tok, line_for_error, query_string);
+	ret = unhex_and_quote (ctx, tok, line_for_error, query_string);
     }
 
   DONE:

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* Re: [Patch v8 01/18] parse_tag_line: use enum for return value.
  2012-12-21 13:08 [Patch v8 01/18] parse_tag_line: use enum for return value david
                   ` (17 preceding siblings ...)
  2012-12-21 13:29 ` [Patch v8 01/18] parse_tag_line: use enum for return value David Bremner
@ 2012-12-22 21:48 ` Jani Nikula
  18 siblings, 0 replies; 25+ messages in thread
From: Jani Nikula @ 2012-12-22 21:48 UTC (permalink / raw)
  To: david, notmuch; +Cc: David Bremner

On Fri, 21 Dec 2012, david@tethera.net wrote:
> From: David Bremner <bremner@debian.org>
>
> This is essentially cosmetic, since success=0 is promised by
> the comments in tag-utils.h.

Squash this on top for completeness:

diff --git a/tag-util.c b/tag-util.c
index 12aab08..17d7ac2 100644
--- a/tag-util.c
+++ b/tag-util.c
@@ -47,7 +47,7 @@ parse_tag_line (void *ctx, char *line,
     line_for_error = talloc_strdup (ctx, line);
     if (line_for_error == NULL) {
 	fprintf (stderr, "Error: out of memory\n");
-	return -1;
+	return TAG_PARSE_OUT_OF_MEMORY;
     }
 
     /* remove leading space */

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* Re: [Patch v8 09/18] cli: add support for batch tagging operations to "notmuch tag"
  2012-12-21 13:08 ` [Patch v8 09/18] cli: add support for batch tagging operations to "notmuch tag" david
@ 2012-12-22 22:51   ` Jani Nikula
  0 siblings, 0 replies; 25+ messages in thread
From: Jani Nikula @ 2012-12-22 22:51 UTC (permalink / raw)
  To: david, notmuch

On Fri, 21 Dec 2012, david@tethera.net wrote:
> From: Jani Nikula <jani@nikula.org>
>
> Add support for batch tagging operations through stdin to "notmuch
> tag". This can be enabled with the new --batch command line option to
> "notmuch tag". The input must consist of lines of the format:
>
> +<tag>|-<tag> [...] [--] <search-term> [...]
>
> Each line is interpreted similarly to "notmuch tag" command line
> arguments. The delimiter is one or more spaces ' '. Any characters in
> <tag> and <search-term> MAY be hex encoded with %NN where NN is the
> hexadecimal value of the character. Any ' ' and '%' characters in
> <tag> and <search-term> MUST be hex encoded (using %20 and %25,
> respectively). Any characters that are not part of <tag> or
> <search-term> MUST NOT be hex encoded.
>
> Leading and trailing space ' ' is ignored. Empty lines and lines
> beginning with '#' are ignored.
>
> Signed-off-by: Jani Nikula <jani@nikula.org>
>
> Hacked-like-crazy-by: David Bremner <david@tethera.net>
> ---
>  notmuch-tag.c |   91 ++++++++++++++++++++++++++++++++++++++++++++++++++++-----
>  1 file changed, 83 insertions(+), 8 deletions(-)
>
> diff --git a/notmuch-tag.c b/notmuch-tag.c
> index a480215..5c8bad4 100644
> --- a/notmuch-tag.c
> +++ b/notmuch-tag.c
> @@ -132,6 +132,46 @@ tag_query (void *ctx, notmuch_database_t *notmuch, const char *query_string,
>      return interrupted;
>  }
>  
> +static int
> +tag_file (void *ctx, notmuch_database_t *notmuch, tag_op_flag_t flags,
> +	  FILE *input)
> +{
> +    char *line = NULL;
> +    char *query_string = NULL;
> +    size_t line_size = 0;
> +    ssize_t line_len;
> +    int ret = 0;
> +    tag_op_list_t *tag_ops;
> +
> +    tag_ops = tag_op_list_create (ctx);
> +    if (tag_ops == NULL) {
> +	fprintf (stderr, "Out of memory.\n");
> +	return 1;
> +    }
> +
> +    while ((line_len = getline (&line, &line_size, input)) != -1 &&
> +	   ! interrupted) {
> +
> +	ret = parse_tag_line (ctx, line, TAG_FLAG_NONE,
> +			      &query_string, tag_ops);
> +
> +	if (ret > 0)
> +	    continue;
> +
> +	if (ret < 0)
> +	    break;
> +
> +	ret = tag_query (ctx, notmuch, query_string, tag_ops, flags);
> +	if (ret)
> +	    break;
> +    }
> +
> +    if (line)
> +	free (line);
> +
> +    return ret;
> +}
> +
>  int
>  notmuch_tag_command (void *ctx, int argc, char *argv[])
>  {
> @@ -141,6 +181,10 @@ notmuch_tag_command (void *ctx, int argc, char *argv[])
>      notmuch_database_t *notmuch;
>      struct sigaction action;
>      tag_op_flag_t tag_flags = TAG_FLAG_NONE;
> +    notmuch_bool_t batch = FALSE;
> +    FILE *input = stdin;
> +    char *input_file_name = NULL;
> +    int opt_index;
>      int ret = 0;
>  
>      /* Setup our handler for SIGINT */
> @@ -150,15 +194,43 @@ notmuch_tag_command (void *ctx, int argc, char *argv[])
>      action.sa_flags = SA_RESTART;
>      sigaction (SIGINT, &action, NULL);
>  
> -    tag_ops = tag_op_list_create (ctx);
> -    if (tag_ops == NULL) {
> -	fprintf (stderr, "Out of memory.\n");
> +    notmuch_opt_desc_t options[] = {
> +	{ NOTMUCH_OPT_BOOLEAN, &batch, "batch", 0, 0 },
> +	{ NOTMUCH_OPT_STRING, &input_file_name, "input", 'i', 0 },
> +	{ 0, 0, 0, 0, 0 }
> +    };
> +
> +    opt_index = parse_arguments (argc, argv, options, 1);
> +    if (opt_index < 0)
>  	return 1;
> +
> +    if (input_file_name) {
> +	batch = TRUE;
> +	input = fopen (input_file_name, "r");
> +	if (input == NULL) {
> +	    fprintf (stderr, "Error opening %s for reading: %s\n",
> +		     input_file_name, strerror (errno));
> +	    return 1;
> +	}
>      }
>  
> -    if (parse_tag_command_line (ctx, argc - 1, argv + 1,
> -				&query_string, tag_ops))
> -	return 1;
> +    if (batch) {
> +	if (opt_index != argc) {
> +	    fprintf (stderr, "Can't specify both cmdline and stdin!\n");
> +	    return 1;
> +	}
> +    } else {
> +
> +	tag_ops = tag_op_list_create (ctx);
> +	if (tag_ops == NULL) {
> +	    fprintf (stderr, "Out of memory.\n");
> +	    return 1;
> +	}
> +
> +	if (parse_tag_command_line (ctx, argc - opt_index, argv + opt_index,
> +				    &query_string, tag_ops))
> +	    return 1;
> +    }
>  
>      config = notmuch_config_open (ctx, NULL, NULL);
>      if (config == NULL)
> @@ -171,9 +243,12 @@ notmuch_tag_command (void *ctx, int argc, char *argv[])
>      if (notmuch_config_get_maildir_synchronize_flags (config))
>  	tag_flags |= TAG_FLAG_MAILDIR_SYNC;
>  
> -    ret = tag_query (ctx, notmuch, query_string, tag_ops, tag_flags);
> +    if (batch)
> +	ret = tag_file (ctx, notmuch, tag_flags, input);
> +    else
> +	ret = tag_query (ctx, notmuch, query_string, tag_ops, tag_flags);
>  
>      notmuch_database_destroy (notmuch);

Nitpick, missing fclose if input != stdin.

BR,
Jani.

>  
> -    return ret;
> +    return ret || interrupted;
>  }
> -- 
> 1.7.10.4

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [Patch v8 07/18] unhex_and_quote: new function to quote hex-decoded queries
  2012-12-21 13:08 ` [Patch v8 07/18] unhex_and_quote: new function to quote hex-decoded queries david
@ 2012-12-22 23:36   ` Jani Nikula
  2012-12-23  2:59     ` [PATCH] simplify unhex_and_quote david
  0 siblings, 1 reply; 25+ messages in thread
From: Jani Nikula @ 2012-12-22 23:36 UTC (permalink / raw)
  To: david, notmuch; +Cc: David Bremner

On Fri, 21 Dec 2012, david@tethera.net wrote:
> From: David Bremner <bremner@debian.org>
>
> Space delimited tokens are hex decoded and then quoted according to
> Xapian rules. Prefixes and '*' are passed through unquoted, as is
> anything that hex-decoding would not change.
> ---
>  tag-util.c |   81 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 81 insertions(+)
>
> diff --git a/tag-util.c b/tag-util.c
> index f89669a..46aab4e 100644
> --- a/tag-util.c
> +++ b/tag-util.c
> @@ -56,6 +56,87 @@ illegal_tag (const char *tag, notmuch_bool_t remove)
>      return NULL;
>  }
>  
> +/* Input is a hex encoded string, presumed to be a query for Xapian.
> + *
> + * Space delimited tokens are decoded and quoted, with '*' and prefixes
> + * of the form "foo:" passed through unquoted.
> + */
> +static tag_parse_status_t
> +unhex_and_quote (void *ctx, char *encoded, const char *line_for_error,
> +		 char **query_string)
> +{
> +    char *tok = encoded;
> +    size_t tok_len = 0;
> +    char *buf = NULL;
> +    size_t buf_len = 0;
> +    tag_parse_status_t ret = TAG_PARSE_SUCCESS;
> +
> +    *query_string = talloc_strdup (ctx, "");
> +
> +    while ((tok = strtok_len (tok + tok_len, " ", &tok_len)) != NULL) {
> +
> +	size_t prefix_len;
> +	char delim = *(tok + tok_len);
> +
> +	*(tok + tok_len++) = '\0';

You need to do tok_len++ to satisfy the next round of strtok_len, but I
think for clarity of the code below you should move tok_len++ to the end
of the while block. And review tok_len usage below.

> +
> +	prefix_len = hex_invariant (tok, tok_len);

This, along with the doc comment "...initial segment of str that would
not be changed by hex encoding..." for hex_invariant, was the hardest
bit to understand. I don't follow how hex *encoding* matters here; the
input is only affected by hex *decoding*, and that depends on %NN only.

Should this be a function that counts the length of the initial segment
of str that consists of valid Xapian prefix characters and does not
contain % (I don't know if that's included in Xapian prefix
characters). In the end, the contents of the function may be (I don't
know) exactly the same as hex_invariant, but the function name would be
more self explanatory.

Does that make any sense to you...?

> +
> +	if ((strcmp (tok, "*") == 0) || prefix_len >= tok_len - 1) {

With the tok_len++ at the end, I think this should have "prefix_len ==
tok_len" for clarity.

> +
> +	    /* pass some things through without quoting or decoding.
> +	     * Note for '*' this is mandatory.
> +	     */
> +
> +	    if (! (*query_string = talloc_asprintf_append_buffer (
> +		       *query_string, "%s%c", tok, delim))) {
> +
> +		ret = line_error (TAG_PARSE_OUT_OF_MEMORY,
> +				  line_for_error, "aborting");
> +		goto DONE;
> +	    }
> +
> +	} else {
> +	    /* potential prefix: one for ':', then something after */
> +	    if ((tok_len - prefix_len > 2) && *(tok + prefix_len) == ':') {

I don't think this takes into account the tok_len++. So this should stay
as it is if you move tok_len++ to the end.

> +		if (! (*query_string = talloc_strndup_append (*query_string,
> +							      tok,
> +							      prefix_len + 1))) {
> +		    ret = line_error (TAG_PARSE_OUT_OF_MEMORY,
> +				      line_for_error, "aborting");
> +		    goto DONE;
> +		}
> +		tok += prefix_len + 1;
> +		tok_len -= prefix_len + 1;
> +	    }
> +
> +	    if (hex_decode_inplace (tok) != HEX_SUCCESS) {
> +		ret = line_error (TAG_PARSE_INVALID, line_for_error,
> +				  "hex decoding of token '%s' failed", tok);
> +		goto DONE;
> +	    }
> +
> +	    if (double_quote_str (ctx, tok, &buf, &buf_len)) {
> +		ret = line_error (TAG_PARSE_OUT_OF_MEMORY,
> +				  line_for_error, "aborting");
> +		goto DONE;
> +	    }
> +
> +	    if (! (*query_string = talloc_asprintf_append_buffer (
> +		       *query_string, "%s%c", buf, delim))) {
> +		ret = line_error (TAG_PARSE_OUT_OF_MEMORY,
> +				  line_for_error, "aborting");
> +		goto DONE;
> +	    }
> +	}

I think tok_len++ should be here.

BR,
Jani.

> +    }
> +
> +  DONE:
> +    if (ret != TAG_PARSE_SUCCESS && *query_string)
> +	talloc_free (*query_string);
> +    return ret;
> +}
> +
>  tag_parse_status_t
>  parse_tag_line (void *ctx, char *line,
>  		tag_op_flag_t flags,
> -- 
> 1.7.10.4
>
> _______________________________________________
> notmuch mailing list
> notmuch@notmuchmail.org
> http://notmuchmail.org/mailman/listinfo/notmuch

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [Patch v8 01/18] parse_tag_line: use enum for return value.
  2012-12-21 13:29 ` [Patch v8 01/18] parse_tag_line: use enum for return value David Bremner
@ 2012-12-22 23:47   ` Jani Nikula
  0 siblings, 0 replies; 25+ messages in thread
From: Jani Nikula @ 2012-12-22 23:47 UTC (permalink / raw)
  To: David Bremner, notmuch

On Fri, 21 Dec 2012, David Bremner <david@tethera.net> wrote:
> david@tethera.net writes:
>
>> From: David Bremner <bremner@debian.org>
>>
>> This is essentially cosmetic, since success=0 is promised by
>> the comments in tag-utils.h.
>
> In an amazing display of skill and style, in attempting to abort a git
> send-email run so that I could rebase that last fixup away, I managed to
> send the series without the cover letter. So, no point sending the whole
> thing again just for that.
>
> This ever-growing series obsoletes 
>
>      id:1355492062-7546-1-git-send-email-david@tethera.net
>
> I think I decided to ignore 
>
>       id:871uettxln.fsf@qmul.ac.uk
>
> Perhaps that should be documented, I'm not sure. Since the batch tagging
> interface also allows you to remove strangely named tags, I think it is
> ok.
>
> Hopefully the new man page clears up what is and isn't allowed for
> unencoded characters. Since spaces are the only thing used as
> (top-level) delimiters now, they are the only thing "compressed" by
> strtok_len.
>
> For id:87vcc2q5n2.fsf@nikula.org, there is a seperate series
> id:1355716788-2940-1-git-send-email-david@tethera.net to fix the memory
> leak/allocation confusion. This series needs to be rebased onto the
> final version of that one. I decided _not_ to relax the requirement that
> the ':' marking a prefix be un-encoded, but rather update the
> documentation.

I think the series looks good, apart from the clarifications I asked for
in [1], and the rebase on the other series. Thanks for your persistence
in following through with this. It has proven to be much more work than
I envisioned when I sent the first patches.

BR,
Jani.


[1] id:87txrdhd7g.fsf@oiva.home.nikula.org


>
> A summary of changes to the series follows; I left out the interdiff for
> the notmuch-tag man page, and things that got their own new commit.
>
> ----------------------------------------------------------------------
> commit 30adcab7678296b22d86da06d472c3920c336747
> Author: David Bremner <bremner@debian.org>
> Date:   Sat Dec 15 15:17:40 2012 -0400
>
>     fixup: clarify TAG_FLAG_ID_ONLY comments and name
>
> diff --git a/notmuch-restore.c b/notmuch-restore.c
> index 112f2f3..1b66e76 100644
> --- a/notmuch-restore.c
> +++ b/notmuch-restore.c
> @@ -208,7 +208,7 @@ notmuch_restore_command (unused (void *ctx), int argc, char *argv[])
>  	if (input_format == DUMP_FORMAT_SUP) {
>  	    ret = parse_sup_line (ctx, line, &query_string, tag_ops);
>  	} else {
> -	    ret = parse_tag_line (ctx, line, TAG_FLAG_BE_GENEROUS | TAG_FLAG_ID_ONLY,
> +	    ret = parse_tag_line (ctx, line, TAG_FLAG_BE_GENEROUS | TAG_FLAG_ID_DIRECT,
>  				  &query_string, tag_ops);
>  	}
>  
> diff --git a/tag-util.c b/tag-util.c
> index 8fea76c..37bffd5 100644
> --- a/tag-util.c
> +++ b/tag-util.c
> @@ -201,7 +201,7 @@ parse_tag_line (void *ctx, char *line,
>      }
>  
>      /* tok now points to the query string */
> -    if (flags & TAG_FLAG_ID_ONLY) {
> +    if (flags & TAG_FLAG_ID_DIRECT) {
>  	/* this is under the assumption that any whitespace in the
>  	 * message-id must be hex-encoded. The check is probably not
>  	 * perfect for exotic unicode whitespace; as fallback the
> diff --git a/tag-util.h b/tag-util.h
> index 7674051..eec00cf 100644
> --- a/tag-util.h
> +++ b/tag-util.h
> @@ -28,8 +28,10 @@ typedef enum {
>       */
>      TAG_FLAG_BE_GENEROUS = (1 << 3),
>  
> -    /* Query consists of a single id:$message-id */
> -    TAG_FLAG_ID_ONLY = (1 << 4)
> +    /* Directly look up messages by hex-decoded message-id, rather
> +     * than parsing a general query. The query MUST be of the form
> +     * id:$message-id. */
> +    TAG_FLAG_ID_DIRECT = (1 << 4)
>  
>  } tag_op_flag_t;
>  
>
> commit 256ec05a347b949e6b2bda0d2b6902ed8ab3fab3
> Author: David Bremner <bremner@debian.org>
> Date:   Sat Dec 15 19:54:05 2012 -0400
>
>     fixup for id:87sj778ajb.fsf@qmul.ac.uk and id:87zk1fot39.fsf@nikula.org
>
> diff --git a/notmuch-tag.c b/notmuch-tag.c
> index 44b5bb4..5c8bad4 100644
> --- a/notmuch-tag.c
> +++ b/notmuch-tag.c
> @@ -65,14 +65,16 @@ _optimize_tag_query (void *ctx, const char *orig_query_string,
>      else
>  	query_string = talloc_asprintf (ctx, "( %s ) and (", orig_query_string);
>  
> -
>      /* Boolean terms surrounded by double quotes can contain any
>       * character.  Double quotes are quoted by doubling them. */
>  
>      for (i = 0; i < tag_op_list_size (list) && query_string; i++) {
> -	double_quote_str (ctx,
> -			  tag_op_list_tag (list, i),
> -			  &escaped, &escaped_len);
> +	/* XXX in case of OOM, query_string will be deallocated when
> +	 * ctx is, which might be at shutdown */
> +	if (double_quote_str (ctx,
> +			      tag_op_list_tag (list, i),
> +			      &escaped, &escaped_len))
> +	    return NULL;
>  
>  	query_string = talloc_asprintf_append_buffer (
>  	    query_string, "%s%stag:%s", join,
> diff --git a/util/string-util.c b/util/string-util.c
> index ea7c25b..b9039f4 100644
> --- a/util/string-util.c
> +++ b/util/string-util.c
> @@ -46,8 +46,11 @@ double_quote_str (void *ctx, const char *str,
>      for (in = str; *in; in++)
>  	needed += (*in == '"') ? 2 : 1;
>  
> -    if (needed > *len)
> -	*buf = talloc_realloc (ctx, *buf, char, 2*needed);
> +    if ((*buf == NULL) || (needed > *len)) {
> +	*len = 2 * needed;
> +	*buf = talloc_realloc (ctx, *buf, char, *len);
> +    }
> +
>  
>      if (! *buf)
>  	return 1;
> @@ -62,7 +65,7 @@ double_quote_str (void *ctx, const char *str,
>  	*out++ = *in++;
>      }
>      *out++ = '"';
> -    *out = 0;
> +    *out = '\0';
>  
>      return 0;
>  }
> diff --git a/util/string-util.h b/util/string-util.h
> index b593bc7..4fc7942 100644
> --- a/util/string-util.h
> +++ b/util/string-util.h
> @@ -23,7 +23,7 @@ char *strtok_len (char *s, const char *delim, size_t *len);
>   * Any internal double-quotes are doubled, i.e. a"b -> "a""b"
>   *
>   * Output is into buf; it may be talloc_realloced
> - * return 0 on success, non-zero on failure.
> + * Return: 0 on success, non-zero on memory allocation failure.
>   */
>  int double_quote_str (void *talloc_ctx, const char *str,
>  		      char **buf, size_t *len);
>
> commit 4b26ac6f99c74cced64cfae317e6ac4b6a8f706f
> Author: David Bremner <bremner@debian.org>
> Date:   Mon Dec 17 23:36:30 2012 -0400
>
>     fixup: rewrite decode+quote function for queries.
>     
>     Rather than splitting on ':' at the top level, we examine each space
>     delimited token for a prefix.
>
> diff --git a/tag-util.c b/tag-util.c
> index 37bffd5..b0a846b 100644
> --- a/tag-util.c
> +++ b/tag-util.c
> @@ -56,9 +56,14 @@ illegal_tag (const char *tag, notmuch_bool_t remove)
>      return NULL;
>  }
>  
> +/* Input is a hex encoded string, presumed to be a query for Xapian.
> + *
> + * Space delimited tokens are decoded and quoted, with '*' and prefixes
> + * of the form "foo:" passed through unquoted.
> + */
>  static tag_parse_status_t
> -quote_and_decode_query (void *ctx, char *encoded, const char *line_for_error,
> -			char **query_string)
> +unhex_and_quote (void *ctx, char *encoded, const char *line_for_error,
> +		 char **query_string)
>  {
>      char *tok = encoded;
>      size_t tok_len = 0;
> @@ -68,14 +73,43 @@ quote_and_decode_query (void *ctx, char *encoded, const char *line_for_error,
>  
>      *query_string = talloc_strdup (ctx, "");
>  
> -    while (*query_string &&
> -	   (tok = strtok_len (tok + tok_len, ": ", &tok_len)) != NULL) {
> -	char delim = tok[tok_len];
> +    while ((tok = strtok_len (tok + tok_len, " ", &tok_len)) != NULL) {
> +
> +	size_t prefix_len;
> +	char delim = *(tok + tok_len);
>  
>  	*(tok + tok_len++) = '\0';
>  
> -	if (strcspn (tok, "%") < tok_len - 1) {
> -	    /* something to decode */
> +	prefix_len = hex_invariant (tok, tok_len);
> +
> +	if ((strcmp (tok, "*") == 0) || prefix_len >= tok_len - 1) {
> +
> +	    /* pass some things through without quoting or decoding.
> +	     * Note for '*' this is mandatory.
> +	     */
> +
> +	    if (! (*query_string = talloc_asprintf_append_buffer (
> +		       *query_string, "%s%c", tok, delim))) {
> +
> +		ret = line_error (TAG_PARSE_OUT_OF_MEMORY,
> +				  line_for_error, "aborting");
> +		goto DONE;
> +	    }
> +
> +	} else {
> +	    /* potential prefix: one for ':', then something after */
> +	    if ((tok_len - prefix_len > 2) && *(tok + prefix_len) == ':') {
> +		if (! (*query_string = talloc_strndup_append (*query_string,
> +							      tok,
> +							      prefix_len + 1))) {
> +		    ret = line_error (TAG_PARSE_OUT_OF_MEMORY,
> +				      line_for_error, "aborting");
> +		    goto DONE;
> +		}
> +		tok += prefix_len + 1;
> +		tok_len -= prefix_len + 1;
> +	    }
> +
>  	    if (hex_decode_inplace (tok) != HEX_SUCCESS) {
>  		ret = line_error (TAG_PARSE_INVALID, line_for_error,
>  				  "hex decoding of token '%s' failed", tok);
> @@ -87,17 +121,14 @@ quote_and_decode_query (void *ctx, char *encoded, const char *line_for_error,
>  				  line_for_error, "aborting");
>  		goto DONE;
>  	    }
> -	    *query_string = talloc_asprintf_append_buffer (
> -		*query_string, "%s%c", buf, delim);
>  
> -	} else {
> -	    /* This is not just an optimization, but used to preserve
> -	     * prefixes like id:, which cannot be quoted.
> -	     */
> -	    *query_string = talloc_asprintf_append_buffer (
> -		*query_string, "%s%c", tok, delim);
> +	    if (! (*query_string = talloc_asprintf_append_buffer (
> +		       *query_string, "%s%c", buf, delim))) {
> +		ret = line_error (TAG_PARSE_OUT_OF_MEMORY,
> +				  line_for_error, "aborting");
> +		goto DONE;
> +	    }
>  	}
> -
>      }
>  
>    DONE:
> @@ -220,7 +251,7 @@ parse_tag_line (void *ctx, char *line,
>  	/* skip 'id:' */
>  	*query_string = tok + 3;
>      } else {
> -	ret = quote_and_decode_query (ctx, tok, line_for_error, query_string);
> +	ret = unhex_and_quote (ctx, tok, line_for_error, query_string);
>      }
>  
>    DONE:
>
>
>
> _______________________________________________
> notmuch mailing list
> notmuch@notmuchmail.org
> http://notmuchmail.org/mailman/listinfo/notmuch

^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCH] simplify unhex_and_quote
  2012-12-22 23:36   ` Jani Nikula
@ 2012-12-23  2:59     ` david
  2012-12-23  8:19       ` Mark Walters
  0 siblings, 1 reply; 25+ messages in thread
From: david @ 2012-12-23  2:59 UTC (permalink / raw)
  To: notmuch; +Cc: David Bremner

From: David Bremner <bremner@unb.ca>

the overgeneral definition of a prefix can be replaced by lower case
alphabetic, and still work fine with current notmuch query syntax.

token_len++ is moved to the end, and we restore the delimiter just so
we can leave the string as as we found it.
---

As always, Jani has a keen eye for muddle. Except he's wrong about 
tok_len - prefix_len, and Mark and I are right. Hopefully ;).

Restoring the delimiter at the end might be pointless (since the rest
of the input line is modified), but it is one less surprise for somebody 
repurposing the function.

Patches 5 and 6 can be ignored now.
 tag-util.c |   12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/tag-util.c b/tag-util.c
index b0a846b..ee28512 100644
--- a/tag-util.c
+++ b/tag-util.c
@@ -78,11 +78,13 @@ unhex_and_quote (void *ctx, char *encoded, const char *line_for_error,
 	size_t prefix_len;
 	char delim = *(tok + tok_len);
 
-	*(tok + tok_len++) = '\0';
+	*(tok + tok_len) = '\0';
 
-	prefix_len = hex_invariant (tok, tok_len);
+	/* The following matches a superset of prefixes currently
+	 * used by notmuch */
+	prefix_len = strspn (tok, "abcdefghijklmnopqrstuvwxyz");
 
-	if ((strcmp (tok, "*") == 0) || prefix_len >= tok_len - 1) {
+	if ((strcmp (tok, "*") == 0) || prefix_len == tok_len) {
 
 	    /* pass some things through without quoting or decoding.
 	     * Note for '*' this is mandatory.
@@ -98,7 +100,7 @@ unhex_and_quote (void *ctx, char *encoded, const char *line_for_error,
 
 	} else {
 	    /* potential prefix: one for ':', then something after */
-	    if ((tok_len - prefix_len > 2) && *(tok + prefix_len) == ':') {
+	    if ((tok_len - prefix_len >= 2) && *(tok + prefix_len) == ':') {
 		if (! (*query_string = talloc_strndup_append (*query_string,
 							      tok,
 							      prefix_len + 1))) {
@@ -129,6 +131,8 @@ unhex_and_quote (void *ctx, char *encoded, const char *line_for_error,
 		goto DONE;
 	    }
 	}
+	/* restore the string and skip delimiter */
+	*(tok + tok_len++) = delim;
     }
 
   DONE:
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* Re: [PATCH] simplify unhex_and_quote
  2012-12-23  2:59     ` [PATCH] simplify unhex_and_quote david
@ 2012-12-23  8:19       ` Mark Walters
  0 siblings, 0 replies; 25+ messages in thread
From: Mark Walters @ 2012-12-23  8:19 UTC (permalink / raw)
  To: david, notmuch; +Cc: David Bremner


On Sun, 23 Dec 2012, david@tethera.net wrote:
> From: David Bremner <bremner@unb.ca>
>
> the overgeneral definition of a prefix can be replaced by lower case
> alphabetic, and still work fine with current notmuch query syntax.
>
> token_len++ is moved to the end, and we restore the delimiter just so
> we can leave the string as as we found it.
> ---
>
> As always, Jani has a keen eye for muddle. Except he's wrong about 
> tok_len - prefix_len, and Mark and I are right. Hopefully ;).
>
> Restoring the delimiter at the end might be pointless (since the rest
> of the input line is modified), but it is one less surprise for somebody 
> repurposing the function.

I am now worried about side bit of Xapian syntax, in particular, what
about brackets. I think we could have

(tag:inbox or tag:tag%20with%20spaces) and <something>

In which case the first token is (tag:inbox which does not
match. Additionally the third token is tag:tag%20with%20spaces) which
presumably gets quoted to tag:"tag with spaces)" and I am guessing
Xapian  treats this differently than with bracket after the quote.

Finally, I don't know if a query can contain a : without being a prefix
query. If it can that could end up being misquoted.

One possible way round the first problem might be to require the Xapian
syntax to be space separated from the rest but that does mean we are
diverging from the command line syntax.

(I am not very familiar with Xapian syntax nor with quite where this
function is used so I may be worrying about nothing)

Best wishes

Mark





>
> Patches 5 and 6 can be ignored now.
>  tag-util.c |   12 ++++++++----
>  1 file changed, 8 insertions(+), 4 deletions(-)
>
> diff --git a/tag-util.c b/tag-util.c
> index b0a846b..ee28512 100644
> --- a/tag-util.c
> +++ b/tag-util.c
> @@ -78,11 +78,13 @@ unhex_and_quote (void *ctx, char *encoded, const char *line_for_error,
>  	size_t prefix_len;
>  	char delim = *(tok + tok_len);
>  
> -	*(tok + tok_len++) = '\0';
> +	*(tok + tok_len) = '\0';
>  
> -	prefix_len = hex_invariant (tok, tok_len);
> +	/* The following matches a superset of prefixes currently
> +	 * used by notmuch */
> +	prefix_len = strspn (tok, "abcdefghijklmnopqrstuvwxyz");
>  
> -	if ((strcmp (tok, "*") == 0) || prefix_len >= tok_len - 1) {
> +	if ((strcmp (tok, "*") == 0) || prefix_len == tok_len) {
>  
>  	    /* pass some things through without quoting or decoding.
>  	     * Note for '*' this is mandatory.
> @@ -98,7 +100,7 @@ unhex_and_quote (void *ctx, char *encoded, const char *line_for_error,
>  
>  	} else {
>  	    /* potential prefix: one for ':', then something after */
> -	    if ((tok_len - prefix_len > 2) && *(tok + prefix_len) == ':') {
> +	    if ((tok_len - prefix_len >= 2) && *(tok + prefix_len) == ':') {
>  		if (! (*query_string = talloc_strndup_append (*query_string,
>  							      tok,
>  							      prefix_len + 1))) {
> @@ -129,6 +131,8 @@ unhex_and_quote (void *ctx, char *encoded, const char *line_for_error,
>  		goto DONE;
>  	    }
>  	}
> +	/* restore the string and skip delimiter */
> +	*(tok + tok_len++) = delim;
>      }
>  
>    DONE:
> -- 
> 1.7.10.4
>
> _______________________________________________
> notmuch mailing list
> notmuch@notmuchmail.org
> http://notmuchmail.org/mailman/listinfo/notmuch

^ permalink raw reply	[flat|nested] 25+ messages in thread

end of thread, other threads:[~2012-12-23  8:19 UTC | newest]

Thread overview: 25+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2012-12-21 13:08 [Patch v8 01/18] parse_tag_line: use enum for return value david
2012-12-21 13:08 ` [Patch v8 02/18] tag-util: factor out rules for illegal tags, use in parse_tag_line david
2012-12-21 13:08 ` [Patch v8 03/18] notmuch-tag.c: convert to use tag-utils david
2012-12-21 13:08 ` [Patch v8 04/18] notmuch-tag: factor out double quoting routine david
2012-12-21 13:08 ` [Patch v8 05/18] util/string-util: add strnspn david
2012-12-21 13:08 ` [Patch v8 06/18] util/hex-escape: add a function to check strings unchanged by hex encoding david
2012-12-21 13:08 ` [Patch v8 07/18] unhex_and_quote: new function to quote hex-decoded queries david
2012-12-22 23:36   ` Jani Nikula
2012-12-23  2:59     ` [PATCH] simplify unhex_and_quote david
2012-12-23  8:19       ` Mark Walters
2012-12-21 13:08 ` [Patch v8 08/18] notmuch-restore: move query handling for batch restore to parser david
2012-12-21 13:08 ` [Patch v8 09/18] cli: add support for batch tagging operations to "notmuch tag" david
2012-12-22 22:51   ` Jani Nikula
2012-12-21 13:08 ` [Patch v8 10/18] test/tagging: add test for error messages of tag --batch david
2012-12-21 13:08 ` [Patch v8 11/18] test/tagging: add basic tests for batch tagging functionality david
2012-12-21 13:08 ` [Patch v8 12/18] test/tagging: add tests for exotic tags david
2012-12-21 13:08 ` [Patch v8 13/18] test/tagging: add test for exotic message-ids and batch tagging david
2012-12-21 13:08 ` [Patch v8 14/18] test/tagging: add test for compound queries with " david
2012-12-21 13:08 ` [Patch v8 15/18] notmuch-tag.1: tidy synopsis formatting, reference david
2012-12-21 13:08 ` [Patch v8 16/18] man: document notmuch tag --batch, --input options david
2012-12-21 13:08 ` [Patch v8 17/18] test/tagging: add test for naked punctuation in tags; compare with quoting spaces david
2012-12-21 13:08 ` [Patch v8 18/18] more man fixup david
2012-12-21 13:29 ` [Patch v8 01/18] parse_tag_line: use enum for return value David Bremner
2012-12-22 23:47   ` Jani Nikula
2012-12-22 21:48 ` Jani Nikula

Code repositories for project(s) associated with this public inbox

	https://yhetil.org/notmuch.git/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).