unofficial mirror of notmuch@notmuchmail.org
 help / color / mirror / code / Atom feed
* [PATCH v5 0/6] Use Xapian query syntax for batch-tag dump/restore
@ 2013-01-06 20:22 Austin Clements
  2013-01-06 20:22 ` [PATCH v5 1/6] restore: Make missing messages non-fatal (again) Austin Clements
                   ` (7 more replies)
  0 siblings, 8 replies; 10+ messages in thread
From: Austin Clements @ 2013-01-06 20:22 UTC (permalink / raw)
  To: notmuch; +Cc: tomi.ollila

This obsoletes

  id:1356936162-2589-1-git-send-email-amdragon@mit.edu

v5 should address all of the comments on v4 except those I
specifically replied to (via the ML or IRC).  It also adds a new patch
at the beginning that makes missing message IDs non-fatal in restore,
like they were in 0.14.  This patch can be pushed separately; it's in
this series because later tests rely on it.

The diff from v4 follows.

diff --git a/notmuch-dump.c b/notmuch-dump.c
index bf01a39..a3244e0 100644
--- a/notmuch-dump.c
+++ b/notmuch-dump.c
@@ -103,6 +103,18 @@ notmuch_dump_command (unused (void *ctx), int argc, char *argv[])
 	message = notmuch_messages_get (messages);
 	message_id = notmuch_message_get_message_id (message);
 
+	if (output_format == DUMP_FORMAT_BATCH_TAG &&
+	    strchr (message_id, '\n')) {
+	    /* This will produce a line break in the output, which
+	     * would be difficult to handle in tools.  However, it's
+	     * also impossible to produce an email containing a line
+	     * break in a message ID because of unfolding, so we can
+	     * safely disallow it. */
+	    fprintf (stderr, "Warning: skipping message id containing line break: \"%s\"\n", message_id);
+	    notmuch_message_destroy (message);
+	    continue;
+	}
+
 	if (output_format == DUMP_FORMAT_SUP) {
 	    fprintf (output, "%s (", message_id);
 	}
@@ -133,19 +145,10 @@ notmuch_dump_command (unused (void *ctx), int argc, char *argv[])
 	if (output_format == DUMP_FORMAT_SUP) {
 	    fputs (")\n", output);
 	} else {
-	    if (strchr (message_id, '\n')) {
-		/* This will produce a line break in the output, which
-		 * would be difficult to handle in tools.  However,
-		 * it's also impossible to produce an email containing
-		 * a line break in a message ID because of unfolding,
-		 * so we can safely disallow it. */
-		fprintf (stderr, "Error: cannot dump message id containing line break: %s\n", message_id);
-		return 1;
-	    }
 	    if (make_boolean_term (notmuch, "id", message_id,
 				   &buffer, &buffer_size)) {
-		    fprintf (stderr, "Error: failed to quote message id %s\n",
-			     message_id);
+		    fprintf (stderr, "Error quoting message id %s: %s\n",
+			     message_id, strerror (errno));
 		    return 1;
 	    }
 	    fprintf (output, " -- %s\n", buffer);
diff --git a/notmuch-restore.c b/notmuch-restore.c
index 77a4c27..81d4d98 100644
--- a/notmuch-restore.c
+++ b/notmuch-restore.c
@@ -26,7 +26,8 @@
 static regex_t regex;
 
 /* Non-zero return indicates an error in retrieving the message,
- * or in applying the tags.
+ * or in applying the tags.  Missing messages are reported, but not
+ * considered errors.
  */
 static int
 tag_message (unused (void *ctx),
@@ -40,13 +41,17 @@ tag_message (unused (void *ctx),
     int ret = 0;
 
     status = notmuch_database_find_message (notmuch, message_id, &message);
-    if (status || message == NULL) {
-	fprintf (stderr, "Warning: cannot apply tags to %smessage: %s\n",
-		 message ? "" : "missing ", message_id);
-	if (status)
-	    fprintf (stderr, "%s\n", notmuch_status_to_string (status));
+    if (status) {
+	fprintf (stderr, "Error applying tags to message %s: %s\n",
+		 message_id, notmuch_status_to_string (status));
 	return 1;
     }
+    if (message == NULL) {
+	fprintf (stderr, "Warning: cannot apply tags to missing message: %s\n",
+		 message_id);
+	/* We consider this a non-fatal error. */
+	return 0;
+    }
 
     /* In order to detect missing messages, this check/optimization is
      * intentionally done *after* first finding the message. */
@@ -222,12 +227,17 @@ notmuch_restore_command (unused (void *ctx), int argc, char *argv[])
 	    if (ret == 0) {
 		ret = parse_boolean_term (line_ctx, query_string,
 					  &prefix, &term);
-		if (ret) {
-		    fprintf (stderr, "Warning: cannot parse query: %s\n",
-			     query_string);
+		if (ret && errno == EINVAL) {
+		    fprintf (stderr, "Warning: cannot parse query: %s (skipping)\n", query_string);
 		    continue;
+		} else if (ret) {
+		    /* This is more fatal (e.g., out of memory) */
+		    fprintf (stderr, "Error parsing query: %s\n",
+			     strerror (errno));
+		    ret = 1;
+		    break;
 		} else if (strcmp ("id", prefix) != 0) {
-		    fprintf (stderr, "Warning: not an id query: %s\n", query_string);
+		    fprintf (stderr, "Warning: not an id query: %s (skipping)\n", query_string);
 		    continue;
 		}
 		query_string = term;
diff --git a/test/dump-restore b/test/dump-restore
index f9ae5b3..f076c12 100755
--- a/test/dump-restore
+++ b/test/dump-restore
@@ -202,18 +202,32 @@ a
 + +e -- id:20091117232137.GA7669@griffis1.net
 # valid id, but warning about missing message
 +e id:missing_message_id
+# exercise parser
++e -- id:some)stuff
++e -- id:some stuff
++e -- id:some"stuff
++e -- id:"a_message_id_with""_a_quote"
++e -- id:"a message id with spaces"
++e --  id:an_id_with_leading_and_trailing_ws \
+
 EOF
 
 cat <<EOF > EXPECTED
-Warning: cannot parse query: a
+Warning: cannot parse query: a (skipping)
 Warning: no query string [+0]
 Warning: no query string [+a +b]
 Warning: missing query string [+a +b ]
 Warning: no query string after -- [+c +d --]
 Warning: hex decoding of tag %zz failed [+%zz -- id:whatever]
-Warning: cannot parse query: id:"
-Warning: not an id query: tag:abc
+Warning: cannot parse query: id:" (skipping)
+Warning: not an id query: tag:abc (skipping)
 Warning: cannot apply tags to missing message: missing_message_id
+Warning: cannot parse query: id:some)stuff (skipping)
+Warning: cannot parse query: id:some stuff (skipping)
+Warning: cannot apply tags to missing message: some"stuff
+Warning: cannot apply tags to missing message: a_message_id_with"_a_quote
+Warning: cannot apply tags to missing message: a message id with spaces
+Warning: cannot apply tags to missing message: an_id_with_leading_and_trailing_ws
 EOF
 
 test_expect_equal_file EXPECTED OUTPUT
diff --git a/util/string-util.c b/util/string-util.c
index 52c7781..aba9aa8 100644
--- a/util/string-util.c
+++ b/util/string-util.c
@@ -23,6 +23,7 @@
 #include "talloc.h"
 
 #include <ctype.h>
+#include <errno.h>
 
 char *
 strtok_len (char *s, const char *delim, size_t *len)
@@ -36,6 +37,12 @@ strtok_len (char *s, const char *delim, size_t *len)
     return *len ? s : NULL;
 }
 
+static int
+is_unquoted_terminator (unsigned char c)
+{
+    return c == 0 || c <= ' ' || c == ')';
+}
+
 int
 make_boolean_term (void *ctx, const char *prefix, const char *term,
 		   char **buf, size_t *len)
@@ -49,7 +56,8 @@ make_boolean_term (void *ctx, const char *prefix, const char *term,
      * containing a quote, even though it only matters at the
      * beginning, and anything containing non-ASCII text. */
     for (in = term; *in && !need_quoting; in++)
-	if (*in <= ' ' || *in == ')' || *in == '"' || (unsigned char)*in > 127)
+	if (is_unquoted_terminator (*in) || *in == '"'
+	    || (unsigned char)*in > 127)
 	    need_quoting = 1;
 
     if (need_quoting)
@@ -67,8 +75,10 @@ make_boolean_term (void *ctx, const char *prefix, const char *term,
 	*buf = talloc_realloc (ctx, *buf, char, *len);
     }
 
-    if (! *buf)
-	return 1;
+    if (! *buf) {
+	errno = ENOMEM;
+	return -1;
+    }
 
     out = *buf;
 
@@ -102,7 +112,7 @@ make_boolean_term (void *ctx, const char *prefix, const char *term,
 static const char*
 skip_space (const char *str)
 {
-    while (*str && isspace (*str))
+    while (*str && isspace ((unsigned char) *str))
 	++str;
     return str;
 }
@@ -111,6 +121,7 @@ int
 parse_boolean_term (void *ctx, const char *str,
 		    char **prefix_out, char **term_out)
 {
+    int err = EINVAL;
     *prefix_out = *term_out = NULL;
 
     /* Parse prefix */
@@ -119,12 +130,20 @@ parse_boolean_term (void *ctx, const char *str,
     if (! pos)
 	goto FAIL;
     *prefix_out = talloc_strndup (ctx, str, pos - str);
+    if (! *prefix_out) {
+	err = ENOMEM;
+	goto FAIL;
+    }
     ++pos;
 
     /* Implement de-quoting compatible with make_boolean_term. */
     if (*pos == '"') {
 	char *out = talloc_array (ctx, char, strlen (pos));
 	int closed = 0;
+	if (! out) {
+	    err = ENOMEM;
+	    goto FAIL;
+	}
 	*term_out = out;
 	/* Skip the opening quote, find the closing quote, and
 	 * un-double doubled internal quotes. */
@@ -148,18 +167,25 @@ parse_boolean_term (void *ctx, const char *str,
     } else {
 	const char *start = pos;
 	/* Check for text after the boolean term. */
-	while (*pos > ' ' && *pos != ')')
+	while (! is_unquoted_terminator (*pos))
 	    ++pos;
-	if (*skip_space (pos))
+	if (*skip_space (pos)) {
+	    err = EINVAL;
 	    goto FAIL;
+	}
 	/* No trailing text; dup the string so the caller can free
 	 * it. */
 	*term_out = talloc_strndup (ctx, start, pos - start);
+	if (! *term_out) {
+	    err = ENOMEM;
+	    goto FAIL;
+	}
     }
     return 0;
 
  FAIL:
     talloc_free (*prefix_out);
     talloc_free (*term_out);
-    return 1;
+    errno = err;
+    return -1;
 }
diff --git a/util/string-util.h b/util/string-util.h
index 8b9fe50..0194607 100644
--- a/util/string-util.h
+++ b/util/string-util.h
@@ -28,7 +28,8 @@ char *strtok_len (char *s, const char *delim, size_t *len);
  * can be parsed by parse_boolean_term.
  *
  * Output is into buf; it may be talloc_realloced.
- * Return: 0 on success, non-zero on memory allocation failure.
+ * Return: 0 on success, -1 on error.  errno will be set to ENOMEM if
+ * there is an allocation failure.
  */
 int make_boolean_term (void *talloc_ctx, const char *prefix, const char *term,
 		       char **buf, size_t *len);
@@ -42,7 +43,8 @@ int make_boolean_term (void *talloc_ctx, const char *prefix, const char *term,
  * of the quoting styles supported by Xapian (and hence notmuch).
  * *prefix_out and *term_out will be talloc'd with context ctx.
  *
- * Return: 0 on success, non-zero on parse error.
+ * Return: 0 on success, -1 on error.  errno will be set to EINVAL if
+ * there is a parse error or ENOMEM if there is an allocation failure.
  */
 int
 parse_boolean_term (void *ctx, const char *str,

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH v5 1/6] restore: Make missing messages non-fatal (again)
  2013-01-06 20:22 [PATCH v5 0/6] Use Xapian query syntax for batch-tag dump/restore Austin Clements
@ 2013-01-06 20:22 ` Austin Clements
  2013-01-06 20:22 ` [PATCH v5 2/6] util: Factor out boolean term quoting routine Austin Clements
                   ` (6 subsequent siblings)
  7 siblings, 0 replies; 10+ messages in thread
From: Austin Clements @ 2013-01-06 20:22 UTC (permalink / raw)
  To: notmuch; +Cc: tomi.ollila

Previously, restore would abort if a message ID in the dump was
missing.  Furthermore, it would only report this as a warning.  This
patch makes it distinguish abort-worthy lookup failures like
out-of-memory from non-fatal failure to find a message ID.  The former
is reported as an error and causes restore to abort, while the latter
is reported as a warning and does not cause an abort.

This restores 0.14's non-fatal handling of missing message IDs in
restore (though 0.14 also considered serious errors non-fatal; we
retain the new and better handling of serious errors).
---
 notmuch-restore.c |   17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/notmuch-restore.c b/notmuch-restore.c
index 9ed9b51..96834c0 100644
--- a/notmuch-restore.c
+++ b/notmuch-restore.c
@@ -26,7 +26,8 @@
 static regex_t regex;
 
 /* Non-zero return indicates an error in retrieving the message,
- * or in applying the tags.
+ * or in applying the tags.  Missing messages are reported, but not
+ * considered errors.
  */
 static int
 tag_message (unused (void *ctx),
@@ -40,13 +41,17 @@ tag_message (unused (void *ctx),
     int ret = 0;
 
     status = notmuch_database_find_message (notmuch, message_id, &message);
-    if (status || message == NULL) {
-	fprintf (stderr, "Warning: cannot apply tags to %smessage: %s\n",
-		 message ? "" : "missing ", message_id);
-	if (status)
-	    fprintf (stderr, "%s\n", notmuch_status_to_string (status));
+    if (status) {
+	fprintf (stderr, "Error applying tags to message %s: %s\n",
+		 message_id, notmuch_status_to_string (status));
 	return 1;
     }
+    if (message == NULL) {
+	fprintf (stderr, "Warning: cannot apply tags to missing message: %s\n",
+		 message_id);
+	/* We consider this a non-fatal error. */
+	return 0;
+    }
 
     /* In order to detect missing messages, this check/optimization is
      * intentionally done *after* first finding the message. */
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH v5 2/6] util: Factor out boolean term quoting routine
  2013-01-06 20:22 [PATCH v5 0/6] Use Xapian query syntax for batch-tag dump/restore Austin Clements
  2013-01-06 20:22 ` [PATCH v5 1/6] restore: Make missing messages non-fatal (again) Austin Clements
@ 2013-01-06 20:22 ` Austin Clements
  2013-01-06 20:22 ` [PATCH v5 3/6] util: Function to parse boolean term queries Austin Clements
                   ` (5 subsequent siblings)
  7 siblings, 0 replies; 10+ messages in thread
From: Austin Clements @ 2013-01-06 20:22 UTC (permalink / raw)
  To: notmuch; +Cc: tomi.ollila

From: Austin Clements <amdragon@MIT.EDU>

This is now a generic boolean term quoting function.  It performs
minimal quoting to produce user-friendly queries.

This could live in tag-util as well, but it is really nothing specific
to tags (although the conventions are specific to Xapian).

The API is changed from "caller-allocates" to "readline-like".  The
scan for max tag length is pushed down into the quoting routine.
Furthermore, this now combines the term prefix with the quoted term;
arguably this is just as easy to do in the caller, but this will
nicely parallel the boolean term parsing function to be introduced
shortly.

This is an amalgamation of code written by David Bremner and myself.
---
 notmuch-tag.c      |   48 ++++++++++-----------------------
 util/string-util.c |   75 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 util/string-util.h |   15 +++++++++++
 3 files changed, 104 insertions(+), 34 deletions(-)

diff --git a/notmuch-tag.c b/notmuch-tag.c
index 88d559b..fc9d43a 100644
--- a/notmuch-tag.c
+++ b/notmuch-tag.c
@@ -19,6 +19,7 @@
  */
 
 #include "notmuch-client.h"
+#include "string-util.h"
 
 static volatile sig_atomic_t interrupted;
 
@@ -35,25 +36,6 @@ handle_sigint (unused (int sig))
     interrupted = 1;
 }
 
-static char *
-_escape_tag (char *buf, const char *tag)
-{
-    const char *in = tag;
-    char *out = buf;
-
-    /* Boolean terms surrounded by double quotes can contain any
-     * character.  Double quotes are quoted by doubling them. */
-    *out++ = '"';
-    while (*in) {
-	if (*in == '"')
-	    *out++ = '"';
-	*out++ = *in++;
-    }
-    *out++ = '"';
-    *out = 0;
-    return buf;
-}
-
 typedef struct {
     const char *tag;
     notmuch_bool_t remove;
@@ -71,25 +53,16 @@ _optimize_tag_query (void *ctx, const char *orig_query_string,
      * parenthesize and the exclusion part of the query must not use
      * the '-' operator (though the NOT operator is fine). */
 
-    char *escaped, *query_string;
+    char *escaped = NULL;
+    size_t escaped_len = 0;
+    char *query_string;
     const char *join = "";
-    int i;
-    unsigned int max_tag_len = 0;
+    size_t i;
 
     /* Don't optimize if there are no tag changes. */
     if (tag_ops[0].tag == NULL)
 	return talloc_strdup (ctx, orig_query_string);
 
-    /* Allocate a buffer for escaping tags.  This is large enough to
-     * hold a fully escaped tag with every character doubled plus
-     * enclosing quotes and a NUL. */
-    for (i = 0; tag_ops[i].tag; i++)
-	if (strlen (tag_ops[i].tag) > max_tag_len)
-	    max_tag_len = strlen (tag_ops[i].tag);
-    escaped = talloc_array (ctx, char, max_tag_len * 2 + 3);
-    if (! escaped)
-	return NULL;
-
     /* Build the new query string */
     if (strcmp (orig_query_string, "*") == 0)
 	query_string = talloc_strdup (ctx, "(");
@@ -97,10 +70,17 @@ _optimize_tag_query (void *ctx, const char *orig_query_string,
 	query_string = talloc_asprintf (ctx, "( %s ) and (", orig_query_string);
 
     for (i = 0; tag_ops[i].tag && query_string; i++) {
+	/* XXX in case of OOM, query_string will be deallocated when
+	 * ctx is, which might be at shutdown */
+	if (make_boolean_term (ctx,
+			       "tag", tag_ops[i].tag,
+			       &escaped, &escaped_len))
+	    return NULL;
+
 	query_string = talloc_asprintf_append_buffer (
-	    query_string, "%s%stag:%s", join,
+	    query_string, "%s%s%s", join,
 	    tag_ops[i].remove ? "" : "not ",
-	    _escape_tag (escaped, tag_ops[i].tag));
+	    escaped);
 	join = " or ";
     }
 
diff --git a/util/string-util.c b/util/string-util.c
index 44f8cd3..7a71049 100644
--- a/util/string-util.c
+++ b/util/string-util.c
@@ -20,6 +20,9 @@
 
 
 #include "string-util.h"
+#include "talloc.h"
+
+#include <errno.h>
 
 char *
 strtok_len (char *s, const char *delim, size_t *len)
@@ -32,3 +35,75 @@ strtok_len (char *s, const char *delim, size_t *len)
 
     return *len ? s : NULL;
 }
+
+static int
+is_unquoted_terminator (unsigned char c)
+{
+    return c == 0 || c <= ' ' || c == ')';
+}
+
+int
+make_boolean_term (void *ctx, const char *prefix, const char *term,
+		   char **buf, size_t *len)
+{
+    const char *in;
+    char *out;
+    size_t needed = 3;
+    int need_quoting = 0;
+
+    /* Do we need quoting?  To be paranoid, we quote anything
+     * containing a quote, even though it only matters at the
+     * beginning, and anything containing non-ASCII text. */
+    for (in = term; *in && !need_quoting; in++)
+	if (is_unquoted_terminator (*in) || *in == '"'
+	    || (unsigned char)*in > 127)
+	    need_quoting = 1;
+
+    if (need_quoting)
+	for (in = term; *in; in++)
+	    needed += (*in == '"') ? 2 : 1;
+    else
+	needed = strlen (term) + 1;
+
+    /* Reserve space for the prefix */
+    if (prefix)
+	needed += strlen (prefix) + 1;
+
+    if ((*buf == NULL) || (needed > *len)) {
+	*len = 2 * needed;
+	*buf = talloc_realloc (ctx, *buf, char, *len);
+    }
+
+    if (! *buf) {
+	errno = ENOMEM;
+	return -1;
+    }
+
+    out = *buf;
+
+    /* Copy in the prefix */
+    if (prefix) {
+	strcpy (out, prefix);
+	out += strlen (prefix);
+	*out++ = ':';
+    }
+
+    if (! need_quoting) {
+	strcpy (out, term);
+	return 0;
+    }
+
+    /* Quote term by enclosing it in double quotes and doubling any
+     * internal double quotes. */
+    *out++ = '"';
+    in = term;
+    while (*in) {
+	if (*in == '"')
+	    *out++ = '"';
+	*out++ = *in++;
+    }
+    *out++ = '"';
+    *out = '\0';
+
+    return 0;
+}
diff --git a/util/string-util.h b/util/string-util.h
index ac7676c..719c276 100644
--- a/util/string-util.h
+++ b/util/string-util.h
@@ -19,4 +19,19 @@
 
 char *strtok_len (char *s, const char *delim, size_t *len);
 
+/* Construct a boolean term query with the specified prefix (e.g.,
+ * "id") and search term, quoting term as necessary.  Specifically, if
+ * term contains any non-printable ASCII characters, non-ASCII
+ * characters, close parenthesis or double quotes, it will be enclosed
+ * in double quotes and any internal double quotes will be doubled
+ * (e.g. a"b -> "a""b").  The result will be a valid notmuch query and
+ * can be parsed by parse_boolean_term.
+ *
+ * Output is into buf; it may be talloc_realloced.
+ * Return: 0 on success, -1 on error.  errno will be set to ENOMEM if
+ * there is an allocation failure.
+ */
+int make_boolean_term (void *talloc_ctx, const char *prefix, const char *term,
+		       char **buf, size_t *len);
+
 #endif
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH v5 3/6] util: Function to parse boolean term queries
  2013-01-06 20:22 [PATCH v5 0/6] Use Xapian query syntax for batch-tag dump/restore Austin Clements
  2013-01-06 20:22 ` [PATCH v5 1/6] restore: Make missing messages non-fatal (again) Austin Clements
  2013-01-06 20:22 ` [PATCH v5 2/6] util: Factor out boolean term quoting routine Austin Clements
@ 2013-01-06 20:22 ` Austin Clements
  2013-01-06 20:22 ` [PATCH v5 4/6] dump: Disallow \n in message IDs Austin Clements
                   ` (4 subsequent siblings)
  7 siblings, 0 replies; 10+ messages in thread
From: Austin Clements @ 2013-01-06 20:22 UTC (permalink / raw)
  To: notmuch; +Cc: tomi.ollila

This parses the subset of Xapian's boolean term quoting rules that are
used by make_boolean_term.  This is provided as a generic string
utility, but will be used shortly in notmuch restore to parse and
optimize for ID queries.
---
 util/string-util.c |   82 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 util/string-util.h |   16 ++++++++++
 2 files changed, 98 insertions(+)

diff --git a/util/string-util.c b/util/string-util.c
index 7a71049..aba9aa8 100644
--- a/util/string-util.c
+++ b/util/string-util.c
@@ -22,6 +22,7 @@
 #include "string-util.h"
 #include "talloc.h"
 
+#include <ctype.h>
 #include <errno.h>
 
 char *
@@ -107,3 +108,84 @@ make_boolean_term (void *ctx, const char *prefix, const char *term,
 
     return 0;
 }
+
+static const char*
+skip_space (const char *str)
+{
+    while (*str && isspace ((unsigned char) *str))
+	++str;
+    return str;
+}
+
+int
+parse_boolean_term (void *ctx, const char *str,
+		    char **prefix_out, char **term_out)
+{
+    int err = EINVAL;
+    *prefix_out = *term_out = NULL;
+
+    /* Parse prefix */
+    str = skip_space (str);
+    const char *pos = strchr (str, ':');
+    if (! pos)
+	goto FAIL;
+    *prefix_out = talloc_strndup (ctx, str, pos - str);
+    if (! *prefix_out) {
+	err = ENOMEM;
+	goto FAIL;
+    }
+    ++pos;
+
+    /* Implement de-quoting compatible with make_boolean_term. */
+    if (*pos == '"') {
+	char *out = talloc_array (ctx, char, strlen (pos));
+	int closed = 0;
+	if (! out) {
+	    err = ENOMEM;
+	    goto FAIL;
+	}
+	*term_out = out;
+	/* Skip the opening quote, find the closing quote, and
+	 * un-double doubled internal quotes. */
+	for (++pos; *pos; ) {
+	    if (*pos == '"') {
+		++pos;
+		if (*pos != '"') {
+		    /* Found the closing quote. */
+		    closed = 1;
+		    pos = skip_space (pos);
+		    break;
+		}
+	    }
+	    *out++ = *pos++;
+	}
+	/* Did the term terminate without a closing quote or is there
+	 * trailing text after the closing quote? */
+	if (!closed || *pos)
+	    goto FAIL;
+	*out = '\0';
+    } else {
+	const char *start = pos;
+	/* Check for text after the boolean term. */
+	while (! is_unquoted_terminator (*pos))
+	    ++pos;
+	if (*skip_space (pos)) {
+	    err = EINVAL;
+	    goto FAIL;
+	}
+	/* No trailing text; dup the string so the caller can free
+	 * it. */
+	*term_out = talloc_strndup (ctx, start, pos - start);
+	if (! *term_out) {
+	    err = ENOMEM;
+	    goto FAIL;
+	}
+    }
+    return 0;
+
+ FAIL:
+    talloc_free (*prefix_out);
+    talloc_free (*term_out);
+    errno = err;
+    return -1;
+}
diff --git a/util/string-util.h b/util/string-util.h
index 719c276..0194607 100644
--- a/util/string-util.h
+++ b/util/string-util.h
@@ -34,4 +34,20 @@ char *strtok_len (char *s, const char *delim, size_t *len);
 int make_boolean_term (void *talloc_ctx, const char *prefix, const char *term,
 		       char **buf, size_t *len);
 
+/* Parse a boolean term query consisting of a prefix, a colon, and a
+ * term that may be quoted as described for make_boolean_term.  If the
+ * term is not quoted, then it ends at the first whitespace or close
+ * parenthesis.  str may containing leading or trailing whitespace,
+ * but anything else is considered a parse error.  This is compatible
+ * with anything produced by make_boolean_term, and supports a subset
+ * of the quoting styles supported by Xapian (and hence notmuch).
+ * *prefix_out and *term_out will be talloc'd with context ctx.
+ *
+ * Return: 0 on success, -1 on error.  errno will be set to EINVAL if
+ * there is a parse error or ENOMEM if there is an allocation failure.
+ */
+int
+parse_boolean_term (void *ctx, const char *str,
+		    char **prefix_out, char **term_out);
+
 #endif
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH v5 4/6] dump: Disallow \n in message IDs
  2013-01-06 20:22 [PATCH v5 0/6] Use Xapian query syntax for batch-tag dump/restore Austin Clements
                   ` (2 preceding siblings ...)
  2013-01-06 20:22 ` [PATCH v5 3/6] util: Function to parse boolean term queries Austin Clements
@ 2013-01-06 20:22 ` Austin Clements
  2013-01-06 20:22 ` [PATCH v5 5/6] dump/restore: Use Xapian queries for batch-tag format Austin Clements
                   ` (3 subsequent siblings)
  7 siblings, 0 replies; 10+ messages in thread
From: Austin Clements @ 2013-01-06 20:22 UTC (permalink / raw)
  To: notmuch; +Cc: tomi.ollila

When we switch to using regular Xapian queries in the dump format, \n
will cause problems, so we disallow it.  Specially, while Xapian can
quote and parse queries containing \n without difficultly, quoted
queries containing \n still span multiple lines, which breaks the
line-orientedness of the dump format.  Strictly speaking, we could
still round-trip these, but it would significantly complicate restore
as well as scripts that deal with tag dumps.  This complexity would
come at absolutely no benefit: because of the RFC 2822 unfolding
rules, no amount of standards negligence can produce a message with a
message ID containing a line break (not even Outlook can do it!).

Hence, we simply disallow it.
---
 notmuch-dump.c       |   12 ++++++++++++
 test/random-corpus.c |    4 +++-
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/notmuch-dump.c b/notmuch-dump.c
index d2dad40..5bbda36 100644
--- a/notmuch-dump.c
+++ b/notmuch-dump.c
@@ -102,6 +102,18 @@ notmuch_dump_command (unused (void *ctx), int argc, char *argv[])
 	message = notmuch_messages_get (messages);
 	message_id = notmuch_message_get_message_id (message);
 
+	if (output_format == DUMP_FORMAT_BATCH_TAG &&
+	    strchr (message_id, '\n')) {
+	    /* This will produce a line break in the output, which
+	     * would be difficult to handle in tools.  However, it's
+	     * also impossible to produce an email containing a line
+	     * break in a message ID because of unfolding, so we can
+	     * safely disallow it. */
+	    fprintf (stderr, "Warning: skipping message id containing line break: \"%s\"\n", message_id);
+	    notmuch_message_destroy (message);
+	    continue;
+	}
+
 	if (output_format == DUMP_FORMAT_SUP) {
 	    fprintf (output, "%s (", message_id);
 	}
diff --git a/test/random-corpus.c b/test/random-corpus.c
index f354d4b..8b7748e 100644
--- a/test/random-corpus.c
+++ b/test/random-corpus.c
@@ -96,7 +96,9 @@ random_utf8_string (void *ctx, size_t char_count)
 	    buf = talloc_realloc (ctx, buf, gchar, buf_size);
 	}
 
-	randomchar = random_unichar ();
+	do {
+	    randomchar = random_unichar ();
+	} while (randomchar == '\n');
 
 	written = g_unichar_to_utf8 (randomchar, buf + offset);
 
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH v5 5/6] dump/restore: Use Xapian queries for batch-tag format
  2013-01-06 20:22 [PATCH v5 0/6] Use Xapian query syntax for batch-tag dump/restore Austin Clements
                   ` (3 preceding siblings ...)
  2013-01-06 20:22 ` [PATCH v5 4/6] dump: Disallow \n in message IDs Austin Clements
@ 2013-01-06 20:22 ` Austin Clements
  2013-01-06 20:22 ` [PATCH v5 6/6] man: Update notmuch-dump(1) and notmuch-restore(1) Austin Clements
                   ` (2 subsequent siblings)
  7 siblings, 0 replies; 10+ messages in thread
From: Austin Clements @ 2013-01-06 20:22 UTC (permalink / raw)
  To: notmuch; +Cc: tomi.ollila

This switches the new batch-tag format away from using a home-grown
hex-encoding scheme for message IDs in the dump to simply using Xapian
queries with Xapian quoting syntax.

This has a variety of advantages beyond presenting a cleaner and more
consistent interface.  Foremost is that it will dramatically simplify
the quoting for batch tagging, which shares the same input format.
While the hex-encoding is no better or worse for the simple ID queries
used by dump/restore, it becomes onerous for general-purpose queries
used in batch tagging.  It also better handles strange cases like
"id:foo and bar", since this is no longer syntactically valid.
---
 notmuch-dump.c    |   11 ++++++-----
 notmuch-restore.c |   27 +++++++++++++++------------
 tag-util.c        |    6 ------
 test/dump-restore |   28 ++++++++++++++++++++++------
 4 files changed, 43 insertions(+), 29 deletions(-)

diff --git a/notmuch-dump.c b/notmuch-dump.c
index 5bbda36..a3244e0 100644
--- a/notmuch-dump.c
+++ b/notmuch-dump.c
@@ -20,6 +20,7 @@
 
 #include "notmuch-client.h"
 #include "dump-restore-private.h"
+#include "string-util.h"
 
 int
 notmuch_dump_command (unused (void *ctx), int argc, char *argv[])
@@ -144,13 +145,13 @@ notmuch_dump_command (unused (void *ctx), int argc, char *argv[])
 	if (output_format == DUMP_FORMAT_SUP) {
 	    fputs (")\n", output);
 	} else {
-	    if (hex_encode (notmuch, message_id,
-			    &buffer, &buffer_size) != HEX_SUCCESS) {
-		    fprintf (stderr, "Error: failed to hex-encode msg-id %s\n",
-			     message_id);
+	    if (make_boolean_term (notmuch, "id", message_id,
+				   &buffer, &buffer_size)) {
+		    fprintf (stderr, "Error quoting message id %s: %s\n",
+			     message_id, strerror (errno));
 		    return 1;
 	    }
-	    fprintf (output, " -- id:%s\n", buffer);
+	    fprintf (output, " -- %s\n", buffer);
 	}
 
 	notmuch_message_destroy (message);
diff --git a/notmuch-restore.c b/notmuch-restore.c
index 96834c0..81d4d98 100644
--- a/notmuch-restore.c
+++ b/notmuch-restore.c
@@ -212,7 +212,7 @@ notmuch_restore_command (unused (void *ctx), int argc, char *argv[])
 	    INTERNAL_ERROR ("compile time constant regex failed.");
 
     do {
-	char *query_string;
+	char *query_string, *prefix, *term;
 
 	if (line_ctx != NULL)
 	    talloc_free (line_ctx);
@@ -225,19 +225,22 @@ notmuch_restore_command (unused (void *ctx), int argc, char *argv[])
 				  &query_string, tag_ops);
 
 	    if (ret == 0) {
-		if (strncmp ("id:", query_string, 3) != 0) {
-		    fprintf (stderr, "Warning: unsupported query: %s\n", query_string);
+		ret = parse_boolean_term (line_ctx, query_string,
+					  &prefix, &term);
+		if (ret && errno == EINVAL) {
+		    fprintf (stderr, "Warning: cannot parse query: %s (skipping)\n", query_string);
+		    continue;
+		} else if (ret) {
+		    /* This is more fatal (e.g., out of memory) */
+		    fprintf (stderr, "Error parsing query: %s\n",
+			     strerror (errno));
+		    ret = 1;
+		    break;
+		} else if (strcmp ("id", prefix) != 0) {
+		    fprintf (stderr, "Warning: not an id query: %s (skipping)\n", query_string);
 		    continue;
 		}
-		/* delete id: from front of string; tag_message
-		 * expects a raw message-id.
-		 *
-		 * XXX: Note that query string id:foo and bar will be
-		 * interpreted as a message id "foo and bar". This
-		 * should eventually be fixed to give a better error
-		 * message.
-		 */
-		query_string = query_string + 3;
+		query_string = term;
 	    }
 	}
 
diff --git a/tag-util.c b/tag-util.c
index 705b7ba..e4e5dda 100644
--- a/tag-util.c
+++ b/tag-util.c
@@ -124,12 +124,6 @@ parse_tag_line (void *ctx, char *line,
     }
 
     /* tok now points to the query string */
-    if (hex_decode_inplace (tok) != HEX_SUCCESS) {
-	ret = line_error (TAG_PARSE_INVALID, line_for_error,
-			  "hex decoding of query %s failed", tok);
-	goto DONE;
-    }
-
     *query_string = tok;
 
   DONE:
diff --git a/test/dump-restore b/test/dump-restore
index 6a989b6..f076c12 100755
--- a/test/dump-restore
+++ b/test/dump-restore
@@ -195,23 +195,39 @@ a
 
 # the previous line was blank; also no yelling please
 +%zz -- id:whatever
-+e +f id:%yy
++e +f id:"
++e +f tag:abc
 # the next non-comment line should report an an empty tag error for
 # batch tagging, but not for restore
 + +e -- id:20091117232137.GA7669@griffis1.net
-# highlight the sketchy id parsing; this should be last
-+g -- id:foo and bar
+# valid id, but warning about missing message
++e id:missing_message_id
+# exercise parser
++e -- id:some)stuff
++e -- id:some stuff
++e -- id:some"stuff
++e -- id:"a_message_id_with""_a_quote"
++e -- id:"a message id with spaces"
++e --  id:an_id_with_leading_and_trailing_ws \
+
 EOF
 
 cat <<EOF > EXPECTED
-Warning: unsupported query: a
+Warning: cannot parse query: a (skipping)
 Warning: no query string [+0]
 Warning: no query string [+a +b]
 Warning: missing query string [+a +b ]
 Warning: no query string after -- [+c +d --]
 Warning: hex decoding of tag %zz failed [+%zz -- id:whatever]
-Warning: hex decoding of query id:%yy failed [+e +f id:%yy]
-Warning: cannot apply tags to missing message: foo and bar
+Warning: cannot parse query: id:" (skipping)
+Warning: not an id query: tag:abc (skipping)
+Warning: cannot apply tags to missing message: missing_message_id
+Warning: cannot parse query: id:some)stuff (skipping)
+Warning: cannot parse query: id:some stuff (skipping)
+Warning: cannot apply tags to missing message: some"stuff
+Warning: cannot apply tags to missing message: a_message_id_with"_a_quote
+Warning: cannot apply tags to missing message: a message id with spaces
+Warning: cannot apply tags to missing message: an_id_with_leading_and_trailing_ws
 EOF
 
 test_expect_equal_file EXPECTED OUTPUT
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH v5 6/6] man: Update notmuch-dump(1) and notmuch-restore(1)
  2013-01-06 20:22 [PATCH v5 0/6] Use Xapian query syntax for batch-tag dump/restore Austin Clements
                   ` (4 preceding siblings ...)
  2013-01-06 20:22 ` [PATCH v5 5/6] dump/restore: Use Xapian queries for batch-tag format Austin Clements
@ 2013-01-06 20:22 ` Austin Clements
  2013-01-06 22:01 ` [PATCH v5 0/6] Use Xapian query syntax for batch-tag dump/restore Jani Nikula
  2013-01-07  2:52 ` David Bremner
  7 siblings, 0 replies; 10+ messages in thread
From: Austin Clements @ 2013-01-06 20:22 UTC (permalink / raw)
  To: notmuch; +Cc: tomi.ollila

Describe the new batch-tag format.  For notmuch-restore, rather than
half-heartedly duplicating the description, we now cite notmuch-dump.
---
 man/man1/notmuch-dump.1    |   11 +++++++----
 man/man1/notmuch-restore.1 |    6 ++----
 2 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/man/man1/notmuch-dump.1 b/man/man1/notmuch-dump.1
index 770b00f..7bd6def 100644
--- a/man/man1/notmuch-dump.1
+++ b/man/man1/notmuch-dump.1
@@ -64,13 +64,16 @@ and tags containing whitespace or non-\fBascii\fR(7) characters.
 Each line has the form
 
 .RS 4
-.RI "+<" "encoded-tag" "> " "" "+<" "encoded-tag" "> ... -- " "" " id:<" encoded-message-id >
+.RI "+<" "encoded-tag" "> " "" "+<" "encoded-tag" "> ... -- " "" " id:<" quoted-message-id >
 
-where encoded means that every byte not matching the regex
+Tags are hex-encoded by replacing every byte not matching the regex
 .B [A-Za-z0-9@=.,_+-]
-is replace by
+with
 .B %nn
-where nn is the two digit hex encoding.
+where nn is the two digit hex encoding.  The message ID is a valid Xapian
+query, quoted using Xapian boolean term quoting rules: if the ID contains
+whitespace or a close paren or starts with a double quote, it must be
+enclosed in double quotes and double quotes inside the ID must be doubled.
 The astute reader will notice this is a special case of the batch input
 format for \fBnotmuch-tag\fR(1); note that the single message-id query is
 mandatory for \fBnotmuch-restore\fR(1).
diff --git a/man/man1/notmuch-restore.1 b/man/man1/notmuch-restore.1
index 6bba628..78fef52 100644
--- a/man/man1/notmuch-restore.1
+++ b/man/man1/notmuch-restore.1
@@ -57,10 +57,8 @@ sup calls them).
 The
 .B batch-tag
 dump format is intended to more robust against malformed message-ids
-and tags containing whitespace or non-\fBascii\fR(7) characters.  This
-format hex-escapes all characters those outside of a small character
-set, intended to be suitable for e.g. pathnames in most UNIX-like
-systems.
+and tags containing whitespace or non-\fBascii\fR(7) characters.  See
+\fBnotmuch-dump\fR(1) for details on this format.
 
 .B "notmuch restore"
 updates the maildir flags according to tag changes if the
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH v5 0/6] Use Xapian query syntax for batch-tag dump/restore
  2013-01-06 20:22 [PATCH v5 0/6] Use Xapian query syntax for batch-tag dump/restore Austin Clements
                   ` (5 preceding siblings ...)
  2013-01-06 20:22 ` [PATCH v5 6/6] man: Update notmuch-dump(1) and notmuch-restore(1) Austin Clements
@ 2013-01-06 22:01 ` Jani Nikula
  2013-01-06 22:05   ` Mark Walters
  2013-01-07  2:52 ` David Bremner
  7 siblings, 1 reply; 10+ messages in thread
From: Jani Nikula @ 2013-01-06 22:01 UTC (permalink / raw)
  To: Austin Clements, notmuch; +Cc: tomi.ollila

On Sun, 06 Jan 2013, Austin Clements <amdragon@MIT.EDU> wrote:
> This obsoletes
>
>   id:1356936162-2589-1-git-send-email-amdragon@mit.edu
>
> v5 should address all of the comments on v4 except those I
> specifically replied to (via the ML or IRC).  It also adds a new patch
> at the beginning that makes missing message IDs non-fatal in restore,
> like they were in 0.14.  This patch can be pushed separately; it's in
> this series because later tests rely on it.

The series LGMT,
Jani.

>
> The diff from v4 follows.
>
> diff --git a/notmuch-dump.c b/notmuch-dump.c
> index bf01a39..a3244e0 100644
> --- a/notmuch-dump.c
> +++ b/notmuch-dump.c
> @@ -103,6 +103,18 @@ notmuch_dump_command (unused (void *ctx), int argc, char *argv[])
>  	message = notmuch_messages_get (messages);
>  	message_id = notmuch_message_get_message_id (message);
>  
> +	if (output_format == DUMP_FORMAT_BATCH_TAG &&
> +	    strchr (message_id, '\n')) {
> +	    /* This will produce a line break in the output, which
> +	     * would be difficult to handle in tools.  However, it's
> +	     * also impossible to produce an email containing a line
> +	     * break in a message ID because of unfolding, so we can
> +	     * safely disallow it. */
> +	    fprintf (stderr, "Warning: skipping message id containing line break: \"%s\"\n", message_id);
> +	    notmuch_message_destroy (message);
> +	    continue;
> +	}
> +
>  	if (output_format == DUMP_FORMAT_SUP) {
>  	    fprintf (output, "%s (", message_id);
>  	}
> @@ -133,19 +145,10 @@ notmuch_dump_command (unused (void *ctx), int argc, char *argv[])
>  	if (output_format == DUMP_FORMAT_SUP) {
>  	    fputs (")\n", output);
>  	} else {
> -	    if (strchr (message_id, '\n')) {
> -		/* This will produce a line break in the output, which
> -		 * would be difficult to handle in tools.  However,
> -		 * it's also impossible to produce an email containing
> -		 * a line break in a message ID because of unfolding,
> -		 * so we can safely disallow it. */
> -		fprintf (stderr, "Error: cannot dump message id containing line break: %s\n", message_id);
> -		return 1;
> -	    }
>  	    if (make_boolean_term (notmuch, "id", message_id,
>  				   &buffer, &buffer_size)) {
> -		    fprintf (stderr, "Error: failed to quote message id %s\n",
> -			     message_id);
> +		    fprintf (stderr, "Error quoting message id %s: %s\n",
> +			     message_id, strerror (errno));
>  		    return 1;
>  	    }
>  	    fprintf (output, " -- %s\n", buffer);
> diff --git a/notmuch-restore.c b/notmuch-restore.c
> index 77a4c27..81d4d98 100644
> --- a/notmuch-restore.c
> +++ b/notmuch-restore.c
> @@ -26,7 +26,8 @@
>  static regex_t regex;
>  
>  /* Non-zero return indicates an error in retrieving the message,
> - * or in applying the tags.
> + * or in applying the tags.  Missing messages are reported, but not
> + * considered errors.
>   */
>  static int
>  tag_message (unused (void *ctx),
> @@ -40,13 +41,17 @@ tag_message (unused (void *ctx),
>      int ret = 0;
>  
>      status = notmuch_database_find_message (notmuch, message_id, &message);
> -    if (status || message == NULL) {
> -	fprintf (stderr, "Warning: cannot apply tags to %smessage: %s\n",
> -		 message ? "" : "missing ", message_id);
> -	if (status)
> -	    fprintf (stderr, "%s\n", notmuch_status_to_string (status));
> +    if (status) {
> +	fprintf (stderr, "Error applying tags to message %s: %s\n",
> +		 message_id, notmuch_status_to_string (status));
>  	return 1;
>      }
> +    if (message == NULL) {
> +	fprintf (stderr, "Warning: cannot apply tags to missing message: %s\n",
> +		 message_id);
> +	/* We consider this a non-fatal error. */
> +	return 0;
> +    }
>  
>      /* In order to detect missing messages, this check/optimization is
>       * intentionally done *after* first finding the message. */
> @@ -222,12 +227,17 @@ notmuch_restore_command (unused (void *ctx), int argc, char *argv[])
>  	    if (ret == 0) {
>  		ret = parse_boolean_term (line_ctx, query_string,
>  					  &prefix, &term);
> -		if (ret) {
> -		    fprintf (stderr, "Warning: cannot parse query: %s\n",
> -			     query_string);
> +		if (ret && errno == EINVAL) {
> +		    fprintf (stderr, "Warning: cannot parse query: %s (skipping)\n", query_string);
>  		    continue;
> +		} else if (ret) {
> +		    /* This is more fatal (e.g., out of memory) */
> +		    fprintf (stderr, "Error parsing query: %s\n",
> +			     strerror (errno));
> +		    ret = 1;
> +		    break;
>  		} else if (strcmp ("id", prefix) != 0) {
> -		    fprintf (stderr, "Warning: not an id query: %s\n", query_string);
> +		    fprintf (stderr, "Warning: not an id query: %s (skipping)\n", query_string);
>  		    continue;
>  		}
>  		query_string = term;
> diff --git a/test/dump-restore b/test/dump-restore
> index f9ae5b3..f076c12 100755
> --- a/test/dump-restore
> +++ b/test/dump-restore
> @@ -202,18 +202,32 @@ a
>  + +e -- id:20091117232137.GA7669@griffis1.net
>  # valid id, but warning about missing message
>  +e id:missing_message_id
> +# exercise parser
> ++e -- id:some)stuff
> ++e -- id:some stuff
> ++e -- id:some"stuff
> ++e -- id:"a_message_id_with""_a_quote"
> ++e -- id:"a message id with spaces"
> ++e --  id:an_id_with_leading_and_trailing_ws \
> +
>  EOF
>  
>  cat <<EOF > EXPECTED
> -Warning: cannot parse query: a
> +Warning: cannot parse query: a (skipping)
>  Warning: no query string [+0]
>  Warning: no query string [+a +b]
>  Warning: missing query string [+a +b ]
>  Warning: no query string after -- [+c +d --]
>  Warning: hex decoding of tag %zz failed [+%zz -- id:whatever]
> -Warning: cannot parse query: id:"
> -Warning: not an id query: tag:abc
> +Warning: cannot parse query: id:" (skipping)
> +Warning: not an id query: tag:abc (skipping)
>  Warning: cannot apply tags to missing message: missing_message_id
> +Warning: cannot parse query: id:some)stuff (skipping)
> +Warning: cannot parse query: id:some stuff (skipping)
> +Warning: cannot apply tags to missing message: some"stuff
> +Warning: cannot apply tags to missing message: a_message_id_with"_a_quote
> +Warning: cannot apply tags to missing message: a message id with spaces
> +Warning: cannot apply tags to missing message: an_id_with_leading_and_trailing_ws
>  EOF
>  
>  test_expect_equal_file EXPECTED OUTPUT
> diff --git a/util/string-util.c b/util/string-util.c
> index 52c7781..aba9aa8 100644
> --- a/util/string-util.c
> +++ b/util/string-util.c
> @@ -23,6 +23,7 @@
>  #include "talloc.h"
>  
>  #include <ctype.h>
> +#include <errno.h>
>  
>  char *
>  strtok_len (char *s, const char *delim, size_t *len)
> @@ -36,6 +37,12 @@ strtok_len (char *s, const char *delim, size_t *len)
>      return *len ? s : NULL;
>  }
>  
> +static int
> +is_unquoted_terminator (unsigned char c)
> +{
> +    return c == 0 || c <= ' ' || c == ')';
> +}
> +
>  int
>  make_boolean_term (void *ctx, const char *prefix, const char *term,
>  		   char **buf, size_t *len)
> @@ -49,7 +56,8 @@ make_boolean_term (void *ctx, const char *prefix, const char *term,
>       * containing a quote, even though it only matters at the
>       * beginning, and anything containing non-ASCII text. */
>      for (in = term; *in && !need_quoting; in++)
> -	if (*in <= ' ' || *in == ')' || *in == '"' || (unsigned char)*in > 127)
> +	if (is_unquoted_terminator (*in) || *in == '"'
> +	    || (unsigned char)*in > 127)
>  	    need_quoting = 1;
>  
>      if (need_quoting)
> @@ -67,8 +75,10 @@ make_boolean_term (void *ctx, const char *prefix, const char *term,
>  	*buf = talloc_realloc (ctx, *buf, char, *len);
>      }
>  
> -    if (! *buf)
> -	return 1;
> +    if (! *buf) {
> +	errno = ENOMEM;
> +	return -1;
> +    }
>  
>      out = *buf;
>  
> @@ -102,7 +112,7 @@ make_boolean_term (void *ctx, const char *prefix, const char *term,
>  static const char*
>  skip_space (const char *str)
>  {
> -    while (*str && isspace (*str))
> +    while (*str && isspace ((unsigned char) *str))
>  	++str;
>      return str;
>  }
> @@ -111,6 +121,7 @@ int
>  parse_boolean_term (void *ctx, const char *str,
>  		    char **prefix_out, char **term_out)
>  {
> +    int err = EINVAL;
>      *prefix_out = *term_out = NULL;
>  
>      /* Parse prefix */
> @@ -119,12 +130,20 @@ parse_boolean_term (void *ctx, const char *str,
>      if (! pos)
>  	goto FAIL;
>      *prefix_out = talloc_strndup (ctx, str, pos - str);
> +    if (! *prefix_out) {
> +	err = ENOMEM;
> +	goto FAIL;
> +    }
>      ++pos;
>  
>      /* Implement de-quoting compatible with make_boolean_term. */
>      if (*pos == '"') {
>  	char *out = talloc_array (ctx, char, strlen (pos));
>  	int closed = 0;
> +	if (! out) {
> +	    err = ENOMEM;
> +	    goto FAIL;
> +	}
>  	*term_out = out;
>  	/* Skip the opening quote, find the closing quote, and
>  	 * un-double doubled internal quotes. */
> @@ -148,18 +167,25 @@ parse_boolean_term (void *ctx, const char *str,
>      } else {
>  	const char *start = pos;
>  	/* Check for text after the boolean term. */
> -	while (*pos > ' ' && *pos != ')')
> +	while (! is_unquoted_terminator (*pos))
>  	    ++pos;
> -	if (*skip_space (pos))
> +	if (*skip_space (pos)) {
> +	    err = EINVAL;
>  	    goto FAIL;
> +	}
>  	/* No trailing text; dup the string so the caller can free
>  	 * it. */
>  	*term_out = talloc_strndup (ctx, start, pos - start);
> +	if (! *term_out) {
> +	    err = ENOMEM;
> +	    goto FAIL;
> +	}
>      }
>      return 0;
>  
>   FAIL:
>      talloc_free (*prefix_out);
>      talloc_free (*term_out);
> -    return 1;
> +    errno = err;
> +    return -1;
>  }
> diff --git a/util/string-util.h b/util/string-util.h
> index 8b9fe50..0194607 100644
> --- a/util/string-util.h
> +++ b/util/string-util.h
> @@ -28,7 +28,8 @@ char *strtok_len (char *s, const char *delim, size_t *len);
>   * can be parsed by parse_boolean_term.
>   *
>   * Output is into buf; it may be talloc_realloced.
> - * Return: 0 on success, non-zero on memory allocation failure.
> + * Return: 0 on success, -1 on error.  errno will be set to ENOMEM if
> + * there is an allocation failure.
>   */
>  int make_boolean_term (void *talloc_ctx, const char *prefix, const char *term,
>  		       char **buf, size_t *len);
> @@ -42,7 +43,8 @@ int make_boolean_term (void *talloc_ctx, const char *prefix, const char *term,
>   * of the quoting styles supported by Xapian (and hence notmuch).
>   * *prefix_out and *term_out will be talloc'd with context ctx.
>   *
> - * Return: 0 on success, non-zero on parse error.
> + * Return: 0 on success, -1 on error.  errno will be set to EINVAL if
> + * there is a parse error or ENOMEM if there is an allocation failure.
>   */
>  int
>  parse_boolean_term (void *ctx, const char *str,

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v5 0/6] Use Xapian query syntax for batch-tag dump/restore
  2013-01-06 22:01 ` [PATCH v5 0/6] Use Xapian query syntax for batch-tag dump/restore Jani Nikula
@ 2013-01-06 22:05   ` Mark Walters
  0 siblings, 0 replies; 10+ messages in thread
From: Mark Walters @ 2013-01-06 22:05 UTC (permalink / raw)
  To: Jani Nikula, Austin Clements, notmuch; +Cc: tomi.ollila


LGTM too +1

Mark

On Sun, 06 Jan 2013, Jani Nikula <jani@nikula.org> wrote:
> On Sun, 06 Jan 2013, Austin Clements <amdragon@MIT.EDU> wrote:
>> This obsoletes
>>
>>   id:1356936162-2589-1-git-send-email-amdragon@mit.edu
>>
>> v5 should address all of the comments on v4 except those I
>> specifically replied to (via the ML or IRC).  It also adds a new patch
>> at the beginning that makes missing message IDs non-fatal in restore,
>> like they were in 0.14.  This patch can be pushed separately; it's in
>> this series because later tests rely on it.
>
> The series LGMT,
> Jani.
>
>>
>> The diff from v4 follows.
>>
>> diff --git a/notmuch-dump.c b/notmuch-dump.c
>> index bf01a39..a3244e0 100644
>> --- a/notmuch-dump.c
>> +++ b/notmuch-dump.c
>> @@ -103,6 +103,18 @@ notmuch_dump_command (unused (void *ctx), int argc, char *argv[])
>>  	message = notmuch_messages_get (messages);
>>  	message_id = notmuch_message_get_message_id (message);
>>  
>> +	if (output_format == DUMP_FORMAT_BATCH_TAG &&
>> +	    strchr (message_id, '\n')) {
>> +	    /* This will produce a line break in the output, which
>> +	     * would be difficult to handle in tools.  However, it's
>> +	     * also impossible to produce an email containing a line
>> +	     * break in a message ID because of unfolding, so we can
>> +	     * safely disallow it. */
>> +	    fprintf (stderr, "Warning: skipping message id containing line break: \"%s\"\n", message_id);
>> +	    notmuch_message_destroy (message);
>> +	    continue;
>> +	}
>> +
>>  	if (output_format == DUMP_FORMAT_SUP) {
>>  	    fprintf (output, "%s (", message_id);
>>  	}
>> @@ -133,19 +145,10 @@ notmuch_dump_command (unused (void *ctx), int argc, char *argv[])
>>  	if (output_format == DUMP_FORMAT_SUP) {
>>  	    fputs (")\n", output);
>>  	} else {
>> -	    if (strchr (message_id, '\n')) {
>> -		/* This will produce a line break in the output, which
>> -		 * would be difficult to handle in tools.  However,
>> -		 * it's also impossible to produce an email containing
>> -		 * a line break in a message ID because of unfolding,
>> -		 * so we can safely disallow it. */
>> -		fprintf (stderr, "Error: cannot dump message id containing line break: %s\n", message_id);
>> -		return 1;
>> -	    }
>>  	    if (make_boolean_term (notmuch, "id", message_id,
>>  				   &buffer, &buffer_size)) {
>> -		    fprintf (stderr, "Error: failed to quote message id %s\n",
>> -			     message_id);
>> +		    fprintf (stderr, "Error quoting message id %s: %s\n",
>> +			     message_id, strerror (errno));
>>  		    return 1;
>>  	    }
>>  	    fprintf (output, " -- %s\n", buffer);
>> diff --git a/notmuch-restore.c b/notmuch-restore.c
>> index 77a4c27..81d4d98 100644
>> --- a/notmuch-restore.c
>> +++ b/notmuch-restore.c
>> @@ -26,7 +26,8 @@
>>  static regex_t regex;
>>  
>>  /* Non-zero return indicates an error in retrieving the message,
>> - * or in applying the tags.
>> + * or in applying the tags.  Missing messages are reported, but not
>> + * considered errors.
>>   */
>>  static int
>>  tag_message (unused (void *ctx),
>> @@ -40,13 +41,17 @@ tag_message (unused (void *ctx),
>>      int ret = 0;
>>  
>>      status = notmuch_database_find_message (notmuch, message_id, &message);
>> -    if (status || message == NULL) {
>> -	fprintf (stderr, "Warning: cannot apply tags to %smessage: %s\n",
>> -		 message ? "" : "missing ", message_id);
>> -	if (status)
>> -	    fprintf (stderr, "%s\n", notmuch_status_to_string (status));
>> +    if (status) {
>> +	fprintf (stderr, "Error applying tags to message %s: %s\n",
>> +		 message_id, notmuch_status_to_string (status));
>>  	return 1;
>>      }
>> +    if (message == NULL) {
>> +	fprintf (stderr, "Warning: cannot apply tags to missing message: %s\n",
>> +		 message_id);
>> +	/* We consider this a non-fatal error. */
>> +	return 0;
>> +    }
>>  
>>      /* In order to detect missing messages, this check/optimization is
>>       * intentionally done *after* first finding the message. */
>> @@ -222,12 +227,17 @@ notmuch_restore_command (unused (void *ctx), int argc, char *argv[])
>>  	    if (ret == 0) {
>>  		ret = parse_boolean_term (line_ctx, query_string,
>>  					  &prefix, &term);
>> -		if (ret) {
>> -		    fprintf (stderr, "Warning: cannot parse query: %s\n",
>> -			     query_string);
>> +		if (ret && errno == EINVAL) {
>> +		    fprintf (stderr, "Warning: cannot parse query: %s (skipping)\n", query_string);
>>  		    continue;
>> +		} else if (ret) {
>> +		    /* This is more fatal (e.g., out of memory) */
>> +		    fprintf (stderr, "Error parsing query: %s\n",
>> +			     strerror (errno));
>> +		    ret = 1;
>> +		    break;
>>  		} else if (strcmp ("id", prefix) != 0) {
>> -		    fprintf (stderr, "Warning: not an id query: %s\n", query_string);
>> +		    fprintf (stderr, "Warning: not an id query: %s (skipping)\n", query_string);
>>  		    continue;
>>  		}
>>  		query_string = term;
>> diff --git a/test/dump-restore b/test/dump-restore
>> index f9ae5b3..f076c12 100755
>> --- a/test/dump-restore
>> +++ b/test/dump-restore
>> @@ -202,18 +202,32 @@ a
>>  + +e -- id:20091117232137.GA7669@griffis1.net
>>  # valid id, but warning about missing message
>>  +e id:missing_message_id
>> +# exercise parser
>> ++e -- id:some)stuff
>> ++e -- id:some stuff
>> ++e -- id:some"stuff
>> ++e -- id:"a_message_id_with""_a_quote"
>> ++e -- id:"a message id with spaces"
>> ++e --  id:an_id_with_leading_and_trailing_ws \
>> +
>>  EOF
>>  
>>  cat <<EOF > EXPECTED
>> -Warning: cannot parse query: a
>> +Warning: cannot parse query: a (skipping)
>>  Warning: no query string [+0]
>>  Warning: no query string [+a +b]
>>  Warning: missing query string [+a +b ]
>>  Warning: no query string after -- [+c +d --]
>>  Warning: hex decoding of tag %zz failed [+%zz -- id:whatever]
>> -Warning: cannot parse query: id:"
>> -Warning: not an id query: tag:abc
>> +Warning: cannot parse query: id:" (skipping)
>> +Warning: not an id query: tag:abc (skipping)
>>  Warning: cannot apply tags to missing message: missing_message_id
>> +Warning: cannot parse query: id:some)stuff (skipping)
>> +Warning: cannot parse query: id:some stuff (skipping)
>> +Warning: cannot apply tags to missing message: some"stuff
>> +Warning: cannot apply tags to missing message: a_message_id_with"_a_quote
>> +Warning: cannot apply tags to missing message: a message id with spaces
>> +Warning: cannot apply tags to missing message: an_id_with_leading_and_trailing_ws
>>  EOF
>>  
>>  test_expect_equal_file EXPECTED OUTPUT
>> diff --git a/util/string-util.c b/util/string-util.c
>> index 52c7781..aba9aa8 100644
>> --- a/util/string-util.c
>> +++ b/util/string-util.c
>> @@ -23,6 +23,7 @@
>>  #include "talloc.h"
>>  
>>  #include <ctype.h>
>> +#include <errno.h>
>>  
>>  char *
>>  strtok_len (char *s, const char *delim, size_t *len)
>> @@ -36,6 +37,12 @@ strtok_len (char *s, const char *delim, size_t *len)
>>      return *len ? s : NULL;
>>  }
>>  
>> +static int
>> +is_unquoted_terminator (unsigned char c)
>> +{
>> +    return c == 0 || c <= ' ' || c == ')';
>> +}
>> +
>>  int
>>  make_boolean_term (void *ctx, const char *prefix, const char *term,
>>  		   char **buf, size_t *len)
>> @@ -49,7 +56,8 @@ make_boolean_term (void *ctx, const char *prefix, const char *term,
>>       * containing a quote, even though it only matters at the
>>       * beginning, and anything containing non-ASCII text. */
>>      for (in = term; *in && !need_quoting; in++)
>> -	if (*in <= ' ' || *in == ')' || *in == '"' || (unsigned char)*in > 127)
>> +	if (is_unquoted_terminator (*in) || *in == '"'
>> +	    || (unsigned char)*in > 127)
>>  	    need_quoting = 1;
>>  
>>      if (need_quoting)
>> @@ -67,8 +75,10 @@ make_boolean_term (void *ctx, const char *prefix, const char *term,
>>  	*buf = talloc_realloc (ctx, *buf, char, *len);
>>      }
>>  
>> -    if (! *buf)
>> -	return 1;
>> +    if (! *buf) {
>> +	errno = ENOMEM;
>> +	return -1;
>> +    }
>>  
>>      out = *buf;
>>  
>> @@ -102,7 +112,7 @@ make_boolean_term (void *ctx, const char *prefix, const char *term,
>>  static const char*
>>  skip_space (const char *str)
>>  {
>> -    while (*str && isspace (*str))
>> +    while (*str && isspace ((unsigned char) *str))
>>  	++str;
>>      return str;
>>  }
>> @@ -111,6 +121,7 @@ int
>>  parse_boolean_term (void *ctx, const char *str,
>>  		    char **prefix_out, char **term_out)
>>  {
>> +    int err = EINVAL;
>>      *prefix_out = *term_out = NULL;
>>  
>>      /* Parse prefix */
>> @@ -119,12 +130,20 @@ parse_boolean_term (void *ctx, const char *str,
>>      if (! pos)
>>  	goto FAIL;
>>      *prefix_out = talloc_strndup (ctx, str, pos - str);
>> +    if (! *prefix_out) {
>> +	err = ENOMEM;
>> +	goto FAIL;
>> +    }
>>      ++pos;
>>  
>>      /* Implement de-quoting compatible with make_boolean_term. */
>>      if (*pos == '"') {
>>  	char *out = talloc_array (ctx, char, strlen (pos));
>>  	int closed = 0;
>> +	if (! out) {
>> +	    err = ENOMEM;
>> +	    goto FAIL;
>> +	}
>>  	*term_out = out;
>>  	/* Skip the opening quote, find the closing quote, and
>>  	 * un-double doubled internal quotes. */
>> @@ -148,18 +167,25 @@ parse_boolean_term (void *ctx, const char *str,
>>      } else {
>>  	const char *start = pos;
>>  	/* Check for text after the boolean term. */
>> -	while (*pos > ' ' && *pos != ')')
>> +	while (! is_unquoted_terminator (*pos))
>>  	    ++pos;
>> -	if (*skip_space (pos))
>> +	if (*skip_space (pos)) {
>> +	    err = EINVAL;
>>  	    goto FAIL;
>> +	}
>>  	/* No trailing text; dup the string so the caller can free
>>  	 * it. */
>>  	*term_out = talloc_strndup (ctx, start, pos - start);
>> +	if (! *term_out) {
>> +	    err = ENOMEM;
>> +	    goto FAIL;
>> +	}
>>      }
>>      return 0;
>>  
>>   FAIL:
>>      talloc_free (*prefix_out);
>>      talloc_free (*term_out);
>> -    return 1;
>> +    errno = err;
>> +    return -1;
>>  }
>> diff --git a/util/string-util.h b/util/string-util.h
>> index 8b9fe50..0194607 100644
>> --- a/util/string-util.h
>> +++ b/util/string-util.h
>> @@ -28,7 +28,8 @@ char *strtok_len (char *s, const char *delim, size_t *len);
>>   * can be parsed by parse_boolean_term.
>>   *
>>   * Output is into buf; it may be talloc_realloced.
>> - * Return: 0 on success, non-zero on memory allocation failure.
>> + * Return: 0 on success, -1 on error.  errno will be set to ENOMEM if
>> + * there is an allocation failure.
>>   */
>>  int make_boolean_term (void *talloc_ctx, const char *prefix, const char *term,
>>  		       char **buf, size_t *len);
>> @@ -42,7 +43,8 @@ int make_boolean_term (void *talloc_ctx, const char *prefix, const char *term,
>>   * of the quoting styles supported by Xapian (and hence notmuch).
>>   * *prefix_out and *term_out will be talloc'd with context ctx.
>>   *
>> - * Return: 0 on success, non-zero on parse error.
>> + * Return: 0 on success, -1 on error.  errno will be set to EINVAL if
>> + * there is a parse error or ENOMEM if there is an allocation failure.
>>   */
>>  int
>>  parse_boolean_term (void *ctx, const char *str,

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v5 0/6] Use Xapian query syntax for batch-tag dump/restore
  2013-01-06 20:22 [PATCH v5 0/6] Use Xapian query syntax for batch-tag dump/restore Austin Clements
                   ` (6 preceding siblings ...)
  2013-01-06 22:01 ` [PATCH v5 0/6] Use Xapian query syntax for batch-tag dump/restore Jani Nikula
@ 2013-01-07  2:52 ` David Bremner
  7 siblings, 0 replies; 10+ messages in thread
From: David Bremner @ 2013-01-07  2:52 UTC (permalink / raw)
  To: Austin Clements, notmuch; +Cc: tomi.ollila

Austin Clements <amdragon@MIT.EDU> writes:

> This obsoletes
>
>   id:1356936162-2589-1-git-send-email-amdragon@mit.edu
>
> v5 should address all of the comments on v4 except those I
> specifically replied to (via the ML or IRC).  It also adds a new patch
> at the beginning that makes missing message IDs non-fatal in restore,
> like they were in 0.14.  This patch can be pushed separately; it's in
> this series because later tests rely on it.

pushed.

d

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2013-01-07  2:52 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-01-06 20:22 [PATCH v5 0/6] Use Xapian query syntax for batch-tag dump/restore Austin Clements
2013-01-06 20:22 ` [PATCH v5 1/6] restore: Make missing messages non-fatal (again) Austin Clements
2013-01-06 20:22 ` [PATCH v5 2/6] util: Factor out boolean term quoting routine Austin Clements
2013-01-06 20:22 ` [PATCH v5 3/6] util: Function to parse boolean term queries Austin Clements
2013-01-06 20:22 ` [PATCH v5 4/6] dump: Disallow \n in message IDs Austin Clements
2013-01-06 20:22 ` [PATCH v5 5/6] dump/restore: Use Xapian queries for batch-tag format Austin Clements
2013-01-06 20:22 ` [PATCH v5 6/6] man: Update notmuch-dump(1) and notmuch-restore(1) Austin Clements
2013-01-06 22:01 ` [PATCH v5 0/6] Use Xapian query syntax for batch-tag dump/restore Jani Nikula
2013-01-06 22:05   ` Mark Walters
2013-01-07  2:52 ` David Bremner

Code repositories for project(s) associated with this public inbox

	https://yhetil.org/notmuch.git/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).