unofficial mirror of notmuch@notmuchmail.org
 help / color / mirror / code / Atom feed
* V3b of batch tagging/dump/restore patches
@ 2012-12-07  1:26 david
  2012-12-07  1:26 ` [Patch v3b 1/9] notmuch-dump: add --format=(batch-tag|sup) david
                   ` (8 more replies)
  0 siblings, 9 replies; 21+ messages in thread
From: david @ 2012-12-07  1:26 UTC (permalink / raw)
  To: notmuch

Here is a second piece of the tagging/dump/restore series.

it obsoletes 8 of the patches in the series 

   id:1353792017-31459-1-git-send-email-david@tethera.net

 [Patch v3b 1/9] notmuch-dump: add --format=(batch-tag|sup)
 [Patch v3b 3/9] util: add string-util.[ch]
 [Patch v3b 4/9] tag-util.[ch]: New files for common tagging routines
 [Patch v3b 5/9] notmuch-restore: add support for input format
 [Patch v3b 6/9] test: update dump-restore roundtripping test for
 [Patch v3b 7/9] test: second set of dump/restore --format=batch-tag
 [Patch v3b 8/9] notmuch-{dump,restore}.1: document new format
 [Patch v3b 9/9] tag-util: optimization of tag application

It adds one new patch

 [Patch v3b 2/9] test: add sanity check for dump --format=batch-tag.

I still have to work through some of the comments on the batch
tagging; I still intend for that to follow fairly shortly, I just want
to break the series at a logical point.

Most of the changes are detailed in the following log (of changes
before I squashed them)

commit 5045d2f58beb4c3bc8e10f9419341e1c1b7748f2
Author: David Bremner <bremner@debian.org>
Date:   Tue Dec 4 13:39:48 2012 -0400

    fixup for tag-util error messages

diff --git a/tag-util.c b/tag-util.c
index 2bb8355..ea05ee5 100644
--- a/tag-util.c
+++ b/tag-util.c
@@ -86,9 +86,8 @@ parse_tag_line (void *ctx, char *line,
 
     /* tok now points to the query string */
     if (hex_decode_inplace (tok) != HEX_SUCCESS) {
-	/* FIXME: line has been modified! */
-	fprintf (stderr, "Warning: Ignoring invalid input line: %s\n",
-		 line);
+	fprintf (stderr, "Hex decoding of %s failed\n",
+		 tok);
 	return 1;
     }
 

commit 5a1d697dc408c67424d586b6377976fdfb86d4ed
Author: David Bremner <bremner@debian.org>
Date:   Tue Dec 4 13:40:09 2012 -0400

    fixup for restore error messages

diff --git a/notmuch-restore.c b/notmuch-restore.c
index 22fcd2d..e7584bb 100644
--- a/notmuch-restore.c
+++ b/notmuch-restore.c
@@ -77,7 +77,7 @@ parse_sup_line (void *ctx, char *line,
 
     rerr = xregexec (&regex, line, 3, match, 0);
     if (rerr == REG_NOMATCH) {
-	fprintf (stderr, "Warning: Ignoring invalid input line: %s\n",
+	fprintf (stderr, "Warning: Ignoring invalid sup format line: %s\n",
 		 line);
 	return 1;
     }

commit 7136d3b4e974a4ba8e247f328c71362efd0c9e11
Author: David Bremner <bremner@debian.org>
Date:   Tue Dec 4 22:35:30 2012 -0400

    fixup for tag-util error handling and permit the null set of tag operations

diff --git a/tag-util.c b/tag-util.c
index ea05ee5..de7ecc8 100644
--- a/tag-util.c
+++ b/tag-util.c
@@ -21,6 +21,8 @@ parse_tag_line (void *ctx, char *line,
 {
     char *tok = line;
     size_t tok_len = 0;
+    char *line_for_error=talloc_strdup (ctx, line);
+    int ret=0;
 
     chomp_newline (line);
 
@@ -29,8 +31,10 @@ parse_tag_line (void *ctx, char *line,
 	tok++;
 
     /* Skip empty and comment lines. */
-    if (*tok == '\0' || *tok == '#')
-	    return 1;
+    if (*tok == '\0' || *tok == '#') {
+	ret=1;
+	goto DONE;
+    }
 
     tag_op_list_reset (tag_ops);
 
@@ -51,8 +55,9 @@ parse_tag_line (void *ctx, char *line,
 
 	/* If tag is terminated by NUL, there's no query string. */
 	if (*(tok + tok_len) == '\0') {
-	    tok = NULL;
-	    break;
+	    fprintf (stderr, "no query string: %s\n", line_for_error);
+	    ret = 1;
+	    goto DONE;
 	}
 
 	/* Terminate, and start next token after terminator. */
@@ -63,37 +68,43 @@ parse_tag_line (void *ctx, char *line,
 
 	/* Maybe refuse empty tags. */
 	if (!(flags & TAG_FLAG_BE_GENEROUS) && *tag == '\0') {
-	    tok = NULL;
-	    break;
+	    fprintf (stderr, "Error: empty tag: %s\n", line_for_error);
+	    goto DONE;
 	}
 
 	/* Decode tag. */
 	if (hex_decode_inplace (tag) != HEX_SUCCESS) {
-	    tok = NULL;
-	    break;
+	    fprintf (stderr, "Hex decoding of tag %s failed\n",
+		 tag);
+	    ret = 1;
+	    goto DONE;
 	}
 
-	if (tag_op_list_append (ctx, tag_ops, tag, remove))
-	    return -1;
+	if (tag_op_list_append (ctx, tag_ops, tag, remove)) {
+	    ret = -1;
+	    goto DONE;
+	}
     }
 
-    if (tok == NULL || tag_ops->count == 0) {
-	/* FIXME: line has been modified! */
+    if (tok == NULL) {
 	fprintf (stderr, "Warning: Ignoring invalid input line: %s\n",
-		 line);
-	return 1;
+		 line_for_error);
+	ret = 1;
+	goto DONE;
     }
 
     /* tok now points to the query string */
     if (hex_decode_inplace (tok) != HEX_SUCCESS) {
-	fprintf (stderr, "Hex decoding of %s failed\n",
+	fprintf (stderr, "Hex decoding of query %s failed\n",
 		 tok);
-	return 1;
+	ret = 1;
+	goto DONE;
     }
 
     *query_string = tok;
-
-    return 0;
+ DONE:
+    talloc_free (line_for_error);
+    return ret;
 }
 
 static inline void

commit 5912c738d3683aae24ca5529839eb0513520d190
Author: David Bremner <bremner@debian.org>
Date:   Thu Dec 6 07:49:15 2012 -0400

    fixup for id:87wqx1qrmq.fsf@nikula.org; use size_t for tag_op_list count

diff --git a/tag-util.c b/tag-util.c
index de7ecc8..1a0cf53 100644
--- a/tag-util.c
+++ b/tag-util.c
@@ -1,6 +1,7 @@
 #include "string-util.h"
 #include "tag-util.h"
 #include "hex-escape.h"
+#include <assert.h>
 
 struct _tag_operation_t {
     const char *tag;
@@ -9,8 +10,8 @@ struct _tag_operation_t {
 
 struct _tag_op_list_t {
     tag_operation_t *ops;
-    int count;
-    int size;
+    size_t count;
+    size_t size;
 };
 
 int
@@ -44,7 +45,7 @@ parse_tag_line (void *ctx, char *line,
 	char *tag;
 
 	/* Optional explicit end of tags marker. */
-	if (strncmp (tok, "--", tok_len) == 0) {
+	if (tok_len == 2 && strncmp (tok, "--", tok_len) == 0) {
 	    tok = strtok_len (tok + tok_len, " ", &tok_len);
 	    break;
 	}
@@ -126,17 +127,16 @@ makes_changes (notmuch_message_t *message,
 	       tag_op_list_t *list,
 	       tag_op_flag_t flags)
 {
-
-    int i;
-
     notmuch_tags_t *tags;
     notmuch_bool_t changes = FALSE;
+    size_t i;
 
     /* First, do we delete an existing tag? */
     changes = FALSE;
     for (tags = notmuch_message_get_tags (message);
 	 ! changes && notmuch_tags_valid (tags);
 	 notmuch_tags_move_to_next (tags)) {
+
 	const char *cur_tag = notmuch_tags_get (tags);
 	int last_op =  (flags & TAG_FLAG_REMOVE_ALL) ? -1 : 0;
 
@@ -182,8 +182,7 @@ tag_op_list_apply (notmuch_message_t *message,
 		   tag_op_list_t *list,
 		   tag_op_flag_t flags)
 {
-    int i;
-
+    size_t i;
     notmuch_status_t status = 0;
     tag_operation_t *tag_ops = list->ops;
 
@@ -199,7 +198,7 @@ tag_op_list_apply (notmuch_message_t *message,
     if (flags & TAG_FLAG_REMOVE_ALL) {
 	status = notmuch_message_remove_all_tags (message);
 	if (status) {
-	    message_error (message, status, "removing all tags" );
+	    message_error (message, status, "removing all tags");
 	    return status;
 	}
     }
@@ -241,8 +240,8 @@ tag_op_list_apply (notmuch_message_t *message,
 }
 
 
-/* Array of tagging operations (add or remove), terminated with an
- * empty element. Size will be increased as necessary. */
+/* Array of tagging operations (add or remove.  Size will be increased
+ * as necessary. */
 
 tag_op_list_t *
 tag_op_list_create (void *ctx)
@@ -299,6 +298,7 @@ tag_op_list_append (void *ctx,
 notmuch_bool_t
 tag_op_list_isremove (const tag_op_list_t *list, size_t i)
 {
+    assert (i < list->count);
     return list->ops[i].remove;
 }
 
@@ -329,5 +329,6 @@ tag_op_list_size (const tag_op_list_t *list)
 const char *
 tag_op_list_tag (const tag_op_list_t *list, size_t i)
 {
+    assert (i < list->count);
     return list->ops[i].tag;
 }

commit e1af69d57854d5c6e927b0870be97e9d2e2f28ea
Author: David Bremner <bremner@debian.org>
Date:   Thu Dec 6 07:51:43 2012 -0400

    changes for id:87wqx1qrmq.fsf@nikula.org. tag_op_list_t.count -> size_t

diff --git a/tag-util.c b/tag-util.c
index 1a0cf53..ad13147 100644
--- a/tag-util.c
+++ b/tag-util.c
@@ -22,8 +22,8 @@ parse_tag_line (void *ctx, char *line,
 {
     char *tok = line;
     size_t tok_len = 0;
-    char *line_for_error=talloc_strdup (ctx, line);
-    int ret=0;
+    char *line_for_error = talloc_strdup (ctx, line);
+    int ret = 0;
 
     chomp_newline (line);
 
@@ -33,7 +33,7 @@ parse_tag_line (void *ctx, char *line,
 
     /* Skip empty and comment lines. */
     if (*tok == '\0' || *tok == '#') {
-	ret=1;
+	ret = 1;
 	goto DONE;
     }
 
@@ -68,7 +68,7 @@ parse_tag_line (void *ctx, char *line,
 	tag = tok + 1;
 
 	/* Maybe refuse empty tags. */
-	if (!(flags & TAG_FLAG_BE_GENEROUS) && *tag == '\0') {
+	if (! (flags & TAG_FLAG_BE_GENEROUS) && *tag == '\0') {
 	    fprintf (stderr, "Error: empty tag: %s\n", line_for_error);
 	    goto DONE;
 	}
@@ -76,7 +76,7 @@ parse_tag_line (void *ctx, char *line,
 	/* Decode tag. */
 	if (hex_decode_inplace (tag) != HEX_SUCCESS) {
 	    fprintf (stderr, "Hex decoding of tag %s failed\n",
-		 tag);
+		     tag);
 	    ret = 1;
 	    goto DONE;
 	}
@@ -103,7 +103,7 @@ parse_tag_line (void *ctx, char *line,
     }
 
     *query_string = tok;
- DONE:
+  DONE:
     talloc_free (line_for_error);
     return ret;
 }

commit 3f00fa4eba68876635df86ea54f60b68d172f580
Author: David Bremner <bremner@debian.org>
Date:   Thu Dec 6 08:30:58 2012 -0400

    changes for id:87zk1wd1ko.fsf@nikula.org

diff --git a/notmuch-restore.c b/notmuch-restore.c
index e7584bb..41b742f 100644
--- a/notmuch-restore.c
+++ b/notmuch-restore.c
@@ -48,11 +48,10 @@ tag_message (unused (void *ctx),
 
     /* In order to detect missing messages, this check/optimization is
      * intentionally done *after* first finding the message. */
-    if ( (flags & TAG_FLAG_REMOVE_ALL) || (tag_op_list_size (tag_ops)))
+    if ((flags & TAG_FLAG_REMOVE_ALL) || tag_op_list_size (tag_ops))
 	tag_op_list_apply (message, tag_ops, flags);
 
-    if (message)
-	notmuch_message_destroy (message);
+    notmuch_message_destroy (message);
 
     return ret;
 }
@@ -184,6 +183,12 @@ notmuch_restore_command (unused (void *ctx), int argc, char *argv[])
     if (line_len == 0)
 	return 0;
 
+    tag_ops = tag_op_list_create (ctx);
+    if (tag_ops == NULL) {
+	fprintf (stderr, "Out of memory.\n");
+	return 1;
+    }
+
     for (p = line; *p; p++) {
 	if (*p == '(')
 	    input_format = DUMP_FORMAT_SUP;
@@ -198,28 +203,28 @@ notmuch_restore_command (unused (void *ctx), int argc, char *argv[])
 		       REG_EXTENDED) )
 	    INTERNAL_ERROR ("compile time constant regex failed.");
 
-    tag_ops = tag_op_list_create (ctx);
-    if (tag_ops == NULL) {
-	fprintf (stderr, "Out of memory.\n");
-	return 1;
-    }
-
     do {
 	char *query_string;
 
 	if (input_format == DUMP_FORMAT_SUP) {
-	    ret =  parse_sup_line (ctx, line, &query_string, tag_ops);
+	    ret = parse_sup_line (ctx, line, &query_string, tag_ops);
 	} else {
-	    ret =  parse_tag_line (ctx, line, TAG_FLAG_BE_GENEROUS,
-				   &query_string, tag_ops);
+	    ret = parse_tag_line (ctx, line, TAG_FLAG_BE_GENEROUS,
+				  &query_string, tag_ops);
 
 	    if (ret == 0) {
-		if ( strncmp ("id:", query_string, 3) != 0) {
+		if (strncmp ("id:", query_string, 3) != 0) {
 		    fprintf (stderr, "Unsupported query: %s\n", query_string);
 		    continue;
 		}
-		/* delete id: from front of string; tag_message expects a
-		 * raw message-id */
+		/* delete id: from front of string; tag_message
+		 * expects a raw message-id.
+		 *
+		 * XXX: Note that query string id:foo and bar will be
+		 * interpreted as a message id "foo and bar". This
+		 * should eventually be fixed to give a better error
+		 * message.
+		 */
 		query_string = query_string + 3;
 	    }
 	}
@@ -233,8 +238,8 @@ notmuch_restore_command (unused (void *ctx), int argc, char *argv[])
 
     }  while ((line_len = getline (&line, &line_size, input)) != -1);
 
-
-    regfree (&regex);
+    if (input_format == DUMP_FORMAT_SUP)
+	regfree (&regex);
 
     if (line)
 	free (line);

commit 473fa928931080004706cff169c7cc9337601172
Author: David Bremner <bremner@debian.org>
Date:   Thu Dec 6 13:33:42 2012 -0400

    fixup: notmuch-restore only auto-detect in auto mode

diff --git a/notmuch-restore.c b/notmuch-restore.c
index 41b742f..ceec2d3 100644
--- a/notmuch-restore.c
+++ b/notmuch-restore.c
@@ -189,7 +189,7 @@ notmuch_restore_command (unused (void *ctx), int argc, char *argv[])
 	return 1;
     }
 
-    for (p = line; *p; p++) {
+    for (p = line; (input_format == DUMP_FORMAT_AUTO) && *p; p++) {
 	if (*p == '(')
 	    input_format = DUMP_FORMAT_SUP;
     }

commit ee7d25521e3f6f70b3f4fb79f586a11d39efec15
Author: David Bremner <bremner@debian.org>
Date:   Thu Dec 6 19:39:37 2012 -0400

    changes for id:87wqx0d124.fsf@nikula.org; no deprecation for the moment

diff --git a/man/man1/notmuch-dump.1 b/man/man1/notmuch-dump.1
index 9f59905..770b00f 100644
--- a/man/man1/notmuch-dump.1
+++ b/man/man1/notmuch-dump.1
@@ -64,15 +64,16 @@ and tags containing whitespace or non-\fBascii\fR(7) characters.
 Each line has the form
 
 .RS 4
-.RI "+<" "encoded-tag" "> " "" "+<" "encoded-tag" "> ... -- " "" " <" encoded-message-id >
+.RI "+<" "encoded-tag" "> " "" "+<" "encoded-tag" "> ... -- " "" " id:<" encoded-message-id >
 
 where encoded means that every byte not matching the regex
-.B [A-Za-z0-9+-_@=.:,]
+.B [A-Za-z0-9@=.,_+-]
 is replace by
 .B %nn
 where nn is the two digit hex encoding.
 The astute reader will notice this is a special case of the batch input
-format for \fBnotmuch-tag\fR(1).
+format for \fBnotmuch-tag\fR(1); note that the single message-id query is
+mandatory for \fBnotmuch-restore\fR(1).
 
 .RE
 
diff --git a/man/man1/notmuch-restore.1 b/man/man1/notmuch-restore.1
index 3860829..6bba628 100644
--- a/man/man1/notmuch-restore.1
+++ b/man/man1/notmuch-restore.1
@@ -32,8 +32,8 @@ replacing each message's tags as they are read in from the dump file.
 .TP 4
 .B \-\-format=(sup|batch-tag|auto)
 
-Notmuch restore supports two plain text dump formats, with one message-id
-per line, and a list of tags.
+Notmuch restore supports two plain text dump formats, with each line
+specifying a message-id and a set of tags.
 For details of the actual formats, see \fBnotmuch-dump\fR(1).
 
 .RS 4

commit 96c383be46cdb8ceaf7ed15590ef876799d6357e
Author: David Bremner <bremner@debian.org>
Date:   Thu Dec 6 20:40:55 2012 -0400

    Changes for id:87txs4cy7v.fsf@nikula.org

diff --git a/tag-util.c b/tag-util.c
index ad13147..9ab07e9 100644
--- a/tag-util.c
+++ b/tag-util.c
@@ -140,9 +140,11 @@ makes_changes (notmuch_message_t *message,
 	const char *cur_tag = notmuch_tags_get (tags);
 	int last_op =  (flags & TAG_FLAG_REMOVE_ALL) ? -1 : 0;
 
-	for (i = 0; i < list->count; i++) {
+	/* slight contortions to count down with an unsigned index */
+	for (i = list->count; i-- > 0; /*nothing*/) {
 	    if (strcmp (cur_tag, list->ops[i].tag) == 0) {
 		last_op = list->ops[i].remove ? -1 : 1;
+		break;
 	    }
 	}
 
@@ -157,6 +159,9 @@ makes_changes (notmuch_message_t *message,
     for (i = 0; i < list->count; i++) {
 	notmuch_bool_t exists = FALSE;
 
+	if (list->ops[i].remove)
+	    continue;
+
 	for (tags = notmuch_message_get_tags (message);
 	     notmuch_tags_valid (tags);
 	     notmuch_tags_move_to_next (tags)) {
@@ -168,9 +173,11 @@ makes_changes (notmuch_message_t *message,
 	}
 	notmuch_tags_destroy (tags);
 
-	/* the following test is conservative, it's ok to think we
-	 * make changes when we don't */
-	if ( ! exists && ! list->ops[i].remove )
+	/* the following test is conservative,
+	 * in the sense it ignores cases like +foo ... -foo
+	 * but this is OK from a correctness point of view
+	 */
+	if (! exists)
 	    return TRUE;
     }
     return FALSE;

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [Patch v3b 1/9] notmuch-dump: add --format=(batch-tag|sup)
  2012-12-07  1:26 V3b of batch tagging/dump/restore patches david
@ 2012-12-07  1:26 ` david
  2012-12-07 22:21   ` Jani Nikula
  2012-12-07  1:26 ` [Patch v3b 2/9] test: add sanity check for dump --format=batch-tag david
                   ` (7 subsequent siblings)
  8 siblings, 1 reply; 21+ messages in thread
From: david @ 2012-12-07  1:26 UTC (permalink / raw)
  To: notmuch; +Cc: David Bremner

From: David Bremner <bremner@debian.org>

sup is the old format, and remains the default, at least until
restore is converted to parse this format.

Each line of the batch-tag format is modelled on the syntax of notmuch tag:
- "notmuch tag" is omitted from the front of the line
- The dump format only uses query strings of a single message-id.
- Each space seperated tag/message-id is 'hex-encoded' to remove
  trouble-making characters.
- It is permitted (and will be useful) for there to be no tags before
  the query.

In particular this format won't have the same problem with e.g. spaces
in message-ids or tags; they will be round-trip-able.
---
 dump-restore-private.h |   13 +++++++++++++
 notmuch-dump.c         |   48 ++++++++++++++++++++++++++++++++++++++++++------
 2 files changed, 55 insertions(+), 6 deletions(-)
 create mode 100644 dump-restore-private.h

diff --git a/dump-restore-private.h b/dump-restore-private.h
new file mode 100644
index 0000000..896a004
--- /dev/null
+++ b/dump-restore-private.h
@@ -0,0 +1,13 @@
+#ifndef DUMP_RESTORE_PRIVATE_H
+#define DUMP_RESTORE_PRIVATE_H
+
+#include "hex-escape.h"
+#include "command-line-arguments.h"
+
+typedef enum dump_formats {
+    DUMP_FORMAT_AUTO,
+    DUMP_FORMAT_BATCH_TAG,
+    DUMP_FORMAT_SUP
+} dump_format_t;
+
+#endif
diff --git a/notmuch-dump.c b/notmuch-dump.c
index 88f598a..d2dad40 100644
--- a/notmuch-dump.c
+++ b/notmuch-dump.c
@@ -19,6 +19,7 @@
  */
 
 #include "notmuch-client.h"
+#include "dump-restore-private.h"
 
 int
 notmuch_dump_command (unused (void *ctx), int argc, char *argv[])
@@ -43,7 +44,13 @@ notmuch_dump_command (unused (void *ctx), int argc, char *argv[])
     char *output_file_name = NULL;
     int opt_index;
 
+    int output_format = DUMP_FORMAT_SUP;
+
     notmuch_opt_desc_t options[] = {
+	{ NOTMUCH_OPT_KEYWORD, &output_format, "format", 'f',
+	  (notmuch_keyword_t []){ { "sup", DUMP_FORMAT_SUP },
+				  { "batch-tag", DUMP_FORMAT_BATCH_TAG },
+				  { 0, 0 } } },
 	{ NOTMUCH_OPT_STRING, &output_file_name, "output", 'o', 0  },
 	{ 0, 0, 0, 0, 0 }
     };
@@ -83,27 +90,56 @@ notmuch_dump_command (unused (void *ctx), int argc, char *argv[])
      */
     notmuch_query_set_sort (query, NOTMUCH_SORT_UNSORTED);
 
+    char *buffer = NULL;
+    size_t buffer_size = 0;
+
     for (messages = notmuch_query_search_messages (query);
 	 notmuch_messages_valid (messages);
 	 notmuch_messages_move_to_next (messages)) {
 	int first = 1;
+	const char *message_id;
+
 	message = notmuch_messages_get (messages);
+	message_id = notmuch_message_get_message_id (message);
 
-	fprintf (output,
-		 "%s (", notmuch_message_get_message_id (message));
+	if (output_format == DUMP_FORMAT_SUP) {
+	    fprintf (output, "%s (", message_id);
+	}
 
 	for (tags = notmuch_message_get_tags (message);
 	     notmuch_tags_valid (tags);
 	     notmuch_tags_move_to_next (tags)) {
-	    if (! first)
-		fprintf (output, " ");
+	    const char *tag_str = notmuch_tags_get (tags);
 
-	    fprintf (output, "%s", notmuch_tags_get (tags));
+	    if (! first)
+		fputs (" ", output);
 
 	    first = 0;
+
+	    if (output_format == DUMP_FORMAT_SUP) {
+		fputs (tag_str, output);
+	    } else {
+		if (hex_encode (notmuch, tag_str,
+				&buffer, &buffer_size) != HEX_SUCCESS) {
+		    fprintf (stderr, "Error: failed to hex-encode tag %s\n",
+			     tag_str);
+		    return 1;
+		}
+		fprintf (output, "+%s", buffer);
+	    }
 	}
 
-	fprintf (output, ")\n");
+	if (output_format == DUMP_FORMAT_SUP) {
+	    fputs (")\n", output);
+	} else {
+	    if (hex_encode (notmuch, message_id,
+			    &buffer, &buffer_size) != HEX_SUCCESS) {
+		    fprintf (stderr, "Error: failed to hex-encode msg-id %s\n",
+			     message_id);
+		    return 1;
+	    }
+	    fprintf (output, " -- id:%s\n", buffer);
+	}
 
 	notmuch_message_destroy (message);
     }
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [Patch v3b 2/9] test: add sanity check for dump --format=batch-tag.
  2012-12-07  1:26 V3b of batch tagging/dump/restore patches david
  2012-12-07  1:26 ` [Patch v3b 1/9] notmuch-dump: add --format=(batch-tag|sup) david
@ 2012-12-07  1:26 ` david
  2012-12-07 22:26   ` Jani Nikula
  2012-12-07  1:26 ` [Patch v3b 3/9] util: add string-util.[ch] david
                   ` (6 subsequent siblings)
  8 siblings, 1 reply; 21+ messages in thread
From: david @ 2012-12-07  1:26 UTC (permalink / raw)
  To: notmuch; +Cc: David Bremner

From: David Bremner <bremner@debian.org>

It's important this does not rely on restore, since it hasn't been
written yet.
---
 test/dump-restore |   13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/test/dump-restore b/test/dump-restore
index bf31266..b4c807f 100755
--- a/test/dump-restore
+++ b/test/dump-restore
@@ -85,6 +85,19 @@ test_begin_subtest "dump --output=outfile -- from:cworth"
 notmuch dump --output=dump-outfile-dash-inbox.actual -- from:cworth
 test_expect_equal_file dump-cworth.expected dump-outfile-dash-inbox.actual
 
+test_begin_subtest "Check for a safe set of message-ids"
+notmuch search --output=messages from:cworth | sed s/^id:// > EXPECTED
+notmuch search --output=messages from:cworth | sed s/^id:// |\
+	$TEST_DIRECTORY/hex-xcode --direction=encode > OUTPUT
+test_expect_equal_file OUTPUT EXPECTED
+
+test_begin_subtest "format=batch-tag, dump sanity check."
+notmuch dump --format=sup from:cworth | cut -f1 -d' ' | \
+    sort > EXPECTED.$test_count
+notmuch dump --format=batch-tag from:cworth | sed 's/^.*-- id://' | \
+    sort > OUTPUT.$test_count
+test_expect_equal_file EXPECTED.$test_count OUTPUT.$test_count
+
 test_begin_subtest 'roundtripping random message-ids and tags'
     test_subtest_known_broken
     ${TEST_DIRECTORY}/random-corpus --config-path=${NOTMUCH_CONFIG} \
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [Patch v3b 3/9] util: add string-util.[ch]
  2012-12-07  1:26 V3b of batch tagging/dump/restore patches david
  2012-12-07  1:26 ` [Patch v3b 1/9] notmuch-dump: add --format=(batch-tag|sup) david
  2012-12-07  1:26 ` [Patch v3b 2/9] test: add sanity check for dump --format=batch-tag david
@ 2012-12-07  1:26 ` david
  2012-12-07 22:30   ` Jani Nikula
  2012-12-08 10:23   ` Mark Walters
  2012-12-07  1:26 ` [Patch v3b 4/9] tag-util.[ch]: New files for common tagging routines david
                   ` (5 subsequent siblings)
  8 siblings, 2 replies; 21+ messages in thread
From: david @ 2012-12-07  1:26 UTC (permalink / raw)
  To: notmuch; +Cc: David Bremner

From: David Bremner <bremner@debian.org>

This is to give a home to strtok_len. It's a bit silly to add a header
for one routine, but it needs to be shared between several compilation
units (or at least that's the most natural design).
---
 util/Makefile.local |    3 ++-
 util/string-util.c  |   34 ++++++++++++++++++++++++++++++++++
 util/string-util.h  |   19 +++++++++++++++++++
 3 files changed, 55 insertions(+), 1 deletion(-)
 create mode 100644 util/string-util.c
 create mode 100644 util/string-util.h

diff --git a/util/Makefile.local b/util/Makefile.local
index 3ca623e..a11e35b 100644
--- a/util/Makefile.local
+++ b/util/Makefile.local
@@ -3,7 +3,8 @@
 dir := util
 extra_cflags += -I$(srcdir)/$(dir)
 
-libutil_c_srcs := $(dir)/xutil.c $(dir)/error_util.c $(dir)/hex-escape.c
+libutil_c_srcs := $(dir)/xutil.c $(dir)/error_util.c $(dir)/hex-escape.c \
+		  $(dir)/string-util.c
 
 libutil_modules := $(libutil_c_srcs:.c=.o)
 
diff --git a/util/string-util.c b/util/string-util.c
new file mode 100644
index 0000000..44f8cd3
--- /dev/null
+++ b/util/string-util.c
@@ -0,0 +1,34 @@
+/* string-util.c -  Extra or enhanced routines for null terminated strings.
+ *
+ * Copyright (c) 2012 Jani Nikula
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see http://www.gnu.org/licenses/ .
+ *
+ * Author: Jani Nikula <jani@nikula.org>
+ */
+
+
+#include "string-util.h"
+
+char *
+strtok_len (char *s, const char *delim, size_t *len)
+{
+    /* skip initial delims */
+    s += strspn (s, delim);
+
+    /* length of token */
+    *len = strcspn (s, delim);
+
+    return *len ? s : NULL;
+}
diff --git a/util/string-util.h b/util/string-util.h
new file mode 100644
index 0000000..696da40
--- /dev/null
+++ b/util/string-util.h
@@ -0,0 +1,19 @@
+#ifndef _STRING_UTIL_H
+#define _STRING_UTIL_H
+
+#include <string.h>
+
+/* like strtok(3), but without state, and doesn't modify s. usage pattern:
+ *
+ * const char *tok = input;
+ * const char *delim = " \t";
+ * size_t tok_len = 0;
+ *
+ * while ((tok = strtok_len (tok + tok_len, delim, &tok_len)) != NULL) {
+ *     // do stuff with string tok of length tok_len
+ * }
+ */
+
+char *strtok_len (char *s, const char *delim, size_t *len);
+
+#endif
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [Patch v3b 4/9] tag-util.[ch]: New files for common tagging routines
  2012-12-07  1:26 V3b of batch tagging/dump/restore patches david
                   ` (2 preceding siblings ...)
  2012-12-07  1:26 ` [Patch v3b 3/9] util: add string-util.[ch] david
@ 2012-12-07  1:26 ` david
  2012-12-07 22:45   ` Jani Nikula
  2012-12-08 10:50   ` Mark Walters
  2012-12-07  1:26 ` [Patch v3b 5/9] notmuch-restore: add support for input format 'batch-tag' david
                   ` (4 subsequent siblings)
  8 siblings, 2 replies; 21+ messages in thread
From: david @ 2012-12-07  1:26 UTC (permalink / raw)
  To: notmuch; +Cc: David Bremner

From: David Bremner <bremner@debian.org>

These are meant to be shared between notmuch-tag and notmuch-restore.

The bulk of the routines implement a "tag operation list" abstract
data type act as a structured representation of a set of tag
operations (typically coming from a single tag command or line of
input).
---
 Makefile.local |    1 +
 tag-util.c     |  278 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 tag-util.h     |  119 ++++++++++++++++++++++++
 3 files changed, 398 insertions(+)
 create mode 100644 tag-util.c
 create mode 100644 tag-util.h

diff --git a/Makefile.local b/Makefile.local
index 2b91946..854867d 100644
--- a/Makefile.local
+++ b/Makefile.local
@@ -274,6 +274,7 @@ notmuch_client_srcs =		\
 	query-string.c		\
 	mime-node.c		\
 	crypto.c		\
+	tag-util.c
 
 notmuch_client_modules = $(notmuch_client_srcs:.c=.o)
 
diff --git a/tag-util.c b/tag-util.c
new file mode 100644
index 0000000..932ee7f
--- /dev/null
+++ b/tag-util.c
@@ -0,0 +1,278 @@
+#include <assert.h>
+#include "string-util.h"
+#include "tag-util.h"
+#include "hex-escape.h"
+
+#define TAG_OP_LIST_INITIAL_SIZE 10
+
+struct _tag_operation_t {
+    const char *tag;
+    notmuch_bool_t remove;
+};
+
+struct _tag_op_list_t {
+    tag_operation_t *ops;
+    size_t count;
+    size_t size;
+};
+
+int
+parse_tag_line (void *ctx, char *line,
+		tag_op_flag_t flags,
+		char **query_string,
+		tag_op_list_t *tag_ops)
+{
+    char *tok = line;
+    size_t tok_len = 0;
+    char *line_for_error = talloc_strdup (ctx, line);
+    int ret = 0;
+
+    chomp_newline (line);
+
+    /* remove leading space */
+    while (*tok == ' ' || *tok == '\t')
+	tok++;
+
+    /* Skip empty and comment lines. */
+    if (*tok == '\0' || *tok == '#') {
+	ret = 1;
+	goto DONE;
+    }
+
+    tag_op_list_reset (tag_ops);
+
+    /* Parse tags. */
+    while ((tok = strtok_len (tok + tok_len, " ", &tok_len)) != NULL) {
+	notmuch_bool_t remove;
+	char *tag;
+
+	/* Optional explicit end of tags marker. */
+	if (tok_len == 2 && strncmp (tok, "--", tok_len) == 0) {
+	    tok = strtok_len (tok + tok_len, " ", &tok_len);
+	    break;
+	}
+
+	/* Implicit end of tags. */
+	if (*tok != '-' && *tok != '+')
+	    break;
+
+	/* If tag is terminated by NUL, there's no query string. */
+	if (*(tok + tok_len) == '\0') {
+	    fprintf (stderr, "no query string: %s\n", line_for_error);
+	    ret = 1;
+	    goto DONE;
+	}
+
+	/* Terminate, and start next token after terminator. */
+	*(tok + tok_len++) = '\0';
+
+	remove = (*tok == '-');
+	tag = tok + 1;
+
+	/* Maybe refuse empty tags. */
+	if (! (flags & TAG_FLAG_BE_GENEROUS) && *tag == '\0') {
+	    fprintf (stderr, "Error: empty tag: %s\n", line_for_error);
+	    goto DONE;
+	}
+
+	/* Decode tag. */
+	if (hex_decode_inplace (tag) != HEX_SUCCESS) {
+	    fprintf (stderr, "Hex decoding of tag %s failed\n",
+		     tag);
+	    ret = 1;
+	    goto DONE;
+	}
+
+	if (tag_op_list_append (ctx, tag_ops, tag, remove)) {
+	    ret = -1;
+	    goto DONE;
+	}
+    }
+
+    if (tok == NULL) {
+	fprintf (stderr, "Warning: Ignoring invalid input line: %s\n",
+		 line_for_error);
+	ret = 1;
+	goto DONE;
+    }
+
+    /* tok now points to the query string */
+    if (hex_decode_inplace (tok) != HEX_SUCCESS) {
+	fprintf (stderr, "Hex decoding of query %s failed\n",
+		 tok);
+	ret = 1;
+	goto DONE;
+    }
+
+    *query_string = tok;
+  DONE:
+    talloc_free (line_for_error);
+    return ret;
+}
+
+static inline void
+message_error (notmuch_message_t *message,
+	       notmuch_status_t status,
+	       const char *format, ...)
+{
+    va_list va_args;
+
+    va_start (va_args, format);
+
+    vfprintf (stderr, format, va_args);
+    fprintf (stderr, "Message-ID: %s\n", notmuch_message_get_message_id (message));
+    fprintf (stderr, "Status: %s\n", notmuch_status_to_string (status));
+}
+
+notmuch_status_t
+tag_op_list_apply (notmuch_message_t *message,
+		   tag_op_list_t *list,
+		   tag_op_flag_t flags)
+{
+    size_t i;
+    notmuch_status_t status = 0;
+    tag_operation_t *tag_ops = list->ops;
+
+    status = notmuch_message_freeze (message);
+    if (status) {
+	message_error (message, status, "freezing message");
+	return status;
+    }
+
+    if (flags & TAG_FLAG_REMOVE_ALL) {
+	status = notmuch_message_remove_all_tags (message);
+	if (status) {
+	    message_error (message, status, "removing all tags");
+	    return status;
+	}
+    }
+
+    for (i = 0; i < list->count; i++) {
+	if (tag_ops[i].remove) {
+	    status = notmuch_message_remove_tag (message, tag_ops[i].tag);
+	    if (status) {
+		message_error (message, status, "removing tag %s", tag_ops[i].tag);
+		return status;
+	    }
+	} else {
+	    status = notmuch_message_add_tag (message, tag_ops[i].tag);
+	    if (status) {
+		message_error (message, status, "adding tag %s", tag_ops[i].tag);
+		return status;
+	    }
+
+	}
+    }
+
+    status = notmuch_message_thaw (message);
+    if (status) {
+	message_error (message, status, "thawing message");
+	return status;
+    }
+
+
+    if (flags & TAG_FLAG_MAILDIR_SYNC) {
+	status = notmuch_message_tags_to_maildir_flags (message);
+	if (status) {
+	    message_error (message, status, "synching tags to maildir");
+	    return status;
+	}
+    }
+
+    return NOTMUCH_STATUS_SUCCESS;
+
+}
+
+
+/* Array of tagging operations (add or remove.  Size will be increased
+ * as necessary. */
+
+tag_op_list_t *
+tag_op_list_create (void *ctx)
+{
+    tag_op_list_t *list;
+
+    list = talloc (ctx, tag_op_list_t);
+    if (list == NULL)
+	return NULL;
+
+    list->size = TAG_OP_LIST_INITIAL_SIZE;
+    list->count = 0;
+
+    list->ops = talloc_array (ctx, tag_operation_t, list->size);
+    if (list->ops == NULL)
+	return NULL;
+
+    return list;
+}
+
+
+int
+tag_op_list_append (void *ctx,
+		    tag_op_list_t *list,
+		    const char *tag,
+		    notmuch_bool_t remove)
+{
+    /* Make room if current array is full.  This should be a fairly
+     * rare case, considering the initial array size.
+     */
+
+    if (list->count == list->size) {
+	list->size *= 2;
+	list->ops = talloc_realloc (ctx, list->ops, tag_operation_t,
+				    list->size);
+	if (list->ops == NULL) {
+	    fprintf (stderr, "Out of memory.\n");
+	    return 1;
+	}
+    }
+
+    /* add the new operation */
+
+    list->ops[list->count].tag = tag;
+    list->ops[list->count].remove = remove;
+    list->count++;
+    return 0;
+}
+
+/*
+ *   Is the i'th tag operation a remove?
+ */
+
+notmuch_bool_t
+tag_op_list_isremove (const tag_op_list_t *list, size_t i)
+{
+    assert (i < list->count);
+    return list->ops[i].remove;
+}
+
+/*
+ * Reset a list to contain no operations
+ */
+
+void
+tag_op_list_reset (tag_op_list_t *list)
+{
+    list->count = 0;
+}
+
+/*
+ * Return the number of operations in a list
+ */
+
+size_t
+tag_op_list_size (const tag_op_list_t *list)
+{
+    return list->count;
+}
+
+/*
+ *   return the i'th tag in the list
+ */
+
+const char *
+tag_op_list_tag (const tag_op_list_t *list, size_t i)
+{
+    assert (i < list->count);
+    return list->ops[i].tag;
+}
diff --git a/tag-util.h b/tag-util.h
new file mode 100644
index 0000000..df05d72
--- /dev/null
+++ b/tag-util.h
@@ -0,0 +1,119 @@
+#ifndef _TAG_UTIL_H
+#define _TAG_UTIL_H
+
+#include "notmuch-client.h"
+
+typedef struct _tag_operation_t tag_operation_t;
+typedef struct _tag_op_list_t tag_op_list_t;
+
+/* Use powers of 2 */
+typedef enum {
+    TAG_FLAG_NONE = 0,
+
+    /* Operations are synced to maildir, if possible.
+     */
+    TAG_FLAG_MAILDIR_SYNC = (1 << 0),
+
+    /* Remove all tags from message before applying list.
+     */
+    TAG_FLAG_REMOVE_ALL = (1 << 1),
+
+    /* Don't try to avoid database operations. Useful when we
+     * know that message passed needs these operations.
+      */
+    TAG_FLAG_PRE_OPTIMIZED = (1 << 2),
+
+    /* Accept strange tags that might be user error;
+     * intended for use by notmuch-restore.
+     */
+    TAG_FLAG_BE_GENEROUS = (1 << 3)
+
+} tag_op_flag_t;
+
+/* Parse a string of the following format:
+ *
+ * +<tag>|-<tag> [...] [--] <search-terms>
+ *
+ * Each line is interpreted similarly to "notmuch tag" command line
+ * arguments. The delimiter is one or more spaces ' '. Any characters
+ * in <tag> and <search-terms> MAY be hex encoded with %NN where NN is
+ * the hexadecimal value of the character. Any ' ' and '%' characters
+ * in <tag> and <search-terms> MUST be hex encoded (using %20 and %25,
+ * respectively). Any characters that are not part of <tag> or
+ * <search-terms> MUST NOT be hex encoded.
+ *
+ * Leading and trailing space ' ' is ignored. Empty lines and lines
+ * beginning with '#' are ignored.
+ *
+ * Returns: 0 for OK, 1 for skipped line, -1 for fatal(ish) error.
+ *
+ * Output Parameters:
+ *	ops	contains a list of tag operations
+ *	query_str the search terms.
+ */
+int
+parse_tag_line (void *ctx, char *line,
+		tag_op_flag_t flags,
+		char **query_str, tag_op_list_t *ops);
+
+/*
+ * Create an empty list of tag operations
+ *
+ * ctx is passed to talloc
+ */
+
+tag_op_list_t
+*tag_op_list_create (void *ctx);
+
+/*
+ * Add a tag operation (delete iff remove == TRUE) to a list.
+ * The list is expanded as necessary.
+ */
+
+int
+tag_op_list_append (void *ctx,
+		    tag_op_list_t *list,
+		    const char *tag,
+		    notmuch_bool_t remove);
+
+/*
+ * Apply a list of tag operations, in order, to a given message.
+ *
+ * Flags can be bitwise ORed; see enum above for possibilies.
+ */
+
+notmuch_status_t
+tag_op_list_apply (notmuch_message_t *message,
+		   tag_op_list_t *tag_ops,
+		   tag_op_flag_t flags);
+
+/*
+ * Return the number of operations in a list
+ */
+
+size_t
+tag_op_list_size (const tag_op_list_t *list);
+
+/*
+ * Reset a list to contain no operations
+ */
+
+void
+tag_op_list_reset (tag_op_list_t *list);
+
+
+ /*
+  *   return the i'th tag in the list
+  */
+
+const char *
+tag_op_list_tag (const tag_op_list_t *list, size_t i);
+
+/*
+ *   Is the i'th tag operation a remove?
+ */
+
+notmuch_bool_t
+tag_op_list_isremove (const tag_op_list_t *list, size_t i);
+
+#endif
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [Patch v3b 5/9] notmuch-restore: add support for input format 'batch-tag'
  2012-12-07  1:26 V3b of batch tagging/dump/restore patches david
                   ` (3 preceding siblings ...)
  2012-12-07  1:26 ` [Patch v3b 4/9] tag-util.[ch]: New files for common tagging routines david
@ 2012-12-07  1:26 ` david
  2012-12-07 22:50   ` Jani Nikula
  2012-12-08 11:43   ` Mark Walters
  2012-12-07  1:26 ` [Patch v3b 6/9] test: update dump-restore roundtripping test for batch-tag format david
                   ` (3 subsequent siblings)
  8 siblings, 2 replies; 21+ messages in thread
From: david @ 2012-12-07  1:26 UTC (permalink / raw)
  To: notmuch; +Cc: David Bremner

From: David Bremner <bremner@debian.org>

This can be enabled with the new --format=batch-tag command line
option to "notmuch restore". The input must consist of lines of the
format:

    +<tag>|-<tag> [...] [--] id:<msg-id>

Each line is interpreted similarly to "notmuch tag" command line
arguments. The delimiter is one or more spaces ' '. Any characters in
<tag> and <search-terms> MAY be hex encoded with %NN where NN is the
hexadecimal value of the character. Any ' ' and '%' characters in
<tag> and <msg-id> MUST be hex encoded (using %20 and %25,
respectively). Any characters that are not part of <tag> or
<search-terms> MUST NOT be hex encoded.

Leading and trailing space ' ' is ignored. Empty lines and lines
beginning with '#' are ignored.

Commit message mainly stolen from Jani's batch tagging commit, to
follow.
---
 notmuch-restore.c |  206 ++++++++++++++++++++++++++++++++++-------------------
 1 file changed, 131 insertions(+), 75 deletions(-)

diff --git a/notmuch-restore.c b/notmuch-restore.c
index f03dcac..ceec2d3 100644
--- a/notmuch-restore.c
+++ b/notmuch-restore.c
@@ -19,18 +19,22 @@
  */
 
 #include "notmuch-client.h"
+#include "dump-restore-private.h"
+#include "tag-util.h"
+#include "string-util.h"
+
+static volatile sig_atomic_t interrupted;
+static regex_t regex;
 
 static int
-tag_message (notmuch_database_t *notmuch, const char *message_id,
-	     char *file_tags, notmuch_bool_t remove_all,
-	     notmuch_bool_t synchronize_flags)
+tag_message (unused (void *ctx),
+	     notmuch_database_t *notmuch,
+	     const char *message_id,
+	     tag_op_list_t *tag_ops,
+	     tag_op_flag_t flags)
 {
     notmuch_status_t status;
-    notmuch_tags_t *db_tags;
-    char *db_tags_str;
     notmuch_message_t *message = NULL;
-    const char *tag;
-    char *next;
     int ret = 0;
 
     status = notmuch_database_find_message (notmuch, message_id, &message);
@@ -44,55 +48,62 @@ tag_message (notmuch_database_t *notmuch, const char *message_id,
 
     /* In order to detect missing messages, this check/optimization is
      * intentionally done *after* first finding the message. */
-    if (! remove_all && (file_tags == NULL || *file_tags == '\0'))
-	goto DONE;
-
-    db_tags_str = NULL;
-    for (db_tags = notmuch_message_get_tags (message);
-	 notmuch_tags_valid (db_tags);
-	 notmuch_tags_move_to_next (db_tags)) {
-	tag = notmuch_tags_get (db_tags);
-
-	if (db_tags_str)
-	    db_tags_str = talloc_asprintf_append (db_tags_str, " %s", tag);
-	else
-	    db_tags_str = talloc_strdup (message, tag);
-    }
+    if ((flags & TAG_FLAG_REMOVE_ALL) || tag_op_list_size (tag_ops))
+	tag_op_list_apply (message, tag_ops, flags);
 
-    if (((file_tags == NULL || *file_tags == '\0') &&
-	 (db_tags_str == NULL || *db_tags_str == '\0')) ||
-	(file_tags && db_tags_str && strcmp (file_tags, db_tags_str) == 0))
-	goto DONE;
+    notmuch_message_destroy (message);
 
-    notmuch_message_freeze (message);
+    return ret;
+}
 
-    if (remove_all)
-	notmuch_message_remove_all_tags (message);
+static int
+parse_sup_line (void *ctx, char *line,
+		char **query_str, tag_op_list_t *tag_ops)
+{
 
-    next = file_tags;
-    while (next) {
-	tag = strsep (&next, " ");
-	if (*tag == '\0')
-	    continue;
-	status = notmuch_message_add_tag (message, tag);
-	if (status) {
-	    fprintf (stderr, "Error applying tag %s to message %s:\n",
-		     tag, message_id);
-	    fprintf (stderr, "%s\n", notmuch_status_to_string (status));
-	    ret = 1;
-	}
+    regmatch_t match[3];
+    char *file_tags;
+    int rerr;
+
+    tag_op_list_reset (tag_ops);
+
+    chomp_newline (line);
+
+    /* Silently ignore blank lines */
+    if (line[0] == '\0') {
+	return 1;
+    }
+
+    rerr = xregexec (&regex, line, 3, match, 0);
+    if (rerr == REG_NOMATCH) {
+	fprintf (stderr, "Warning: Ignoring invalid sup format line: %s\n",
+		 line);
+	return 1;
     }
 
-    notmuch_message_thaw (message);
+    *query_str = talloc_strndup (ctx, line + match[1].rm_so,
+				 match[1].rm_eo - match[1].rm_so);
+    file_tags = talloc_strndup (ctx, line + match[2].rm_so,
+				match[2].rm_eo - match[2].rm_so);
 
-    if (synchronize_flags)
-	notmuch_message_tags_to_maildir_flags (message);
+    char *tok = file_tags;
+    size_t tok_len = 0;
 
-  DONE:
-    if (message)
-	notmuch_message_destroy (message);
+    tag_op_list_reset (tag_ops);
+
+    while ((tok = strtok_len (tok + tok_len, " ", &tok_len)) != NULL) {
+
+	if (*(tok + tok_len) != '\0') {
+	    *(tok + tok_len) = '\0';
+	    tok_len++;
+	}
+
+	if (tag_op_list_append (ctx, tag_ops, tok, FALSE))
+	    return -1;
+    }
+
+    return 0;
 
-    return ret;
 }
 
 int
@@ -100,16 +111,19 @@ notmuch_restore_command (unused (void *ctx), int argc, char *argv[])
 {
     notmuch_config_t *config;
     notmuch_database_t *notmuch;
-    notmuch_bool_t synchronize_flags;
     notmuch_bool_t accumulate = FALSE;
+    tag_op_flag_t flags = 0;
+    tag_op_list_t *tag_ops;
+
     char *input_file_name = NULL;
     FILE *input = stdin;
     char *line = NULL;
     size_t line_size;
     ssize_t line_len;
-    regex_t regex;
-    int rerr;
+
+    int ret = 0;
     int opt_index;
+    int input_format = DUMP_FORMAT_AUTO;
 
     config = notmuch_config_open (ctx, NULL, NULL);
     if (config == NULL)
@@ -119,9 +133,15 @@ notmuch_restore_command (unused (void *ctx), int argc, char *argv[])
 			       NOTMUCH_DATABASE_MODE_READ_WRITE, &notmuch))
 	return 1;
 
-    synchronize_flags = notmuch_config_get_maildir_synchronize_flags (config);
+    if (notmuch_config_get_maildir_synchronize_flags (config))
+	flags |= TAG_FLAG_MAILDIR_SYNC;
 
     notmuch_opt_desc_t options[] = {
+	{ NOTMUCH_OPT_KEYWORD, &input_format, "format", 'f',
+	  (notmuch_keyword_t []){ { "auto", DUMP_FORMAT_AUTO },
+				  { "batch-tag", DUMP_FORMAT_BATCH_TAG },
+				  { "sup", DUMP_FORMAT_SUP },
+				  { 0, 0 } } },
 	{ NOTMUCH_OPT_STRING, &input_file_name, "input", 'i', 0 },
 	{ NOTMUCH_OPT_BOOLEAN,  &accumulate, "accumulate", 'a', 0 },
 	{ 0, 0, 0, 0, 0 }
@@ -134,6 +154,9 @@ notmuch_restore_command (unused (void *ctx), int argc, char *argv[])
 	return 1;
     }
 
+    if (! accumulate)
+	flags |= TAG_FLAG_REMOVE_ALL;
+
     if (input_file_name) {
 	input = fopen (input_file_name, "r");
 	if (input == NULL) {
@@ -154,44 +177,77 @@ notmuch_restore_command (unused (void *ctx), int argc, char *argv[])
      * non-space characters for the message-id, then one or more
      * spaces, then a list of space-separated tags as a sequence of
      * characters within literal '(' and ')'. */
-    if ( xregcomp (&regex,
-		   "^([^ ]+) \\(([^)]*)\\)$",
-		   REG_EXTENDED) )
-	INTERNAL_ERROR ("compile time constant regex failed.");
+    char *p;
 
-    while ((line_len = getline (&line, &line_size, input)) != -1) {
-	regmatch_t match[3];
-	char *message_id, *file_tags;
+    line_len = getline (&line, &line_size, input);
+    if (line_len == 0)
+	return 0;
 
-	chomp_newline (line);
+    tag_ops = tag_op_list_create (ctx);
+    if (tag_ops == NULL) {
+	fprintf (stderr, "Out of memory.\n");
+	return 1;
+    }
 
-	rerr = xregexec (&regex, line, 3, match, 0);
-	if (rerr == REG_NOMATCH) {
-	    fprintf (stderr, "Warning: Ignoring invalid input line: %s\n",
-		     line);
-	    continue;
+    for (p = line; (input_format == DUMP_FORMAT_AUTO) && *p; p++) {
+	if (*p == '(')
+	    input_format = DUMP_FORMAT_SUP;
+    }
+
+    if (input_format == DUMP_FORMAT_AUTO)
+	input_format = DUMP_FORMAT_BATCH_TAG;
+
+    if (input_format == DUMP_FORMAT_SUP)
+	if ( xregcomp (&regex,
+		       "^([^ ]+) \\(([^)]*)\\)$",
+		       REG_EXTENDED) )
+	    INTERNAL_ERROR ("compile time constant regex failed.");
+
+    do {
+	char *query_string;
+
+	if (input_format == DUMP_FORMAT_SUP) {
+	    ret = parse_sup_line (ctx, line, &query_string, tag_ops);
+	} else {
+	    ret = parse_tag_line (ctx, line, TAG_FLAG_BE_GENEROUS,
+				  &query_string, tag_ops);
+
+	    if (ret == 0) {
+		if (strncmp ("id:", query_string, 3) != 0) {
+		    fprintf (stderr, "Unsupported query: %s\n", query_string);
+		    continue;
+		}
+		/* delete id: from front of string; tag_message
+		 * expects a raw message-id.
+		 *
+		 * XXX: Note that query string id:foo and bar will be
+		 * interpreted as a message id "foo and bar". This
+		 * should eventually be fixed to give a better error
+		 * message.
+		 */
+		query_string = query_string + 3;
+	    }
 	}
 
-	message_id = xstrndup (line + match[1].rm_so,
-			       match[1].rm_eo - match[1].rm_so);
-	file_tags = xstrndup (line + match[2].rm_so,
-			      match[2].rm_eo - match[2].rm_so);
+	if (ret > 0)
+	    continue;
 
-	tag_message (notmuch, message_id, file_tags, ! accumulate,
-		     synchronize_flags);
+	if (ret < 0 || tag_message (ctx, notmuch, query_string,
+				    tag_ops, flags))
+	    break;
 
-	free (message_id);
-	free (file_tags);
-    }
+    }  while ((line_len = getline (&line, &line_size, input)) != -1);
 
-    regfree (&regex);
+    if (input_format == DUMP_FORMAT_SUP)
+	regfree (&regex);
 
     if (line)
 	free (line);
 
     notmuch_database_destroy (notmuch);
+
     if (input != stdin)
 	fclose (input);
 
-    return 0;
+    return ret;
 }
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [Patch v3b 6/9] test: update dump-restore roundtripping test for batch-tag format
  2012-12-07  1:26 V3b of batch tagging/dump/restore patches david
                   ` (4 preceding siblings ...)
  2012-12-07  1:26 ` [Patch v3b 5/9] notmuch-restore: add support for input format 'batch-tag' david
@ 2012-12-07  1:26 ` david
  2012-12-07 22:56   ` Jani Nikula
  2012-12-07  1:26 ` [Patch v3b 7/9] test: second set of dump/restore --format=batch-tag tests david
                   ` (2 subsequent siblings)
  8 siblings, 1 reply; 21+ messages in thread
From: david @ 2012-12-07  1:26 UTC (permalink / raw)
  To: notmuch; +Cc: David Bremner

From: David Bremner <bremner@debian.org>

Now we can actually round trip these crazy tags and and message ids.
hex-xcode is no longer needed as it's built in.
---
 test/dump-restore |   17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/test/dump-restore b/test/dump-restore
index b4c807f..ce81e6f 100755
--- a/test/dump-restore
+++ b/test/dump-restore
@@ -99,23 +99,22 @@ notmuch dump --format=batch-tag from:cworth | sed 's/^.*-- id://' | \
 test_expect_equal_file EXPECTED.$test_count OUTPUT.$test_count
 
 test_begin_subtest 'roundtripping random message-ids and tags'
-    test_subtest_known_broken
-    ${TEST_DIRECTORY}/random-corpus --config-path=${NOTMUCH_CONFIG} \
-			--num-messages=10
 
-     notmuch dump| \
-	 ${TEST_DIRECTORY}/hex-xcode --direction=encode| \
+    ${TEST_DIRECTORY}/random-corpus --config-path=${NOTMUCH_CONFIG}
+			--num-messages=100
+
+     notmuch dump --format=batch-tag| \
 	 sort > EXPECTED.$test_count
 
      notmuch tag +this_tag_is_very_unlikely_to_be_random '*'
 
-     ${TEST_DIRECTORY}/hex-xcode --direction=decode < EXPECTED.$test_count | \
-	 notmuch restore 2>/dev/null
+     notmuch restore --format=batch-tag < EXPECTED.$test_count
 
-     notmuch dump| \
-	 ${TEST_DIRECTORY}/hex-xcode --direction=encode| \
+     notmuch dump --format=batch-tag| \
 	 sort > OUTPUT.$test_count
 
 test_expect_equal_file EXPECTED.$test_count OUTPUT.$test_count
 
 test_done
+
+# Note the database is "poisoned" for sup format at this point.
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [Patch v3b 7/9] test: second set of dump/restore --format=batch-tag tests
  2012-12-07  1:26 V3b of batch tagging/dump/restore patches david
                   ` (5 preceding siblings ...)
  2012-12-07  1:26 ` [Patch v3b 6/9] test: update dump-restore roundtripping test for batch-tag format david
@ 2012-12-07  1:26 ` david
  2012-12-07  1:26 ` [Patch v3b 8/9] notmuch-{dump, restore}.1: document new format options david
  2012-12-07  1:26 ` [Patch v3b 9/9] tag-util: optimization of tag application david
  8 siblings, 0 replies; 21+ messages in thread
From: david @ 2012-12-07  1:26 UTC (permalink / raw)
  To: notmuch; +Cc: David Bremner

From: David Bremner <bremner@debian.org>

These one need the completed functionality in notmuch-restore. Fairly
exotic tags are tested, but no weird message id's.

We test each possible input to autodetection, both explicit (with
--format=auto) and implicit (without --format).
---
 test/dump-restore |   83 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 83 insertions(+)

diff --git a/test/dump-restore b/test/dump-restore
index ce81e6f..ff02b29 100755
--- a/test/dump-restore
+++ b/test/dump-restore
@@ -98,6 +98,89 @@ notmuch dump --format=batch-tag from:cworth | sed 's/^.*-- id://' | \
     sort > OUTPUT.$test_count
 test_expect_equal_file EXPECTED.$test_count OUTPUT.$test_count
 
+test_begin_subtest "format=batch-tag, # round-trip"
+notmuch dump --format=sup | sort > EXPECTED.$test_count
+notmuch dump --format=batch-tag | notmuch restore --format=batch-tag
+notmuch dump --format=sup | sort > OUTPUT.$test_count
+test_expect_equal_file EXPECTED.$test_count OUTPUT.$test_count
+
+test_begin_subtest "format=batch-tag, # blank lines and comments"
+notmuch dump --format=batch-tag| sort > EXPECTED.$test_count
+notmuch restore <<EOF
+# this line is a comment; the next has only white space
+ 	 
+
+# the previous line is empty
+EOF
+notmuch dump --format=batch-tag | sort > OUTPUT.$test_count
+test_expect_equal_file EXPECTED.$test_count OUTPUT.$test_count
+
+test_begin_subtest "format=batch-tag, # reverse-round-trip empty tag"
+cat <<EOF >EXPECTED.$test_count
++ -- id:20091117232137.GA7669@griffis1.net
+EOF
+notmuch restore --format=batch-tag < EXPECTED.$test_count
+notmuch dump --format=batch-tag id:20091117232137.GA7669@griffis1.net > OUTPUT.$test_count
+test_expect_equal_file EXPECTED.$test_count OUTPUT.$test_count
+
+tag1='comic_swear=$&^%$^%\\//-+$^%$'
+enc1=$($TEST_DIRECTORY/hex-xcode --direction=encode "$tag1")
+
+tag2=$(printf 'this\n tag\t has\n spaces')
+enc2=$($TEST_DIRECTORY/hex-xcode --direction=encode "$tag2")
+
+enc3='%c3%91%c3%a5%c3%b0%c3%a3%c3%a5%c3%a9-%c3%8f%c3%8a'
+tag3=$($TEST_DIRECTORY/hex-xcode --direction=decode $enc3)
+
+notmuch dump --format=batch-tag > BACKUP
+
+notmuch tag +"$tag1" +"$tag2" +"$tag3" -inbox -unread "*"
+
+test_begin_subtest 'format=batch-tag, round trip with strange tags'
+notmuch dump --format=batch-tag > EXPECTED.$test_count
+notmuch dump --format=batch-tag | notmuch restore --format=batch-tag
+notmuch dump --format=batch-tag > OUTPUT.$test_count
+test_expect_equal_file EXPECTED.$test_count OUTPUT.$test_count
+
+test_begin_subtest 'format=batch-tag, checking encoded output'
+notmuch dump --format=batch-tag -- from:cworth |\
+	 awk "{ print \"+$enc1 +$enc2 +$enc3 -- \" \$5 }" > EXPECTED.$test_count
+notmuch dump --format=batch-tag -- from:cworth  > OUTPUT.$test_count
+test_expect_equal_file EXPECTED.$test_count OUTPUT.$test_count
+
+test_begin_subtest 'restoring sane tags'
+notmuch restore --format=batch-tag < BACKUP
+notmuch dump --format=batch-tag > OUTPUT.$test_count
+test_expect_equal_file BACKUP OUTPUT.$test_count
+
+test_begin_subtest 'format=batch-tag, restore=auto'
+notmuch dump --format=batch-tag > EXPECTED.$test_count
+notmuch tag -inbox -unread "*"
+notmuch restore --format=auto < EXPECTED.$test_count
+notmuch dump --format=batch-tag > OUTPUT.$test_count
+test_expect_equal_file EXPECTED.$test_count OUTPUT.$test_count
+
+test_begin_subtest 'format=sup, restore=auto'
+notmuch dump --format=sup > EXPECTED.$test_count
+notmuch tag -inbox -unread "*"
+notmuch restore --format=auto < EXPECTED.$test_count
+notmuch dump --format=sup > OUTPUT.$test_count
+test_expect_equal_file EXPECTED.$test_count OUTPUT.$test_count
+
+test_begin_subtest 'format=batch-tag, restore=default'
+notmuch dump --format=batch-tag > EXPECTED.$test_count
+notmuch tag -inbox -unread "*"
+notmuch restore < EXPECTED.$test_count
+notmuch dump --format=batch-tag > OUTPUT.$test_count
+test_expect_equal_file EXPECTED.$test_count OUTPUT.$test_count
+
+test_begin_subtest 'format=sup, restore=default'
+notmuch dump --format=sup > EXPECTED.$test_count
+notmuch tag -inbox -unread "*"
+notmuch restore < EXPECTED.$test_count
+notmuch dump --format=sup > OUTPUT.$test_count
+test_expect_equal_file EXPECTED.$test_count OUTPUT.$test_count
+
 test_begin_subtest 'roundtripping random message-ids and tags'
 
     ${TEST_DIRECTORY}/random-corpus --config-path=${NOTMUCH_CONFIG}
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [Patch v3b 8/9] notmuch-{dump, restore}.1: document new format options
  2012-12-07  1:26 V3b of batch tagging/dump/restore patches david
                   ` (6 preceding siblings ...)
  2012-12-07  1:26 ` [Patch v3b 7/9] test: second set of dump/restore --format=batch-tag tests david
@ 2012-12-07  1:26 ` david
  2012-12-07  1:26 ` [Patch v3b 9/9] tag-util: optimization of tag application david
  8 siblings, 0 replies; 21+ messages in thread
From: david @ 2012-12-07  1:26 UTC (permalink / raw)
  To: notmuch; +Cc: David Bremner

From: David Bremner <bremner@debian.org>

More or less arbitrarily, notmuch-dump.1 gets the more detailed
description of the format.
---
 man/man1/notmuch-dump.1    |   59 ++++++++++++++++++++++++++++++++++++++++++++
 man/man1/notmuch-restore.1 |   59 +++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 112 insertions(+), 6 deletions(-)

diff --git a/man/man1/notmuch-dump.1 b/man/man1/notmuch-dump.1
index 230deec..770b00f 100644
--- a/man/man1/notmuch-dump.1
+++ b/man/man1/notmuch-dump.1
@@ -5,6 +5,7 @@ notmuch-dump \- creates a plain-text dump of the tags of each message
 .SH SYNOPSIS
 
 .B "notmuch dump"
+.RB  [ "\-\-format=(sup|batch-tag)"  "] [--]"
 .RI "[ --output=<" filename "> ] [--]"
 .RI "[ <" search-term ">...]"
 
@@ -19,6 +20,64 @@ recreated from the messages themselves.  The output of notmuch dump is
 therefore the only critical thing to backup (and much more friendly to
 incremental backup than the native database files.)
 
+.TP 4
+.B \-\-format=(sup|batch-tag)
+
+Notmuch restore supports two plain text dump formats, both with one message-id
+per line, followed by a list of tags.
+
+.RS 4
+.TP 4
+.B sup
+
+The
+.B sup
+dump file format is specifically chosen to be
+compatible with the format of files produced by sup-dump.
+So if you've previously been using sup for mail, then the
+.B "notmuch restore"
+command provides you a way to import all of your tags (or labels as
+sup calls them).
+Each line has the following form
+
+.RS 4
+.RI < message-id >
+.B (
+.RI < tag "> ..."
+.B )
+
+with zero or more tags are separated by spaces. Note that (malformed)
+message-ids may contain arbitrary non-null characters. Note also
+that tags with spaces will not be correctly restored with this format.
+
+.RE
+
+.RE
+.RS 4
+.TP 4
+.B batch-tag
+
+The
+.B batch-tag
+dump format is intended to more robust against malformed message-ids
+and tags containing whitespace or non-\fBascii\fR(7) characters.
+Each line has the form
+
+.RS 4
+.RI "+<" "encoded-tag" "> " "" "+<" "encoded-tag" "> ... -- " "" " id:<" encoded-message-id >
+
+where encoded means that every byte not matching the regex
+.B [A-Za-z0-9@=.,_+-]
+is replace by
+.B %nn
+where nn is the two digit hex encoding.
+The astute reader will notice this is a special case of the batch input
+format for \fBnotmuch-tag\fR(1); note that the single message-id query is
+mandatory for \fBnotmuch-restore\fR(1).
+
+.RE
+
+
 With no search terms, a dump of all messages in the database will be
 generated.  A "--" argument instructs notmuch that the
 remaining arguments are search terms.
diff --git a/man/man1/notmuch-restore.1 b/man/man1/notmuch-restore.1
index 2fa8733..6bba628 100644
--- a/man/man1/notmuch-restore.1
+++ b/man/man1/notmuch-restore.1
@@ -6,6 +6,7 @@ notmuch-restore \- restores the tags from the given file (see notmuch dump)
 
 .B "notmuch restore"
 .RB [ "--accumulate" ]
+.RB [ "--format=(auto|batch-tag|sup)" ]
 .RI "[ --input=<" filename "> ]"
 
 .SH DESCRIPTION
@@ -15,19 +16,51 @@ Restores the tags from the given file (see
 
 The input is read from the given filename, if any, or from stdin.
 
-Note: The dump file format is specifically chosen to be
+
+Supported options for
+.B restore
+include
+.RS 4
+.TP 4
+.B \-\-accumulate
+
+The union of the existing and new tags is applied, instead of
+replacing each message's tags as they are read in from the dump file.
+
+.RE
+.RS 4
+.TP 4
+.B \-\-format=(sup|batch-tag|auto)
+
+Notmuch restore supports two plain text dump formats, with each line
+specifying a message-id and a set of tags.
+For details of the actual formats, see \fBnotmuch-dump\fR(1).
+
+.RS 4
+.TP 4
+.B sup
+
+The
+.B sup
+dump file format is specifically chosen to be
 compatible with the format of files produced by sup-dump.
 So if you've previously been using sup for mail, then the
 .B "notmuch restore"
 command provides you a way to import all of your tags (or labels as
 sup calls them).
 
-The --accumulate switch causes the union of the existing and new tags to be
-applied, instead of replacing each message's tags as they are read in from the
-dump file.
+.RE
+.RS 4
+.TP 4
+.B batch-tag
 
-See \fBnotmuch-search-terms\fR(7)
-for details of the supported syntax for <search-terms>.
+The
+.B batch-tag
+dump format is intended to more robust against malformed message-ids
+and tags containing whitespace or non-\fBascii\fR(7) characters.  This
+format hex-escapes all characters those outside of a small character
+set, intended to be suitable for e.g. pathnames in most UNIX-like
+systems.
 
 .B "notmuch restore"
 updates the maildir flags according to tag changes if the
@@ -36,6 +69,20 @@ configuration option is enabled. See \fBnotmuch-config\fR(1) for
 details.
 
 .RE
+
+.RS 4
+.TP 4
+.B auto
+
+This option (the default) tries to guess the format from the
+input. For correctly formed input in either supported format, this
+heuristic, based the fact that batch-tag format contains no parentheses,
+should be accurate.
+
+.RE
+
+.RE
+
 .SH SEE ALSO
 
 \fBnotmuch\fR(1), \fBnotmuch-config\fR(1), \fBnotmuch-count\fR(1),
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [Patch v3b 9/9] tag-util: optimization of tag application
  2012-12-07  1:26 V3b of batch tagging/dump/restore patches david
                   ` (7 preceding siblings ...)
  2012-12-07  1:26 ` [Patch v3b 8/9] notmuch-{dump, restore}.1: document new format options david
@ 2012-12-07  1:26 ` david
  2012-12-07 23:08   ` Jani Nikula
  2012-12-08 11:22   ` Mark Walters
  8 siblings, 2 replies; 21+ messages in thread
From: david @ 2012-12-07  1:26 UTC (permalink / raw)
  To: notmuch; +Cc: David Bremner

From: David Bremner <bremner@debian.org>

The idea is not to bother with restore operations if they don't change
the set of tags. This is actually a relatively common case.

In order to avoid fancy datastructures, this method is quadratic in
the number of tags; at least on my mail database this doesn't seem to
be a big problem.
---
 tag-util.c |   66 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 66 insertions(+)

diff --git a/tag-util.c b/tag-util.c
index 932ee7f..3d54e9e 100644
--- a/tag-util.c
+++ b/tag-util.c
@@ -124,6 +124,69 @@ message_error (notmuch_message_t *message,
     fprintf (stderr, "Status: %s\n", notmuch_status_to_string (status));
 }
 
+static int
+makes_changes (notmuch_message_t *message,
+	       tag_op_list_t *list,
+	       tag_op_flag_t flags)
+{
+
+    size_t i;
+
+    notmuch_tags_t *tags;
+    notmuch_bool_t changes = FALSE;
+
+    /* First, do we delete an existing tag? */
+    changes = FALSE;
+    for (tags = notmuch_message_get_tags (message);
+	 ! changes && notmuch_tags_valid (tags);
+	 notmuch_tags_move_to_next (tags)) {
+	const char *cur_tag = notmuch_tags_get (tags);
+	int last_op =  (flags & TAG_FLAG_REMOVE_ALL) ? -1 : 0;
+
+	/* slight contortions to count down with an unsigned index */
+	for (i = list->count; i-- > 0; /*nothing*/) {
+	    if (strcmp (cur_tag, list->ops[i].tag) == 0) {
+		last_op = list->ops[i].remove ? -1 : 1;
+		break;
+	    }
+	}
+
+	changes = (last_op == -1);
+    }
+    notmuch_tags_destroy (tags);
+
+    if (changes)
+	return TRUE;
+
+    /* Now check for adding new tags */
+    for (i = 0; i < list->count; i++) {
+	notmuch_bool_t exists = FALSE;
+
+	if (list->ops[i].remove)
+	    continue;
+
+	for (tags = notmuch_message_get_tags (message);
+	     notmuch_tags_valid (tags);
+	     notmuch_tags_move_to_next (tags)) {
+	    const char *cur_tag = notmuch_tags_get (tags);
+	    if (strcmp (cur_tag, list->ops[i].tag) == 0) {
+		exists = TRUE;
+		break;
+	    }
+	}
+	notmuch_tags_destroy (tags);
+
+	/* the following test is conservative,
+	 * in the sense it ignores cases like +foo ... -foo
+	 * but this is OK from a correctness point of view
+	 */
+	if (! exists)
+	    return TRUE;
+    }
+    return FALSE;
+
+}
+
 notmuch_status_t
 tag_op_list_apply (notmuch_message_t *message,
 		   tag_op_list_t *list,
@@ -133,6 +196,9 @@ tag_op_list_apply (notmuch_message_t *message,
     notmuch_status_t status = 0;
     tag_operation_t *tag_ops = list->ops;
 
+    if (! (flags & TAG_FLAG_PRE_OPTIMIZED) && ! makes_changes (message, list, flags))
+	return NOTMUCH_STATUS_SUCCESS;
+
     status = notmuch_message_freeze (message);
     if (status) {
 	message_error (message, status, "freezing message");
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* Re: [Patch v3b 1/9] notmuch-dump: add --format=(batch-tag|sup)
  2012-12-07  1:26 ` [Patch v3b 1/9] notmuch-dump: add --format=(batch-tag|sup) david
@ 2012-12-07 22:21   ` Jani Nikula
  0 siblings, 0 replies; 21+ messages in thread
From: Jani Nikula @ 2012-12-07 22:21 UTC (permalink / raw)
  To: david, notmuch; +Cc: David Bremner


LGTM.

On Fri, 07 Dec 2012, david@tethera.net wrote:
> From: David Bremner <bremner@debian.org>
>
> sup is the old format, and remains the default, at least until
> restore is converted to parse this format.
>
> Each line of the batch-tag format is modelled on the syntax of notmuch tag:
> - "notmuch tag" is omitted from the front of the line
> - The dump format only uses query strings of a single message-id.
> - Each space seperated tag/message-id is 'hex-encoded' to remove
>   trouble-making characters.
> - It is permitted (and will be useful) for there to be no tags before
>   the query.
>
> In particular this format won't have the same problem with e.g. spaces
> in message-ids or tags; they will be round-trip-able.
> ---
>  dump-restore-private.h |   13 +++++++++++++
>  notmuch-dump.c         |   48 ++++++++++++++++++++++++++++++++++++++++++------
>  2 files changed, 55 insertions(+), 6 deletions(-)
>  create mode 100644 dump-restore-private.h
>
> diff --git a/dump-restore-private.h b/dump-restore-private.h
> new file mode 100644
> index 0000000..896a004
> --- /dev/null
> +++ b/dump-restore-private.h
> @@ -0,0 +1,13 @@
> +#ifndef DUMP_RESTORE_PRIVATE_H
> +#define DUMP_RESTORE_PRIVATE_H
> +
> +#include "hex-escape.h"
> +#include "command-line-arguments.h"
> +
> +typedef enum dump_formats {
> +    DUMP_FORMAT_AUTO,
> +    DUMP_FORMAT_BATCH_TAG,
> +    DUMP_FORMAT_SUP
> +} dump_format_t;
> +
> +#endif
> diff --git a/notmuch-dump.c b/notmuch-dump.c
> index 88f598a..d2dad40 100644
> --- a/notmuch-dump.c
> +++ b/notmuch-dump.c
> @@ -19,6 +19,7 @@
>   */
>  
>  #include "notmuch-client.h"
> +#include "dump-restore-private.h"
>  
>  int
>  notmuch_dump_command (unused (void *ctx), int argc, char *argv[])
> @@ -43,7 +44,13 @@ notmuch_dump_command (unused (void *ctx), int argc, char *argv[])
>      char *output_file_name = NULL;
>      int opt_index;
>  
> +    int output_format = DUMP_FORMAT_SUP;
> +
>      notmuch_opt_desc_t options[] = {
> +	{ NOTMUCH_OPT_KEYWORD, &output_format, "format", 'f',
> +	  (notmuch_keyword_t []){ { "sup", DUMP_FORMAT_SUP },
> +				  { "batch-tag", DUMP_FORMAT_BATCH_TAG },
> +				  { 0, 0 } } },
>  	{ NOTMUCH_OPT_STRING, &output_file_name, "output", 'o', 0  },
>  	{ 0, 0, 0, 0, 0 }
>      };
> @@ -83,27 +90,56 @@ notmuch_dump_command (unused (void *ctx), int argc, char *argv[])
>       */
>      notmuch_query_set_sort (query, NOTMUCH_SORT_UNSORTED);
>  
> +    char *buffer = NULL;
> +    size_t buffer_size = 0;
> +
>      for (messages = notmuch_query_search_messages (query);
>  	 notmuch_messages_valid (messages);
>  	 notmuch_messages_move_to_next (messages)) {
>  	int first = 1;
> +	const char *message_id;
> +
>  	message = notmuch_messages_get (messages);
> +	message_id = notmuch_message_get_message_id (message);
>  
> -	fprintf (output,
> -		 "%s (", notmuch_message_get_message_id (message));
> +	if (output_format == DUMP_FORMAT_SUP) {
> +	    fprintf (output, "%s (", message_id);
> +	}
>  
>  	for (tags = notmuch_message_get_tags (message);
>  	     notmuch_tags_valid (tags);
>  	     notmuch_tags_move_to_next (tags)) {
> -	    if (! first)
> -		fprintf (output, " ");
> +	    const char *tag_str = notmuch_tags_get (tags);
>  
> -	    fprintf (output, "%s", notmuch_tags_get (tags));
> +	    if (! first)
> +		fputs (" ", output);
>  
>  	    first = 0;
> +
> +	    if (output_format == DUMP_FORMAT_SUP) {
> +		fputs (tag_str, output);
> +	    } else {
> +		if (hex_encode (notmuch, tag_str,
> +				&buffer, &buffer_size) != HEX_SUCCESS) {
> +		    fprintf (stderr, "Error: failed to hex-encode tag %s\n",
> +			     tag_str);
> +		    return 1;
> +		}
> +		fprintf (output, "+%s", buffer);
> +	    }
>  	}
>  
> -	fprintf (output, ")\n");
> +	if (output_format == DUMP_FORMAT_SUP) {
> +	    fputs (")\n", output);
> +	} else {
> +	    if (hex_encode (notmuch, message_id,
> +			    &buffer, &buffer_size) != HEX_SUCCESS) {
> +		    fprintf (stderr, "Error: failed to hex-encode msg-id %s\n",
> +			     message_id);
> +		    return 1;
> +	    }
> +	    fprintf (output, " -- id:%s\n", buffer);
> +	}
>  
>  	notmuch_message_destroy (message);
>      }
> -- 
> 1.7.10.4
>
> _______________________________________________
> notmuch mailing list
> notmuch@notmuchmail.org
> http://notmuchmail.org/mailman/listinfo/notmuch

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Patch v3b 2/9] test: add sanity check for dump --format=batch-tag.
  2012-12-07  1:26 ` [Patch v3b 2/9] test: add sanity check for dump --format=batch-tag david
@ 2012-12-07 22:26   ` Jani Nikula
  0 siblings, 0 replies; 21+ messages in thread
From: Jani Nikula @ 2012-12-07 22:26 UTC (permalink / raw)
  To: david, notmuch; +Cc: David Bremner


LGTM.

On Fri, 07 Dec 2012, david@tethera.net wrote:
> From: David Bremner <bremner@debian.org>
>
> It's important this does not rely on restore, since it hasn't been
> written yet.
> ---
>  test/dump-restore |   13 +++++++++++++
>  1 file changed, 13 insertions(+)
>
> diff --git a/test/dump-restore b/test/dump-restore
> index bf31266..b4c807f 100755
> --- a/test/dump-restore
> +++ b/test/dump-restore
> @@ -85,6 +85,19 @@ test_begin_subtest "dump --output=outfile -- from:cworth"
>  notmuch dump --output=dump-outfile-dash-inbox.actual -- from:cworth
>  test_expect_equal_file dump-cworth.expected dump-outfile-dash-inbox.actual
>  
> +test_begin_subtest "Check for a safe set of message-ids"
> +notmuch search --output=messages from:cworth | sed s/^id:// > EXPECTED
> +notmuch search --output=messages from:cworth | sed s/^id:// |\
> +	$TEST_DIRECTORY/hex-xcode --direction=encode > OUTPUT
> +test_expect_equal_file OUTPUT EXPECTED
> +
> +test_begin_subtest "format=batch-tag, dump sanity check."
> +notmuch dump --format=sup from:cworth | cut -f1 -d' ' | \
> +    sort > EXPECTED.$test_count
> +notmuch dump --format=batch-tag from:cworth | sed 's/^.*-- id://' | \
> +    sort > OUTPUT.$test_count
> +test_expect_equal_file EXPECTED.$test_count OUTPUT.$test_count
> +
>  test_begin_subtest 'roundtripping random message-ids and tags'
>      test_subtest_known_broken
>      ${TEST_DIRECTORY}/random-corpus --config-path=${NOTMUCH_CONFIG} \
> -- 
> 1.7.10.4
>
> _______________________________________________
> notmuch mailing list
> notmuch@notmuchmail.org
> http://notmuchmail.org/mailman/listinfo/notmuch

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Patch v3b 3/9] util: add string-util.[ch]
  2012-12-07  1:26 ` [Patch v3b 3/9] util: add string-util.[ch] david
@ 2012-12-07 22:30   ` Jani Nikula
  2012-12-08 10:23   ` Mark Walters
  1 sibling, 0 replies; 21+ messages in thread
From: Jani Nikula @ 2012-12-07 22:30 UTC (permalink / raw)
  To: david, notmuch; +Cc: David Bremner


LGTM.

On Fri, 07 Dec 2012, david@tethera.net wrote:
> From: David Bremner <bremner@debian.org>
>
> This is to give a home to strtok_len. It's a bit silly to add a header
> for one routine, but it needs to be shared between several compilation
> units (or at least that's the most natural design).
> ---
>  util/Makefile.local |    3 ++-
>  util/string-util.c  |   34 ++++++++++++++++++++++++++++++++++
>  util/string-util.h  |   19 +++++++++++++++++++
>  3 files changed, 55 insertions(+), 1 deletion(-)
>  create mode 100644 util/string-util.c
>  create mode 100644 util/string-util.h
>
> diff --git a/util/Makefile.local b/util/Makefile.local
> index 3ca623e..a11e35b 100644
> --- a/util/Makefile.local
> +++ b/util/Makefile.local
> @@ -3,7 +3,8 @@
>  dir := util
>  extra_cflags += -I$(srcdir)/$(dir)
>  
> -libutil_c_srcs := $(dir)/xutil.c $(dir)/error_util.c $(dir)/hex-escape.c
> +libutil_c_srcs := $(dir)/xutil.c $(dir)/error_util.c $(dir)/hex-escape.c \
> +		  $(dir)/string-util.c
>  
>  libutil_modules := $(libutil_c_srcs:.c=.o)
>  
> diff --git a/util/string-util.c b/util/string-util.c
> new file mode 100644
> index 0000000..44f8cd3
> --- /dev/null
> +++ b/util/string-util.c
> @@ -0,0 +1,34 @@
> +/* string-util.c -  Extra or enhanced routines for null terminated strings.
> + *
> + * Copyright (c) 2012 Jani Nikula
> + *
> + * This program is free software: you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation, either version 3 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program.  If not, see http://www.gnu.org/licenses/ .
> + *
> + * Author: Jani Nikula <jani@nikula.org>
> + */
> +
> +
> +#include "string-util.h"
> +
> +char *
> +strtok_len (char *s, const char *delim, size_t *len)
> +{
> +    /* skip initial delims */
> +    s += strspn (s, delim);
> +
> +    /* length of token */
> +    *len = strcspn (s, delim);
> +
> +    return *len ? s : NULL;
> +}
> diff --git a/util/string-util.h b/util/string-util.h
> new file mode 100644
> index 0000000..696da40
> --- /dev/null
> +++ b/util/string-util.h
> @@ -0,0 +1,19 @@
> +#ifndef _STRING_UTIL_H
> +#define _STRING_UTIL_H
> +
> +#include <string.h>
> +
> +/* like strtok(3), but without state, and doesn't modify s. usage pattern:
> + *
> + * const char *tok = input;
> + * const char *delim = " \t";
> + * size_t tok_len = 0;
> + *
> + * while ((tok = strtok_len (tok + tok_len, delim, &tok_len)) != NULL) {
> + *     // do stuff with string tok of length tok_len
> + * }
> + */
> +
> +char *strtok_len (char *s, const char *delim, size_t *len);
> +
> +#endif
> -- 
> 1.7.10.4
>
> _______________________________________________
> notmuch mailing list
> notmuch@notmuchmail.org
> http://notmuchmail.org/mailman/listinfo/notmuch

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Patch v3b 4/9] tag-util.[ch]: New files for common tagging routines
  2012-12-07  1:26 ` [Patch v3b 4/9] tag-util.[ch]: New files for common tagging routines david
@ 2012-12-07 22:45   ` Jani Nikula
  2012-12-08 10:50   ` Mark Walters
  1 sibling, 0 replies; 21+ messages in thread
From: Jani Nikula @ 2012-12-07 22:45 UTC (permalink / raw)
  To: david, notmuch; +Cc: David Bremner


A few small comments, otherwise LGTM.

On Fri, 07 Dec 2012, david@tethera.net wrote:
> From: David Bremner <bremner@debian.org>
>
> These are meant to be shared between notmuch-tag and notmuch-restore.
>
> The bulk of the routines implement a "tag operation list" abstract
> data type act as a structured representation of a set of tag
> operations (typically coming from a single tag command or line of
> input).
> ---
>  Makefile.local |    1 +
>  tag-util.c     |  278 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  tag-util.h     |  119 ++++++++++++++++++++++++
>  3 files changed, 398 insertions(+)
>  create mode 100644 tag-util.c
>  create mode 100644 tag-util.h
>
> diff --git a/Makefile.local b/Makefile.local
> index 2b91946..854867d 100644
> --- a/Makefile.local
> +++ b/Makefile.local
> @@ -274,6 +274,7 @@ notmuch_client_srcs =		\
>  	query-string.c		\
>  	mime-node.c		\
>  	crypto.c		\
> +	tag-util.c
>  
>  notmuch_client_modules = $(notmuch_client_srcs:.c=.o)
>  
> diff --git a/tag-util.c b/tag-util.c
> new file mode 100644
> index 0000000..932ee7f
> --- /dev/null
> +++ b/tag-util.c
> @@ -0,0 +1,278 @@
> +#include <assert.h>
> +#include "string-util.h"
> +#include "tag-util.h"
> +#include "hex-escape.h"
> +
> +#define TAG_OP_LIST_INITIAL_SIZE 10
> +
> +struct _tag_operation_t {
> +    const char *tag;
> +    notmuch_bool_t remove;
> +};
> +
> +struct _tag_op_list_t {
> +    tag_operation_t *ops;
> +    size_t count;
> +    size_t size;
> +};
> +
> +int
> +parse_tag_line (void *ctx, char *line,
> +		tag_op_flag_t flags,
> +		char **query_string,
> +		tag_op_list_t *tag_ops)
> +{
> +    char *tok = line;
> +    size_t tok_len = 0;
> +    char *line_for_error = talloc_strdup (ctx, line);

Check the return value?

> +    int ret = 0;
> +
> +    chomp_newline (line);
> +
> +    /* remove leading space */
> +    while (*tok == ' ' || *tok == '\t')
> +	tok++;
> +
> +    /* Skip empty and comment lines. */
> +    if (*tok == '\0' || *tok == '#') {
> +	ret = 1;
> +	goto DONE;
> +    }
> +
> +    tag_op_list_reset (tag_ops);
> +
> +    /* Parse tags. */
> +    while ((tok = strtok_len (tok + tok_len, " ", &tok_len)) != NULL) {
> +	notmuch_bool_t remove;
> +	char *tag;
> +
> +	/* Optional explicit end of tags marker. */
> +	if (tok_len == 2 && strncmp (tok, "--", tok_len) == 0) {
> +	    tok = strtok_len (tok + tok_len, " ", &tok_len);
> +	    break;
> +	}
> +
> +	/* Implicit end of tags. */
> +	if (*tok != '-' && *tok != '+')
> +	    break;
> +
> +	/* If tag is terminated by NUL, there's no query string. */
> +	if (*(tok + tok_len) == '\0') {
> +	    fprintf (stderr, "no query string: %s\n", line_for_error);
> +	    ret = 1;

Is this intentionally 1 rather than -1? Should the message have a
"Warning:" prefix if 1, or "Error:" prefix if -1?

> +	    goto DONE;
> +	}
> +
> +	/* Terminate, and start next token after terminator. */
> +	*(tok + tok_len++) = '\0';
> +
> +	remove = (*tok == '-');
> +	tag = tok + 1;
> +
> +	/* Maybe refuse empty tags. */
> +	if (! (flags & TAG_FLAG_BE_GENEROUS) && *tag == '\0') {
> +	    fprintf (stderr, "Error: empty tag: %s\n", line_for_error);

ret = -1; ?

> +	    goto DONE;
> +	}
> +
> +	/* Decode tag. */
> +	if (hex_decode_inplace (tag) != HEX_SUCCESS) {
> +	    fprintf (stderr, "Hex decoding of tag %s failed\n",
> +		     tag);
> +	    ret = 1;

or -1? Error vs. Warning prefix?

> +	    goto DONE;
> +	}
> +
> +	if (tag_op_list_append (ctx, tag_ops, tag, remove)) {
> +	    ret = -1;
> +	    goto DONE;
> +	}
> +    }
> +
> +    if (tok == NULL) {
> +	fprintf (stderr, "Warning: Ignoring invalid input line: %s\n",
> +		 line_for_error);
> +	ret = 1;
> +	goto DONE;
> +    }
> +
> +    /* tok now points to the query string */
> +    if (hex_decode_inplace (tok) != HEX_SUCCESS) {
> +	fprintf (stderr, "Hex decoding of query %s failed\n",
> +		 tok);
> +	ret = 1;

or -1? Error vs. Warning prefix?

> +	goto DONE;
> +    }
> +
> +    *query_string = tok;
> +  DONE:
> +    talloc_free (line_for_error);
> +    return ret;
> +}
> +
> +static inline void
> +message_error (notmuch_message_t *message,
> +	       notmuch_status_t status,
> +	       const char *format, ...)
> +{
> +    va_list va_args;
> +
> +    va_start (va_args, format);
> +
> +    vfprintf (stderr, format, va_args);
> +    fprintf (stderr, "Message-ID: %s\n", notmuch_message_get_message_id (message));
> +    fprintf (stderr, "Status: %s\n", notmuch_status_to_string (status));
> +}
> +
> +notmuch_status_t
> +tag_op_list_apply (notmuch_message_t *message,
> +		   tag_op_list_t *list,
> +		   tag_op_flag_t flags)
> +{
> +    size_t i;
> +    notmuch_status_t status = 0;
> +    tag_operation_t *tag_ops = list->ops;
> +
> +    status = notmuch_message_freeze (message);
> +    if (status) {
> +	message_error (message, status, "freezing message");
> +	return status;
> +    }
> +
> +    if (flags & TAG_FLAG_REMOVE_ALL) {
> +	status = notmuch_message_remove_all_tags (message);
> +	if (status) {
> +	    message_error (message, status, "removing all tags");
> +	    return status;
> +	}
> +    }
> +
> +    for (i = 0; i < list->count; i++) {
> +	if (tag_ops[i].remove) {
> +	    status = notmuch_message_remove_tag (message, tag_ops[i].tag);
> +	    if (status) {
> +		message_error (message, status, "removing tag %s", tag_ops[i].tag);
> +		return status;
> +	    }
> +	} else {
> +	    status = notmuch_message_add_tag (message, tag_ops[i].tag);
> +	    if (status) {
> +		message_error (message, status, "adding tag %s", tag_ops[i].tag);
> +		return status;
> +	    }
> +
> +	}
> +    }
> +
> +    status = notmuch_message_thaw (message);
> +    if (status) {
> +	message_error (message, status, "thawing message");
> +	return status;
> +    }
> +
> +
> +    if (flags & TAG_FLAG_MAILDIR_SYNC) {
> +	status = notmuch_message_tags_to_maildir_flags (message);
> +	if (status) {
> +	    message_error (message, status, "synching tags to maildir");
> +	    return status;
> +	}
> +    }
> +
> +    return NOTMUCH_STATUS_SUCCESS;
> +
> +}
> +
> +
> +/* Array of tagging operations (add or remove.  Size will be increased
> + * as necessary. */
> +
> +tag_op_list_t *
> +tag_op_list_create (void *ctx)
> +{
> +    tag_op_list_t *list;
> +
> +    list = talloc (ctx, tag_op_list_t);
> +    if (list == NULL)
> +	return NULL;
> +
> +    list->size = TAG_OP_LIST_INITIAL_SIZE;
> +    list->count = 0;
> +
> +    list->ops = talloc_array (ctx, tag_operation_t, list->size);
> +    if (list->ops == NULL)
> +	return NULL;
> +
> +    return list;
> +}
> +
> +
> +int
> +tag_op_list_append (void *ctx,
> +		    tag_op_list_t *list,
> +		    const char *tag,
> +		    notmuch_bool_t remove)
> +{
> +    /* Make room if current array is full.  This should be a fairly
> +     * rare case, considering the initial array size.
> +     */
> +
> +    if (list->count == list->size) {
> +	list->size *= 2;
> +	list->ops = talloc_realloc (ctx, list->ops, tag_operation_t,
> +				    list->size);
> +	if (list->ops == NULL) {
> +	    fprintf (stderr, "Out of memory.\n");
> +	    return 1;
> +	}
> +    }
> +
> +    /* add the new operation */
> +
> +    list->ops[list->count].tag = tag;
> +    list->ops[list->count].remove = remove;
> +    list->count++;
> +    return 0;
> +}
> +
> +/*
> + *   Is the i'th tag operation a remove?
> + */
> +
> +notmuch_bool_t
> +tag_op_list_isremove (const tag_op_list_t *list, size_t i)
> +{
> +    assert (i < list->count);
> +    return list->ops[i].remove;
> +}
> +
> +/*
> + * Reset a list to contain no operations
> + */
> +
> +void
> +tag_op_list_reset (tag_op_list_t *list)
> +{
> +    list->count = 0;
> +}
> +
> +/*
> + * Return the number of operations in a list
> + */
> +
> +size_t
> +tag_op_list_size (const tag_op_list_t *list)
> +{
> +    return list->count;
> +}
> +
> +/*
> + *   return the i'th tag in the list
> + */
> +
> +const char *
> +tag_op_list_tag (const tag_op_list_t *list, size_t i)
> +{
> +    assert (i < list->count);
> +    return list->ops[i].tag;
> +}
> diff --git a/tag-util.h b/tag-util.h
> new file mode 100644
> index 0000000..df05d72
> --- /dev/null
> +++ b/tag-util.h
> @@ -0,0 +1,119 @@
> +#ifndef _TAG_UTIL_H
> +#define _TAG_UTIL_H
> +
> +#include "notmuch-client.h"
> +
> +typedef struct _tag_operation_t tag_operation_t;
> +typedef struct _tag_op_list_t tag_op_list_t;
> +
> +/* Use powers of 2 */
> +typedef enum {
> +    TAG_FLAG_NONE = 0,
> +
> +    /* Operations are synced to maildir, if possible.
> +     */
> +    TAG_FLAG_MAILDIR_SYNC = (1 << 0),
> +
> +    /* Remove all tags from message before applying list.
> +     */
> +    TAG_FLAG_REMOVE_ALL = (1 << 1),
> +
> +    /* Don't try to avoid database operations. Useful when we
> +     * know that message passed needs these operations.
> +      */
> +    TAG_FLAG_PRE_OPTIMIZED = (1 << 2),
> +
> +    /* Accept strange tags that might be user error;
> +     * intended for use by notmuch-restore.
> +     */
> +    TAG_FLAG_BE_GENEROUS = (1 << 3)
> +
> +} tag_op_flag_t;
> +
> +/* Parse a string of the following format:
> + *
> + * +<tag>|-<tag> [...] [--] <search-terms>
> + *
> + * Each line is interpreted similarly to "notmuch tag" command line
> + * arguments. The delimiter is one or more spaces ' '. Any characters
> + * in <tag> and <search-terms> MAY be hex encoded with %NN where NN is
> + * the hexadecimal value of the character. Any ' ' and '%' characters
> + * in <tag> and <search-terms> MUST be hex encoded (using %20 and %25,
> + * respectively). Any characters that are not part of <tag> or
> + * <search-terms> MUST NOT be hex encoded.
> + *
> + * Leading and trailing space ' ' is ignored. Empty lines and lines
> + * beginning with '#' are ignored.
> + *
> + * Returns: 0 for OK, 1 for skipped line, -1 for fatal(ish) error.
> + *
> + * Output Parameters:
> + *	ops	contains a list of tag operations
> + *	query_str the search terms.
> + */
> +int
> +parse_tag_line (void *ctx, char *line,
> +		tag_op_flag_t flags,
> +		char **query_str, tag_op_list_t *ops);
> +
> +/*
> + * Create an empty list of tag operations
> + *
> + * ctx is passed to talloc
> + */
> +
> +tag_op_list_t
> +*tag_op_list_create (void *ctx);
> +
> +/*
> + * Add a tag operation (delete iff remove == TRUE) to a list.
> + * The list is expanded as necessary.
> + */
> +
> +int
> +tag_op_list_append (void *ctx,
> +		    tag_op_list_t *list,
> +		    const char *tag,
> +		    notmuch_bool_t remove);
> +
> +/*
> + * Apply a list of tag operations, in order, to a given message.
> + *
> + * Flags can be bitwise ORed; see enum above for possibilies.
> + */
> +
> +notmuch_status_t
> +tag_op_list_apply (notmuch_message_t *message,
> +		   tag_op_list_t *tag_ops,
> +		   tag_op_flag_t flags);
> +
> +/*
> + * Return the number of operations in a list
> + */
> +
> +size_t
> +tag_op_list_size (const tag_op_list_t *list);
> +
> +/*
> + * Reset a list to contain no operations
> + */
> +
> +void
> +tag_op_list_reset (tag_op_list_t *list);
> +
> +
> + /*
> +  *   return the i'th tag in the list
> +  */
> +
> +const char *
> +tag_op_list_tag (const tag_op_list_t *list, size_t i);
> +
> +/*
> + *   Is the i'th tag operation a remove?
> + */
> +
> +notmuch_bool_t
> +tag_op_list_isremove (const tag_op_list_t *list, size_t i);
> +
> +#endif
> -- 
> 1.7.10.4
>
> _______________________________________________
> notmuch mailing list
> notmuch@notmuchmail.org
> http://notmuchmail.org/mailman/listinfo/notmuch

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Patch v3b 5/9] notmuch-restore: add support for input format 'batch-tag'
  2012-12-07  1:26 ` [Patch v3b 5/9] notmuch-restore: add support for input format 'batch-tag' david
@ 2012-12-07 22:50   ` Jani Nikula
  2012-12-08 11:43   ` Mark Walters
  1 sibling, 0 replies; 21+ messages in thread
From: Jani Nikula @ 2012-12-07 22:50 UTC (permalink / raw)
  To: david, notmuch; +Cc: David Bremner


LGTM.

On Fri, 07 Dec 2012, david@tethera.net wrote:
> From: David Bremner <bremner@debian.org>
>
> This can be enabled with the new --format=batch-tag command line
> option to "notmuch restore". The input must consist of lines of the
> format:
>
>     +<tag>|-<tag> [...] [--] id:<msg-id>
>
> Each line is interpreted similarly to "notmuch tag" command line
> arguments. The delimiter is one or more spaces ' '. Any characters in
> <tag> and <search-terms> MAY be hex encoded with %NN where NN is the
> hexadecimal value of the character. Any ' ' and '%' characters in
> <tag> and <msg-id> MUST be hex encoded (using %20 and %25,
> respectively). Any characters that are not part of <tag> or
> <search-terms> MUST NOT be hex encoded.
>
> Leading and trailing space ' ' is ignored. Empty lines and lines
> beginning with '#' are ignored.
>
> Commit message mainly stolen from Jani's batch tagging commit, to
> follow.
> ---
>  notmuch-restore.c |  206 ++++++++++++++++++++++++++++++++++-------------------
>  1 file changed, 131 insertions(+), 75 deletions(-)
>
> diff --git a/notmuch-restore.c b/notmuch-restore.c
> index f03dcac..ceec2d3 100644
> --- a/notmuch-restore.c
> +++ b/notmuch-restore.c
> @@ -19,18 +19,22 @@
>   */
>  
>  #include "notmuch-client.h"
> +#include "dump-restore-private.h"
> +#include "tag-util.h"
> +#include "string-util.h"
> +
> +static volatile sig_atomic_t interrupted;
> +static regex_t regex;
>  
>  static int
> -tag_message (notmuch_database_t *notmuch, const char *message_id,
> -	     char *file_tags, notmuch_bool_t remove_all,
> -	     notmuch_bool_t synchronize_flags)
> +tag_message (unused (void *ctx),
> +	     notmuch_database_t *notmuch,
> +	     const char *message_id,
> +	     tag_op_list_t *tag_ops,
> +	     tag_op_flag_t flags)
>  {
>      notmuch_status_t status;
> -    notmuch_tags_t *db_tags;
> -    char *db_tags_str;
>      notmuch_message_t *message = NULL;
> -    const char *tag;
> -    char *next;
>      int ret = 0;
>  
>      status = notmuch_database_find_message (notmuch, message_id, &message);
> @@ -44,55 +48,62 @@ tag_message (notmuch_database_t *notmuch, const char *message_id,
>  
>      /* In order to detect missing messages, this check/optimization is
>       * intentionally done *after* first finding the message. */
> -    if (! remove_all && (file_tags == NULL || *file_tags == '\0'))
> -	goto DONE;
> -
> -    db_tags_str = NULL;
> -    for (db_tags = notmuch_message_get_tags (message);
> -	 notmuch_tags_valid (db_tags);
> -	 notmuch_tags_move_to_next (db_tags)) {
> -	tag = notmuch_tags_get (db_tags);
> -
> -	if (db_tags_str)
> -	    db_tags_str = talloc_asprintf_append (db_tags_str, " %s", tag);
> -	else
> -	    db_tags_str = talloc_strdup (message, tag);
> -    }
> +    if ((flags & TAG_FLAG_REMOVE_ALL) || tag_op_list_size (tag_ops))
> +	tag_op_list_apply (message, tag_ops, flags);
>  
> -    if (((file_tags == NULL || *file_tags == '\0') &&
> -	 (db_tags_str == NULL || *db_tags_str == '\0')) ||
> -	(file_tags && db_tags_str && strcmp (file_tags, db_tags_str) == 0))
> -	goto DONE;
> +    notmuch_message_destroy (message);
>  
> -    notmuch_message_freeze (message);
> +    return ret;
> +}
>  
> -    if (remove_all)
> -	notmuch_message_remove_all_tags (message);
> +static int
> +parse_sup_line (void *ctx, char *line,
> +		char **query_str, tag_op_list_t *tag_ops)
> +{
>  
> -    next = file_tags;
> -    while (next) {
> -	tag = strsep (&next, " ");
> -	if (*tag == '\0')
> -	    continue;
> -	status = notmuch_message_add_tag (message, tag);
> -	if (status) {
> -	    fprintf (stderr, "Error applying tag %s to message %s:\n",
> -		     tag, message_id);
> -	    fprintf (stderr, "%s\n", notmuch_status_to_string (status));
> -	    ret = 1;
> -	}
> +    regmatch_t match[3];
> +    char *file_tags;
> +    int rerr;
> +
> +    tag_op_list_reset (tag_ops);
> +
> +    chomp_newline (line);
> +
> +    /* Silently ignore blank lines */
> +    if (line[0] == '\0') {
> +	return 1;
> +    }
> +
> +    rerr = xregexec (&regex, line, 3, match, 0);
> +    if (rerr == REG_NOMATCH) {
> +	fprintf (stderr, "Warning: Ignoring invalid sup format line: %s\n",
> +		 line);
> +	return 1;
>      }
>  
> -    notmuch_message_thaw (message);
> +    *query_str = talloc_strndup (ctx, line + match[1].rm_so,
> +				 match[1].rm_eo - match[1].rm_so);
> +    file_tags = talloc_strndup (ctx, line + match[2].rm_so,
> +				match[2].rm_eo - match[2].rm_so);
>  
> -    if (synchronize_flags)
> -	notmuch_message_tags_to_maildir_flags (message);
> +    char *tok = file_tags;
> +    size_t tok_len = 0;
>  
> -  DONE:
> -    if (message)
> -	notmuch_message_destroy (message);
> +    tag_op_list_reset (tag_ops);
> +
> +    while ((tok = strtok_len (tok + tok_len, " ", &tok_len)) != NULL) {
> +
> +	if (*(tok + tok_len) != '\0') {
> +	    *(tok + tok_len) = '\0';
> +	    tok_len++;
> +	}
> +
> +	if (tag_op_list_append (ctx, tag_ops, tok, FALSE))
> +	    return -1;
> +    }
> +
> +    return 0;
>  
> -    return ret;
>  }
>  
>  int
> @@ -100,16 +111,19 @@ notmuch_restore_command (unused (void *ctx), int argc, char *argv[])
>  {
>      notmuch_config_t *config;
>      notmuch_database_t *notmuch;
> -    notmuch_bool_t synchronize_flags;
>      notmuch_bool_t accumulate = FALSE;
> +    tag_op_flag_t flags = 0;
> +    tag_op_list_t *tag_ops;
> +
>      char *input_file_name = NULL;
>      FILE *input = stdin;
>      char *line = NULL;
>      size_t line_size;
>      ssize_t line_len;
> -    regex_t regex;
> -    int rerr;
> +
> +    int ret = 0;
>      int opt_index;
> +    int input_format = DUMP_FORMAT_AUTO;
>  
>      config = notmuch_config_open (ctx, NULL, NULL);
>      if (config == NULL)
> @@ -119,9 +133,15 @@ notmuch_restore_command (unused (void *ctx), int argc, char *argv[])
>  			       NOTMUCH_DATABASE_MODE_READ_WRITE, &notmuch))
>  	return 1;
>  
> -    synchronize_flags = notmuch_config_get_maildir_synchronize_flags (config);
> +    if (notmuch_config_get_maildir_synchronize_flags (config))
> +	flags |= TAG_FLAG_MAILDIR_SYNC;
>  
>      notmuch_opt_desc_t options[] = {
> +	{ NOTMUCH_OPT_KEYWORD, &input_format, "format", 'f',
> +	  (notmuch_keyword_t []){ { "auto", DUMP_FORMAT_AUTO },
> +				  { "batch-tag", DUMP_FORMAT_BATCH_TAG },
> +				  { "sup", DUMP_FORMAT_SUP },
> +				  { 0, 0 } } },
>  	{ NOTMUCH_OPT_STRING, &input_file_name, "input", 'i', 0 },
>  	{ NOTMUCH_OPT_BOOLEAN,  &accumulate, "accumulate", 'a', 0 },
>  	{ 0, 0, 0, 0, 0 }
> @@ -134,6 +154,9 @@ notmuch_restore_command (unused (void *ctx), int argc, char *argv[])
>  	return 1;
>      }
>  
> +    if (! accumulate)
> +	flags |= TAG_FLAG_REMOVE_ALL;
> +
>      if (input_file_name) {
>  	input = fopen (input_file_name, "r");
>  	if (input == NULL) {
> @@ -154,44 +177,77 @@ notmuch_restore_command (unused (void *ctx), int argc, char *argv[])
>       * non-space characters for the message-id, then one or more
>       * spaces, then a list of space-separated tags as a sequence of
>       * characters within literal '(' and ')'. */
> -    if ( xregcomp (&regex,
> -		   "^([^ ]+) \\(([^)]*)\\)$",
> -		   REG_EXTENDED) )
> -	INTERNAL_ERROR ("compile time constant regex failed.");
> +    char *p;
>  
> -    while ((line_len = getline (&line, &line_size, input)) != -1) {
> -	regmatch_t match[3];
> -	char *message_id, *file_tags;
> +    line_len = getline (&line, &line_size, input);
> +    if (line_len == 0)
> +	return 0;
>  
> -	chomp_newline (line);
> +    tag_ops = tag_op_list_create (ctx);
> +    if (tag_ops == NULL) {
> +	fprintf (stderr, "Out of memory.\n");
> +	return 1;
> +    }
>  
> -	rerr = xregexec (&regex, line, 3, match, 0);
> -	if (rerr == REG_NOMATCH) {
> -	    fprintf (stderr, "Warning: Ignoring invalid input line: %s\n",
> -		     line);
> -	    continue;
> +    for (p = line; (input_format == DUMP_FORMAT_AUTO) && *p; p++) {
> +	if (*p == '(')
> +	    input_format = DUMP_FORMAT_SUP;
> +    }
> +
> +    if (input_format == DUMP_FORMAT_AUTO)
> +	input_format = DUMP_FORMAT_BATCH_TAG;
> +
> +    if (input_format == DUMP_FORMAT_SUP)
> +	if ( xregcomp (&regex,
> +		       "^([^ ]+) \\(([^)]*)\\)$",
> +		       REG_EXTENDED) )
> +	    INTERNAL_ERROR ("compile time constant regex failed.");
> +
> +    do {
> +	char *query_string;
> +
> +	if (input_format == DUMP_FORMAT_SUP) {
> +	    ret = parse_sup_line (ctx, line, &query_string, tag_ops);
> +	} else {
> +	    ret = parse_tag_line (ctx, line, TAG_FLAG_BE_GENEROUS,
> +				  &query_string, tag_ops);
> +
> +	    if (ret == 0) {
> +		if (strncmp ("id:", query_string, 3) != 0) {
> +		    fprintf (stderr, "Unsupported query: %s\n", query_string);
> +		    continue;
> +		}
> +		/* delete id: from front of string; tag_message
> +		 * expects a raw message-id.
> +		 *
> +		 * XXX: Note that query string id:foo and bar will be
> +		 * interpreted as a message id "foo and bar". This
> +		 * should eventually be fixed to give a better error
> +		 * message.
> +		 */
> +		query_string = query_string + 3;
> +	    }
>  	}
>  
> -	message_id = xstrndup (line + match[1].rm_so,
> -			       match[1].rm_eo - match[1].rm_so);
> -	file_tags = xstrndup (line + match[2].rm_so,
> -			      match[2].rm_eo - match[2].rm_so);
> +	if (ret > 0)
> +	    continue;
>  
> -	tag_message (notmuch, message_id, file_tags, ! accumulate,
> -		     synchronize_flags);
> +	if (ret < 0 || tag_message (ctx, notmuch, query_string,
> +				    tag_ops, flags))
> +	    break;
>  
> -	free (message_id);
> -	free (file_tags);
> -    }
> +    }  while ((line_len = getline (&line, &line_size, input)) != -1);
>  
> -    regfree (&regex);
> +    if (input_format == DUMP_FORMAT_SUP)
> +	regfree (&regex);
>  
>      if (line)
>  	free (line);
>  
>      notmuch_database_destroy (notmuch);
> +
>      if (input != stdin)
>  	fclose (input);
>  
> -    return 0;
> +    return ret;
>  }
> -- 
> 1.7.10.4
>
> _______________________________________________
> notmuch mailing list
> notmuch@notmuchmail.org
> http://notmuchmail.org/mailman/listinfo/notmuch

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Patch v3b 6/9] test: update dump-restore roundtripping test for batch-tag format
  2012-12-07  1:26 ` [Patch v3b 6/9] test: update dump-restore roundtripping test for batch-tag format david
@ 2012-12-07 22:56   ` Jani Nikula
  0 siblings, 0 replies; 21+ messages in thread
From: Jani Nikula @ 2012-12-07 22:56 UTC (permalink / raw)
  To: david, notmuch; +Cc: David Bremner

On Fri, 07 Dec 2012, david@tethera.net wrote:
> From: David Bremner <bremner@debian.org>
>
> Now we can actually round trip these crazy tags and and message ids.
> hex-xcode is no longer needed as it's built in.
> ---
>  test/dump-restore |   17 ++++++++---------
>  1 file changed, 8 insertions(+), 9 deletions(-)
>
> diff --git a/test/dump-restore b/test/dump-restore
> index b4c807f..ce81e6f 100755
> --- a/test/dump-restore
> +++ b/test/dump-restore
> @@ -99,23 +99,22 @@ notmuch dump --format=batch-tag from:cworth | sed 's/^.*-- id://' | \
>  test_expect_equal_file EXPECTED.$test_count OUTPUT.$test_count
>  
>  test_begin_subtest 'roundtripping random message-ids and tags'
> -    test_subtest_known_broken
> -    ${TEST_DIRECTORY}/random-corpus --config-path=${NOTMUCH_CONFIG} \
> -			--num-messages=10
>  
> -     notmuch dump| \
> -	 ${TEST_DIRECTORY}/hex-xcode --direction=encode| \
> +    ${TEST_DIRECTORY}/random-corpus --config-path=${NOTMUCH_CONFIG}
> +			--num-messages=100

What happened to the \ at the end of the line? Hmm, does it matter?

Otherwise, LGTM.

> +
> +     notmuch dump --format=batch-tag| \
>  	 sort > EXPECTED.$test_count
>  
>       notmuch tag +this_tag_is_very_unlikely_to_be_random '*'
>  
> -     ${TEST_DIRECTORY}/hex-xcode --direction=decode < EXPECTED.$test_count | \
> -	 notmuch restore 2>/dev/null
> +     notmuch restore --format=batch-tag < EXPECTED.$test_count
>  
> -     notmuch dump| \
> -	 ${TEST_DIRECTORY}/hex-xcode --direction=encode| \
> +     notmuch dump --format=batch-tag| \
>  	 sort > OUTPUT.$test_count
>  
>  test_expect_equal_file EXPECTED.$test_count OUTPUT.$test_count
>  
>  test_done
> +
> +# Note the database is "poisoned" for sup format at this point.
> -- 
> 1.7.10.4
>
> _______________________________________________
> notmuch mailing list
> notmuch@notmuchmail.org
> http://notmuchmail.org/mailman/listinfo/notmuch

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Patch v3b 9/9] tag-util: optimization of tag application
  2012-12-07  1:26 ` [Patch v3b 9/9] tag-util: optimization of tag application david
@ 2012-12-07 23:08   ` Jani Nikula
  2012-12-08 11:22   ` Mark Walters
  1 sibling, 0 replies; 21+ messages in thread
From: Jani Nikula @ 2012-12-07 23:08 UTC (permalink / raw)
  To: david, notmuch; +Cc: David Bremner

On Fri, 07 Dec 2012, david@tethera.net wrote:
> From: David Bremner <bremner@debian.org>
>
> The idea is not to bother with restore operations if they don't change
> the set of tags. This is actually a relatively common case.
>
> In order to avoid fancy datastructures, this method is quadratic in
> the number of tags; at least on my mail database this doesn't seem to
> be a big problem.
> ---
>  tag-util.c |   66 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 66 insertions(+)
>
> diff --git a/tag-util.c b/tag-util.c
> index 932ee7f..3d54e9e 100644
> --- a/tag-util.c
> +++ b/tag-util.c
> @@ -124,6 +124,69 @@ message_error (notmuch_message_t *message,
>      fprintf (stderr, "Status: %s\n", notmuch_status_to_string (status));
>  }
>  
> +static int
> +makes_changes (notmuch_message_t *message,
> +	       tag_op_list_t *list,
> +	       tag_op_flag_t flags)
> +{
> +
> +    size_t i;
> +
> +    notmuch_tags_t *tags;
> +    notmuch_bool_t changes = FALSE;
> +
> +    /* First, do we delete an existing tag? */
> +    changes = FALSE;
> +    for (tags = notmuch_message_get_tags (message);
> +	 ! changes && notmuch_tags_valid (tags);
> +	 notmuch_tags_move_to_next (tags)) {
> +	const char *cur_tag = notmuch_tags_get (tags);
> +	int last_op =  (flags & TAG_FLAG_REMOVE_ALL) ? -1 : 0;
> +
> +	/* slight contortions to count down with an unsigned index */
> +	for (i = list->count; i-- > 0; /*nothing*/) {
> +	    if (strcmp (cur_tag, list->ops[i].tag) == 0) {
> +		last_op = list->ops[i].remove ? -1 : 1;
> +		break;
> +	    }
> +	}

After some eyeballing it looks correct, but not not exactly pretty. If
you insist on unsigned, you could also have a regular (i = 0; i <
list->count; i++) and use j = list->count - i - 1; within the block. But
that's just style bikeshedding after convincing you to use a count down
loop in the first place...

Otherwise, the patch LGTM.


> +
> +	changes = (last_op == -1);
> +    }
> +    notmuch_tags_destroy (tags);
> +
> +    if (changes)
> +	return TRUE;
> +
> +    /* Now check for adding new tags */
> +    for (i = 0; i < list->count; i++) {
> +	notmuch_bool_t exists = FALSE;
> +
> +	if (list->ops[i].remove)
> +	    continue;
> +
> +	for (tags = notmuch_message_get_tags (message);
> +	     notmuch_tags_valid (tags);
> +	     notmuch_tags_move_to_next (tags)) {
> +	    const char *cur_tag = notmuch_tags_get (tags);
> +	    if (strcmp (cur_tag, list->ops[i].tag) == 0) {
> +		exists = TRUE;
> +		break;
> +	    }
> +	}
> +	notmuch_tags_destroy (tags);
> +
> +	/* the following test is conservative,
> +	 * in the sense it ignores cases like +foo ... -foo
> +	 * but this is OK from a correctness point of view
> +	 */
> +	if (! exists)
> +	    return TRUE;
> +    }
> +    return FALSE;
> +
> +}
> +
>  notmuch_status_t
>  tag_op_list_apply (notmuch_message_t *message,
>  		   tag_op_list_t *list,
> @@ -133,6 +196,9 @@ tag_op_list_apply (notmuch_message_t *message,
>      notmuch_status_t status = 0;
>      tag_operation_t *tag_ops = list->ops;
>  
> +    if (! (flags & TAG_FLAG_PRE_OPTIMIZED) && ! makes_changes (message, list, flags))
> +	return NOTMUCH_STATUS_SUCCESS;
> +
>      status = notmuch_message_freeze (message);
>      if (status) {
>  	message_error (message, status, "freezing message");
> -- 
> 1.7.10.4
>
> _______________________________________________
> notmuch mailing list
> notmuch@notmuchmail.org
> http://notmuchmail.org/mailman/listinfo/notmuch

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Patch v3b 3/9] util: add string-util.[ch]
  2012-12-07  1:26 ` [Patch v3b 3/9] util: add string-util.[ch] david
  2012-12-07 22:30   ` Jani Nikula
@ 2012-12-08 10:23   ` Mark Walters
  1 sibling, 0 replies; 21+ messages in thread
From: Mark Walters @ 2012-12-08 10:23 UTC (permalink / raw)
  To: david, notmuch; +Cc: David Bremner


On Fri, 07 Dec 2012, david@tethera.net wrote:
> From: David Bremner <bremner@debian.org>
>
> This is to give a home to strtok_len. It's a bit silly to add a header
> for one routine, but it needs to be shared between several compilation
> units (or at least that's the most natural design).
> ---
>  util/Makefile.local |    3 ++-
>  util/string-util.c  |   34 ++++++++++++++++++++++++++++++++++
>  util/string-util.h  |   19 +++++++++++++++++++
>  3 files changed, 55 insertions(+), 1 deletion(-)
>  create mode 100644 util/string-util.c
>  create mode 100644 util/string-util.h
>
> diff --git a/util/Makefile.local b/util/Makefile.local
> index 3ca623e..a11e35b 100644
> --- a/util/Makefile.local
> +++ b/util/Makefile.local
> @@ -3,7 +3,8 @@
>  dir := util
>  extra_cflags += -I$(srcdir)/$(dir)
>  
> -libutil_c_srcs := $(dir)/xutil.c $(dir)/error_util.c $(dir)/hex-escape.c
> +libutil_c_srcs := $(dir)/xutil.c $(dir)/error_util.c $(dir)/hex-escape.c \
> +		  $(dir)/string-util.c
>  
>  libutil_modules := $(libutil_c_srcs:.c=.o)
>  
> diff --git a/util/string-util.c b/util/string-util.c
> new file mode 100644
> index 0000000..44f8cd3
> --- /dev/null
> +++ b/util/string-util.c
> @@ -0,0 +1,34 @@
> +/* string-util.c -  Extra or enhanced routines for null terminated strings.
> + *
> + * Copyright (c) 2012 Jani Nikula
> + *
> + * This program is free software: you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation, either version 3 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program.  If not, see http://www.gnu.org/licenses/ .
> + *
> + * Author: Jani Nikula <jani@nikula.org>
> + */
> +
> +
> +#include "string-util.h"
> +
> +char *
> +strtok_len (char *s, const char *delim, size_t *len)
> +{
> +    /* skip initial delims */
> +    s += strspn (s, delim);
> +
> +    /* length of token */
> +    *len = strcspn (s, delim);
> +
> +    return *len ? s : NULL;
> +}
> diff --git a/util/string-util.h b/util/string-util.h
> new file mode 100644
> index 0000000..696da40
> --- /dev/null
> +++ b/util/string-util.h
> @@ -0,0 +1,19 @@
> +#ifndef _STRING_UTIL_H
> +#define _STRING_UTIL_H
> +
> +#include <string.h>
> +
> +/* like strtok(3), but without state, and doesn't modify s. usage pattern:
> + *
> + * const char *tok = input;
> + * const char *delim = " \t";
> + * size_t tok_len = 0;
> + *
> + * while ((tok = strtok_len (tok + tok_len, delim, &tok_len)) != NULL) {
> + *     // do stuff with string tok of length tok_len
> + * }

Hi

Some possibly silly comments on the comment. strtok(3) seems to return a
null terminated string and this does not. Might be worth including in
the comment? Secondly every time I read this I read "..modify s. usage
pattern" as one ongoing sentence with s. an abbreviation. Perhaps a
capital u? and maybe str or string rather than s?

Best wishes

Mark

> + */
> +
> +char *strtok_len (char *s, const char *delim, size_t *len);
> +
> +#endif
> -- 
> 1.7.10.4
>
> _______________________________________________
> notmuch mailing list
> notmuch@notmuchmail.org
> http://notmuchmail.org/mailman/listinfo/notmuch

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Patch v3b 4/9] tag-util.[ch]: New files for common tagging routines
  2012-12-07  1:26 ` [Patch v3b 4/9] tag-util.[ch]: New files for common tagging routines david
  2012-12-07 22:45   ` Jani Nikula
@ 2012-12-08 10:50   ` Mark Walters
  1 sibling, 0 replies; 21+ messages in thread
From: Mark Walters @ 2012-12-08 10:50 UTC (permalink / raw)
  To: david, notmuch; +Cc: David Bremner

On Fri, 07 Dec 2012, david@tethera.net wrote:
> From: David Bremner <bremner@debian.org>
>
> These are meant to be shared between notmuch-tag and notmuch-restore.
>
> The bulk of the routines implement a "tag operation list" abstract
> data type act as a structured representation of a set of tag
> operations (typically coming from a single tag command or line of
> input).
> ---
>  Makefile.local |    1 +
>  tag-util.c     |  278 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  tag-util.h     |  119 ++++++++++++++++++++++++
>  3 files changed, 398 insertions(+)
>  create mode 100644 tag-util.c
>  create mode 100644 tag-util.h
>
> diff --git a/Makefile.local b/Makefile.local
> index 2b91946..854867d 100644
> --- a/Makefile.local
> +++ b/Makefile.local
> @@ -274,6 +274,7 @@ notmuch_client_srcs =		\
>  	query-string.c		\
>  	mime-node.c		\
>  	crypto.c		\
> +	tag-util.c
>  
>  notmuch_client_modules = $(notmuch_client_srcs:.c=.o)
>  
> diff --git a/tag-util.c b/tag-util.c
> new file mode 100644
> index 0000000..932ee7f
> --- /dev/null
> +++ b/tag-util.c
> @@ -0,0 +1,278 @@
> +#include <assert.h>
> +#include "string-util.h"
> +#include "tag-util.h"
> +#include "hex-escape.h"
> +
> +#define TAG_OP_LIST_INITIAL_SIZE 10
> +
> +struct _tag_operation_t {
> +    const char *tag;
> +    notmuch_bool_t remove;
> +};
> +
> +struct _tag_op_list_t {
> +    tag_operation_t *ops;
> +    size_t count;
> +    size_t size;
> +};
> +
> +int
> +parse_tag_line (void *ctx, char *line,
> +		tag_op_flag_t flags,
> +		char **query_string,
> +		tag_op_list_t *tag_ops)
> +{
> +    char *tok = line;
> +    size_t tok_len = 0;
> +    char *line_for_error = talloc_strdup (ctx, line);
> +    int ret = 0;
> +
> +    chomp_newline (line);
> +
> +    /* remove leading space */
> +    while (*tok == ' ' || *tok == '\t')
> +	tok++;
> +
> +    /* Skip empty and comment lines. */
> +    if (*tok == '\0' || *tok == '#') {
> +	ret = 1;
> +	goto DONE;
> +    }
> +
> +    tag_op_list_reset (tag_ops);
> +
> +    /* Parse tags. */
> +    while ((tok = strtok_len (tok + tok_len, " ", &tok_len)) != NULL) {
> +	notmuch_bool_t remove;
> +	char *tag;
> +
> +	/* Optional explicit end of tags marker. */
> +	if (tok_len == 2 && strncmp (tok, "--", tok_len) == 0) {
> +	    tok = strtok_len (tok + tok_len, " ", &tok_len);
> +	    break;
> +	}
> +
> +	/* Implicit end of tags. */
> +	if (*tok != '-' && *tok != '+')
> +	    break;
> +
> +	/* If tag is terminated by NUL, there's no query string. */
> +	if (*(tok + tok_len) == '\0') {
> +	    fprintf (stderr, "no query string: %s\n", line_for_error);
> +	    ret = 1;
> +	    goto DONE;
> +	}
> +
> +	/* Terminate, and start next token after terminator. */
> +	*(tok + tok_len++) = '\0';
> +
> +	remove = (*tok == '-');
> +	tag = tok + 1;
> +
> +	/* Maybe refuse empty tags. */
> +	if (! (flags & TAG_FLAG_BE_GENEROUS) && *tag == '\0') {
> +	    fprintf (stderr, "Error: empty tag: %s\n", line_for_error);
> +	    goto DONE;
> +	}
> +
> +	/* Decode tag. */
> +	if (hex_decode_inplace (tag) != HEX_SUCCESS) {
> +	    fprintf (stderr, "Hex decoding of tag %s failed\n",
> +		     tag);
> +	    ret = 1;
> +	    goto DONE;
> +	}
> +
> +	if (tag_op_list_append (ctx, tag_ops, tag, remove)) {
> +	    ret = -1;
> +	    goto DONE;
> +	}
> +    }
> +
> +    if (tok == NULL) {
> +	fprintf (stderr, "Warning: Ignoring invalid input line: %s\n",
> +		 line_for_error);
> +	ret = 1;
> +	goto DONE;
> +    }
> +
> +    /* tok now points to the query string */
> +    if (hex_decode_inplace (tok) != HEX_SUCCESS) {
> +	fprintf (stderr, "Hex decoding of query %s failed\n",
> +		 tok);


For these hex_decode errors would it be worth printing the full line as
well as the bit that fails hex_decode? Perhaps put something under DONE:
to print the whole line if ret is not success?

Otherwise LGTM

Mark


> +	ret = 1;
> +	goto DONE;
> +    }
> +
> +    *query_string = tok;
> +  DONE:
> +    talloc_free (line_for_error);
> +    return ret;
> +}
> +
> +static inline void
> +message_error (notmuch_message_t *message,
> +	       notmuch_status_t status,
> +	       const char *format, ...)
> +{
> +    va_list va_args;
> +
> +    va_start (va_args, format);
> +
> +    vfprintf (stderr, format, va_args);
> +    fprintf (stderr, "Message-ID: %s\n", notmuch_message_get_message_id (message));
> +    fprintf (stderr, "Status: %s\n", notmuch_status_to_string (status));
> +}
> +
> +notmuch_status_t
> +tag_op_list_apply (notmuch_message_t *message,
> +		   tag_op_list_t *list,
> +		   tag_op_flag_t flags)
> +{
> +    size_t i;
> +    notmuch_status_t status = 0;
> +    tag_operation_t *tag_ops = list->ops;
> +
> +    status = notmuch_message_freeze (message);
> +    if (status) {
> +	message_error (message, status, "freezing message");
> +	return status;
> +    }
> +
> +    if (flags & TAG_FLAG_REMOVE_ALL) {
> +	status = notmuch_message_remove_all_tags (message);
> +	if (status) {
> +	    message_error (message, status, "removing all tags");
> +	    return status;
> +	}
> +    }
> +
> +    for (i = 0; i < list->count; i++) {
> +	if (tag_ops[i].remove) {
> +	    status = notmuch_message_remove_tag (message, tag_ops[i].tag);
> +	    if (status) {
> +		message_error (message, status, "removing tag %s", tag_ops[i].tag);
> +		return status;
> +	    }
> +	} else {
> +	    status = notmuch_message_add_tag (message, tag_ops[i].tag);
> +	    if (status) {
> +		message_error (message, status, "adding tag %s", tag_ops[i].tag);
> +		return status;
> +	    }
> +
> +	}
> +    }
> +
> +    status = notmuch_message_thaw (message);
> +    if (status) {
> +	message_error (message, status, "thawing message");
> +	return status;
> +    }
> +
> +
> +    if (flags & TAG_FLAG_MAILDIR_SYNC) {
> +	status = notmuch_message_tags_to_maildir_flags (message);
> +	if (status) {
> +	    message_error (message, status, "synching tags to maildir");
> +	    return status;
> +	}
> +    }
> +
> +    return NOTMUCH_STATUS_SUCCESS;
> +
> +}
> +
> +
> +/* Array of tagging operations (add or remove.  Size will be increased
> + * as necessary. */
> +
> +tag_op_list_t *
> +tag_op_list_create (void *ctx)
> +{
> +    tag_op_list_t *list;
> +
> +    list = talloc (ctx, tag_op_list_t);
> +    if (list == NULL)
> +	return NULL;
> +
> +    list->size = TAG_OP_LIST_INITIAL_SIZE;
> +    list->count = 0;
> +
> +    list->ops = talloc_array (ctx, tag_operation_t, list->size);
> +    if (list->ops == NULL)
> +	return NULL;
> +
> +    return list;
> +}
> +
> +
> +int
> +tag_op_list_append (void *ctx,
> +		    tag_op_list_t *list,
> +		    const char *tag,
> +		    notmuch_bool_t remove)
> +{
> +    /* Make room if current array is full.  This should be a fairly
> +     * rare case, considering the initial array size.
> +     */
> +
> +    if (list->count == list->size) {
> +	list->size *= 2;
> +	list->ops = talloc_realloc (ctx, list->ops, tag_operation_t,
> +				    list->size);
> +	if (list->ops == NULL) {
> +	    fprintf (stderr, "Out of memory.\n");
> +	    return 1;
> +	}
> +    }
> +
> +    /* add the new operation */
> +
> +    list->ops[list->count].tag = tag;
> +    list->ops[list->count].remove = remove;
> +    list->count++;
> +    return 0;
> +}
> +
> +/*
> + *   Is the i'th tag operation a remove?
> + */
> +
> +notmuch_bool_t
> +tag_op_list_isremove (const tag_op_list_t *list, size_t i)
> +{
> +    assert (i < list->count);
> +    return list->ops[i].remove;
> +}
> +
> +/*
> + * Reset a list to contain no operations
> + */
> +
> +void
> +tag_op_list_reset (tag_op_list_t *list)
> +{
> +    list->count = 0;
> +}
> +
> +/*
> + * Return the number of operations in a list
> + */
> +
> +size_t
> +tag_op_list_size (const tag_op_list_t *list)
> +{
> +    return list->count;
> +}
> +
> +/*
> + *   return the i'th tag in the list
> + */
> +
> +const char *
> +tag_op_list_tag (const tag_op_list_t *list, size_t i)
> +{
> +    assert (i < list->count);
> +    return list->ops[i].tag;
> +}
> diff --git a/tag-util.h b/tag-util.h
> new file mode 100644
> index 0000000..df05d72
> --- /dev/null
> +++ b/tag-util.h
> @@ -0,0 +1,119 @@
> +#ifndef _TAG_UTIL_H
> +#define _TAG_UTIL_H
> +
> +#include "notmuch-client.h"
> +
> +typedef struct _tag_operation_t tag_operation_t;
> +typedef struct _tag_op_list_t tag_op_list_t;
> +
> +/* Use powers of 2 */
> +typedef enum {
> +    TAG_FLAG_NONE = 0,
> +
> +    /* Operations are synced to maildir, if possible.
> +     */
> +    TAG_FLAG_MAILDIR_SYNC = (1 << 0),
> +
> +    /* Remove all tags from message before applying list.
> +     */
> +    TAG_FLAG_REMOVE_ALL = (1 << 1),
> +
> +    /* Don't try to avoid database operations. Useful when we
> +     * know that message passed needs these operations.
> +      */
> +    TAG_FLAG_PRE_OPTIMIZED = (1 << 2),
> +
> +    /* Accept strange tags that might be user error;
> +     * intended for use by notmuch-restore.
> +     */
> +    TAG_FLAG_BE_GENEROUS = (1 << 3)
> +
> +} tag_op_flag_t;
> +
> +/* Parse a string of the following format:
> + *
> + * +<tag>|-<tag> [...] [--] <search-terms>
> + *
> + * Each line is interpreted similarly to "notmuch tag" command line
> + * arguments. The delimiter is one or more spaces ' '. Any characters
> + * in <tag> and <search-terms> MAY be hex encoded with %NN where NN is
> + * the hexadecimal value of the character. Any ' ' and '%' characters
> + * in <tag> and <search-terms> MUST be hex encoded (using %20 and %25,
> + * respectively). Any characters that are not part of <tag> or
> + * <search-terms> MUST NOT be hex encoded.
> + *
> + * Leading and trailing space ' ' is ignored. Empty lines and lines
> + * beginning with '#' are ignored.
> + *
> + * Returns: 0 for OK, 1 for skipped line, -1 for fatal(ish) error.
> + *
> + * Output Parameters:
> + *	ops	contains a list of tag operations
> + *	query_str the search terms.
> + */
> +int
> +parse_tag_line (void *ctx, char *line,
> +		tag_op_flag_t flags,
> +		char **query_str, tag_op_list_t *ops);
> +
> +/*
> + * Create an empty list of tag operations
> + *
> + * ctx is passed to talloc
> + */
> +
> +tag_op_list_t
> +*tag_op_list_create (void *ctx);
> +
> +/*
> + * Add a tag operation (delete iff remove == TRUE) to a list.
> + * The list is expanded as necessary.
> + */
> +
> +int
> +tag_op_list_append (void *ctx,
> +		    tag_op_list_t *list,
> +		    const char *tag,
> +		    notmuch_bool_t remove);
> +
> +/*
> + * Apply a list of tag operations, in order, to a given message.
> + *
> + * Flags can be bitwise ORed; see enum above for possibilies.
> + */
> +
> +notmuch_status_t
> +tag_op_list_apply (notmuch_message_t *message,
> +		   tag_op_list_t *tag_ops,
> +		   tag_op_flag_t flags);
> +
> +/*
> + * Return the number of operations in a list
> + */
> +
> +size_t
> +tag_op_list_size (const tag_op_list_t *list);
> +
> +/*
> + * Reset a list to contain no operations
> + */
> +
> +void
> +tag_op_list_reset (tag_op_list_t *list);
> +
> +
> + /*
> +  *   return the i'th tag in the list
> +  */
> +
> +const char *
> +tag_op_list_tag (const tag_op_list_t *list, size_t i);
> +
> +/*
> + *   Is the i'th tag operation a remove?
> + */
> +
> +notmuch_bool_t
> +tag_op_list_isremove (const tag_op_list_t *list, size_t i);
> +
> +#endif
> -- 
> 1.7.10.4
>
> _______________________________________________
> notmuch mailing list
> notmuch@notmuchmail.org
> http://notmuchmail.org/mailman/listinfo/notmuch

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Patch v3b 9/9] tag-util: optimization of tag application
  2012-12-07  1:26 ` [Patch v3b 9/9] tag-util: optimization of tag application david
  2012-12-07 23:08   ` Jani Nikula
@ 2012-12-08 11:22   ` Mark Walters
  1 sibling, 0 replies; 21+ messages in thread
From: Mark Walters @ 2012-12-08 11:22 UTC (permalink / raw)
  To: david, notmuch; +Cc: David Bremner

On Fri, 07 Dec 2012, david@tethera.net wrote:
> From: David Bremner <bremner@debian.org>
>
> The idea is not to bother with restore operations if they don't change
> the set of tags. This is actually a relatively common case.
>
> In order to avoid fancy datastructures, this method is quadratic in
> the number of tags; at least on my mail database this doesn't seem to
> be a big problem.
> ---
>  tag-util.c |   66 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 66 insertions(+)
>
> diff --git a/tag-util.c b/tag-util.c
> index 932ee7f..3d54e9e 100644
> --- a/tag-util.c
> +++ b/tag-util.c
> @@ -124,6 +124,69 @@ message_error (notmuch_message_t *message,
>      fprintf (stderr, "Status: %s\n", notmuch_status_to_string (status));
>  }
>  
> +static int
> +makes_changes (notmuch_message_t *message,
> +	       tag_op_list_t *list,
> +	       tag_op_flag_t flags)
> +{
> +
> +    size_t i;
> +
> +    notmuch_tags_t *tags;
> +    notmuch_bool_t changes = FALSE;
> +
> +    /* First, do we delete an existing tag? */
> +    changes = FALSE;
> +    for (tags = notmuch_message_get_tags (message);
> +	 ! changes && notmuch_tags_valid (tags);
> +	 notmuch_tags_move_to_next (tags)) {
> +	const char *cur_tag = notmuch_tags_get (tags);
> +	int last_op =  (flags & TAG_FLAG_REMOVE_ALL) ? -1 : 0;
> +
> +	/* slight contortions to count down with an unsigned index */
> +	for (i = list->count; i-- > 0; /*nothing*/) {
> +	    if (strcmp (cur_tag, list->ops[i].tag) == 0) {
> +		last_op = list->ops[i].remove ? -1 : 1;
> +		break;
> +	    }
> +	}

I agree that this is a little ugly but ok I think. Is it worth adding a
comment as to why you are counting backwards? eg " we count backwards to
check whether the last change for the tag foo is removal"

But basically LGTM

Mark


> +
> +	changes = (last_op == -1);
> +    }
> +    notmuch_tags_destroy (tags);
> +
> +    if (changes)
> +	return TRUE;
> +
> +    /* Now check for adding new tags */
> +    for (i = 0; i < list->count; i++) {
> +	notmuch_bool_t exists = FALSE;
> +
> +	if (list->ops[i].remove)
> +	    continue;
> +
> +	for (tags = notmuch_message_get_tags (message);
> +	     notmuch_tags_valid (tags);
> +	     notmuch_tags_move_to_next (tags)) {
> +	    const char *cur_tag = notmuch_tags_get (tags);
> +	    if (strcmp (cur_tag, list->ops[i].tag) == 0) {
> +		exists = TRUE;
> +		break;
> +	    }
> +	}
> +	notmuch_tags_destroy (tags);
> +
> +	/* the following test is conservative,
> +	 * in the sense it ignores cases like +foo ... -foo
> +	 * but this is OK from a correctness point of view
> +	 */
> +	if (! exists)
> +	    return TRUE;
> +    }
> +    return FALSE;
> +
> +}
> +
>  notmuch_status_t
>  tag_op_list_apply (notmuch_message_t *message,
>  		   tag_op_list_t *list,
> @@ -133,6 +196,9 @@ tag_op_list_apply (notmuch_message_t *message,
>      notmuch_status_t status = 0;
>      tag_operation_t *tag_ops = list->ops;
>  
> +    if (! (flags & TAG_FLAG_PRE_OPTIMIZED) && ! makes_changes (message, list, flags))
> +	return NOTMUCH_STATUS_SUCCESS;
> +
>      status = notmuch_message_freeze (message);
>      if (status) {
>  	message_error (message, status, "freezing message");
> -- 
> 1.7.10.4
>
> _______________________________________________
> notmuch mailing list
> notmuch@notmuchmail.org
> http://notmuchmail.org/mailman/listinfo/notmuch

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Patch v3b 5/9] notmuch-restore: add support for input format 'batch-tag'
  2012-12-07  1:26 ` [Patch v3b 5/9] notmuch-restore: add support for input format 'batch-tag' david
  2012-12-07 22:50   ` Jani Nikula
@ 2012-12-08 11:43   ` Mark Walters
  1 sibling, 0 replies; 21+ messages in thread
From: Mark Walters @ 2012-12-08 11:43 UTC (permalink / raw)
  To: david, notmuch; +Cc: David Bremner


Hi

Basically LGTM: just one comment below.

On Fri, 07 Dec 2012, david@tethera.net wrote:
> From: David Bremner <bremner@debian.org>
>
> This can be enabled with the new --format=batch-tag command line
> option to "notmuch restore". The input must consist of lines of the
> format:
>
>     +<tag>|-<tag> [...] [--] id:<msg-id>
>
> Each line is interpreted similarly to "notmuch tag" command line
> arguments. The delimiter is one or more spaces ' '. Any characters in
> <tag> and <search-terms> MAY be hex encoded with %NN where NN is the
> hexadecimal value of the character. Any ' ' and '%' characters in
> <tag> and <msg-id> MUST be hex encoded (using %20 and %25,
> respectively). Any characters that are not part of <tag> or
> <search-terms> MUST NOT be hex encoded.
>
> Leading and trailing space ' ' is ignored. Empty lines and lines
> beginning with '#' are ignored.
>
> Commit message mainly stolen from Jani's batch tagging commit, to
> follow.
> ---
>  notmuch-restore.c |  206 ++++++++++++++++++++++++++++++++++-------------------
>  1 file changed, 131 insertions(+), 75 deletions(-)
>
> diff --git a/notmuch-restore.c b/notmuch-restore.c
> index f03dcac..ceec2d3 100644
> --- a/notmuch-restore.c
> +++ b/notmuch-restore.c
> @@ -19,18 +19,22 @@
>   */
>  
>  #include "notmuch-client.h"
> +#include "dump-restore-private.h"
> +#include "tag-util.h"
> +#include "string-util.h"
> +
> +static volatile sig_atomic_t interrupted;
> +static regex_t regex;
>  
>  static int
> -tag_message (notmuch_database_t *notmuch, const char *message_id,
> -	     char *file_tags, notmuch_bool_t remove_all,
> -	     notmuch_bool_t synchronize_flags)
> +tag_message (unused (void *ctx),
> +	     notmuch_database_t *notmuch,
> +	     const char *message_id,
> +	     tag_op_list_t *tag_ops,
> +	     tag_op_flag_t flags)
>  {
>      notmuch_status_t status;
> -    notmuch_tags_t *db_tags;
> -    char *db_tags_str;
>      notmuch_message_t *message = NULL;
> -    const char *tag;
> -    char *next;
>      int ret = 0;
>  
>      status = notmuch_database_find_message (notmuch, message_id, &message);
> @@ -44,55 +48,62 @@ tag_message (notmuch_database_t *notmuch, const char *message_id,
>  
>      /* In order to detect missing messages, this check/optimization is
>       * intentionally done *after* first finding the message. */
> -    if (! remove_all && (file_tags == NULL || *file_tags == '\0'))
> -	goto DONE;
> -
> -    db_tags_str = NULL;
> -    for (db_tags = notmuch_message_get_tags (message);
> -	 notmuch_tags_valid (db_tags);
> -	 notmuch_tags_move_to_next (db_tags)) {
> -	tag = notmuch_tags_get (db_tags);
> -
> -	if (db_tags_str)
> -	    db_tags_str = talloc_asprintf_append (db_tags_str, " %s", tag);
> -	else
> -	    db_tags_str = talloc_strdup (message, tag);
> -    }
> +    if ((flags & TAG_FLAG_REMOVE_ALL) || tag_op_list_size (tag_ops))
> +	tag_op_list_apply (message, tag_ops, flags);
>  
> -    if (((file_tags == NULL || *file_tags == '\0') &&
> -	 (db_tags_str == NULL || *db_tags_str == '\0')) ||
> -	(file_tags && db_tags_str && strcmp (file_tags, db_tags_str) == 0))
> -	goto DONE;
> +    notmuch_message_destroy (message);
>  
> -    notmuch_message_freeze (message);
> +    return ret;
> +}
>  
> -    if (remove_all)
> -	notmuch_message_remove_all_tags (message);
> +static int
> +parse_sup_line (void *ctx, char *line,
> +		char **query_str, tag_op_list_t *tag_ops)
> +{
>  
> -    next = file_tags;
> -    while (next) {
> -	tag = strsep (&next, " ");
> -	if (*tag == '\0')
> -	    continue;
> -	status = notmuch_message_add_tag (message, tag);
> -	if (status) {
> -	    fprintf (stderr, "Error applying tag %s to message %s:\n",
> -		     tag, message_id);
> -	    fprintf (stderr, "%s\n", notmuch_status_to_string (status));
> -	    ret = 1;
> -	}
> +    regmatch_t match[3];
> +    char *file_tags;
> +    int rerr;
> +
> +    tag_op_list_reset (tag_ops);
> +
> +    chomp_newline (line);
> +
> +    /* Silently ignore blank lines */
> +    if (line[0] == '\0') {
> +	return 1;
> +    }
> +
> +    rerr = xregexec (&regex, line, 3, match, 0);
> +    if (rerr == REG_NOMATCH) {
> +	fprintf (stderr, "Warning: Ignoring invalid sup format line: %s\n",
> +		 line);
> +	return 1;
>      }
>  
> -    notmuch_message_thaw (message);
> +    *query_str = talloc_strndup (ctx, line + match[1].rm_so,
> +				 match[1].rm_eo - match[1].rm_so);
> +    file_tags = talloc_strndup (ctx, line + match[2].rm_so,
> +				match[2].rm_eo - match[2].rm_so);
>  
> -    if (synchronize_flags)
> -	notmuch_message_tags_to_maildir_flags (message);
> +    char *tok = file_tags;
> +    size_t tok_len = 0;
>  
> -  DONE:
> -    if (message)
> -	notmuch_message_destroy (message);
> +    tag_op_list_reset (tag_ops);
> +
> +    while ((tok = strtok_len (tok + tok_len, " ", &tok_len)) != NULL) {
> +
> +	if (*(tok + tok_len) != '\0') {
> +	    *(tok + tok_len) = '\0';
> +	    tok_len++;
> +	}
> +
> +	if (tag_op_list_append (ctx, tag_ops, tok, FALSE))
> +	    return -1;
> +    }
> +
> +    return 0;
>  
> -    return ret;
>  }
>  
>  int
> @@ -100,16 +111,19 @@ notmuch_restore_command (unused (void *ctx), int argc, char *argv[])
>  {
>      notmuch_config_t *config;
>      notmuch_database_t *notmuch;
> -    notmuch_bool_t synchronize_flags;
>      notmuch_bool_t accumulate = FALSE;
> +    tag_op_flag_t flags = 0;
> +    tag_op_list_t *tag_ops;
> +
>      char *input_file_name = NULL;
>      FILE *input = stdin;
>      char *line = NULL;
>      size_t line_size;
>      ssize_t line_len;
> -    regex_t regex;
> -    int rerr;
> +
> +    int ret = 0;
>      int opt_index;
> +    int input_format = DUMP_FORMAT_AUTO;
>  
>      config = notmuch_config_open (ctx, NULL, NULL);
>      if (config == NULL)
> @@ -119,9 +133,15 @@ notmuch_restore_command (unused (void *ctx), int argc, char *argv[])
>  			       NOTMUCH_DATABASE_MODE_READ_WRITE, &notmuch))
>  	return 1;
>  
> -    synchronize_flags = notmuch_config_get_maildir_synchronize_flags (config);
> +    if (notmuch_config_get_maildir_synchronize_flags (config))
> +	flags |= TAG_FLAG_MAILDIR_SYNC;
>  
>      notmuch_opt_desc_t options[] = {
> +	{ NOTMUCH_OPT_KEYWORD, &input_format, "format", 'f',
> +	  (notmuch_keyword_t []){ { "auto", DUMP_FORMAT_AUTO },
> +				  { "batch-tag", DUMP_FORMAT_BATCH_TAG },
> +				  { "sup", DUMP_FORMAT_SUP },
> +				  { 0, 0 } } },
>  	{ NOTMUCH_OPT_STRING, &input_file_name, "input", 'i', 0 },
>  	{ NOTMUCH_OPT_BOOLEAN,  &accumulate, "accumulate", 'a', 0 },
>  	{ 0, 0, 0, 0, 0 }
> @@ -134,6 +154,9 @@ notmuch_restore_command (unused (void *ctx), int argc, char *argv[])
>  	return 1;
>      }
>  
> +    if (! accumulate)
> +	flags |= TAG_FLAG_REMOVE_ALL;
> +
>      if (input_file_name) {
>  	input = fopen (input_file_name, "r");
>  	if (input == NULL) {
> @@ -154,44 +177,77 @@ notmuch_restore_command (unused (void *ctx), int argc, char *argv[])
>       * non-space characters for the message-id, then one or more
>       * spaces, then a list of space-separated tags as a sequence of
>       * characters within literal '(' and ')'. */

^^^ shouldn't this comment move to parse_sup_line?

Best wishes

Mark


> -    if ( xregcomp (&regex,
> -		   "^([^ ]+) \\(([^)]*)\\)$",
> -		   REG_EXTENDED) )
> -	INTERNAL_ERROR ("compile time constant regex failed.");
> +    char *p;
>  
> -    while ((line_len = getline (&line, &line_size, input)) != -1) {
> -	regmatch_t match[3];
> -	char *message_id, *file_tags;
> +    line_len = getline (&line, &line_size, input);
> +    if (line_len == 0)
> +	return 0;
>  
> -	chomp_newline (line);
> +    tag_ops = tag_op_list_create (ctx);
> +    if (tag_ops == NULL) {
> +	fprintf (stderr, "Out of memory.\n");
> +	return 1;
> +    }
>  
> -	rerr = xregexec (&regex, line, 3, match, 0);
> -	if (rerr == REG_NOMATCH) {
> -	    fprintf (stderr, "Warning: Ignoring invalid input line: %s\n",
> -		     line);
> -	    continue;
> +    for (p = line; (input_format == DUMP_FORMAT_AUTO) && *p; p++) {
> +	if (*p == '(')
> +	    input_format = DUMP_FORMAT_SUP;
> +    }
> +
> +    if (input_format == DUMP_FORMAT_AUTO)
> +	input_format = DUMP_FORMAT_BATCH_TAG;
> +
> +    if (input_format == DUMP_FORMAT_SUP)
> +	if ( xregcomp (&regex,
> +		       "^([^ ]+) \\(([^)]*)\\)$",
> +		       REG_EXTENDED) )
> +	    INTERNAL_ERROR ("compile time constant regex failed.");
> +
> +    do {
> +	char *query_string;
> +
> +	if (input_format == DUMP_FORMAT_SUP) {
> +	    ret = parse_sup_line (ctx, line, &query_string, tag_ops);
> +	} else {
> +	    ret = parse_tag_line (ctx, line, TAG_FLAG_BE_GENEROUS,
> +				  &query_string, tag_ops);
> +
> +	    if (ret == 0) {
> +		if (strncmp ("id:", query_string, 3) != 0) {
> +		    fprintf (stderr, "Unsupported query: %s\n", query_string);
> +		    continue;
> +		}
> +		/* delete id: from front of string; tag_message
> +		 * expects a raw message-id.
> +		 *
> +		 * XXX: Note that query string id:foo and bar will be
> +		 * interpreted as a message id "foo and bar". This
> +		 * should eventually be fixed to give a better error
> +		 * message.
> +		 */
> +		query_string = query_string + 3;
> +	    }
>  	}
>  
> -	message_id = xstrndup (line + match[1].rm_so,
> -			       match[1].rm_eo - match[1].rm_so);
> -	file_tags = xstrndup (line + match[2].rm_so,
> -			      match[2].rm_eo - match[2].rm_so);
> +	if (ret > 0)
> +	    continue;
>  
> -	tag_message (notmuch, message_id, file_tags, ! accumulate,
> -		     synchronize_flags);
> +	if (ret < 0 || tag_message (ctx, notmuch, query_string,
> +				    tag_ops, flags))
> +	    break;
>  
> -	free (message_id);
> -	free (file_tags);
> -    }
> +    }  while ((line_len = getline (&line, &line_size, input)) != -1);
>  
> -    regfree (&regex);
> +    if (input_format == DUMP_FORMAT_SUP)
> +	regfree (&regex);
>  
>      if (line)
>  	free (line);
>  
>      notmuch_database_destroy (notmuch);
> +
>      if (input != stdin)
>  	fclose (input);
>  
> -    return 0;
> +    return ret;
>  }
> -- 
> 1.7.10.4
>
> _______________________________________________
> notmuch mailing list
> notmuch@notmuchmail.org
> http://notmuchmail.org/mailman/listinfo/notmuch

^ permalink raw reply	[flat|nested] 21+ messages in thread

end of thread, other threads:[~2012-12-08 11:43 UTC | newest]

Thread overview: 21+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-12-07  1:26 V3b of batch tagging/dump/restore patches david
2012-12-07  1:26 ` [Patch v3b 1/9] notmuch-dump: add --format=(batch-tag|sup) david
2012-12-07 22:21   ` Jani Nikula
2012-12-07  1:26 ` [Patch v3b 2/9] test: add sanity check for dump --format=batch-tag david
2012-12-07 22:26   ` Jani Nikula
2012-12-07  1:26 ` [Patch v3b 3/9] util: add string-util.[ch] david
2012-12-07 22:30   ` Jani Nikula
2012-12-08 10:23   ` Mark Walters
2012-12-07  1:26 ` [Patch v3b 4/9] tag-util.[ch]: New files for common tagging routines david
2012-12-07 22:45   ` Jani Nikula
2012-12-08 10:50   ` Mark Walters
2012-12-07  1:26 ` [Patch v3b 5/9] notmuch-restore: add support for input format 'batch-tag' david
2012-12-07 22:50   ` Jani Nikula
2012-12-08 11:43   ` Mark Walters
2012-12-07  1:26 ` [Patch v3b 6/9] test: update dump-restore roundtripping test for batch-tag format david
2012-12-07 22:56   ` Jani Nikula
2012-12-07  1:26 ` [Patch v3b 7/9] test: second set of dump/restore --format=batch-tag tests david
2012-12-07  1:26 ` [Patch v3b 8/9] notmuch-{dump, restore}.1: document new format options david
2012-12-07  1:26 ` [Patch v3b 9/9] tag-util: optimization of tag application david
2012-12-07 23:08   ` Jani Nikula
2012-12-08 11:22   ` Mark Walters

Code repositories for project(s) associated with this public inbox

	https://yhetil.org/notmuch.git/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).