* [PATCH 1/8] hex-escape: (en|de)code strings to/from restricted character set
2012-03-31 22:17 [PATCH 0/8] batch tagging support: "notmuch tag --stdin" Jani Nikula
@ 2012-03-31 22:17 ` Jani Nikula
2012-03-31 22:17 ` [PATCH 2/8] hex-escape: be more strict about the format while decoding Jani Nikula
` (6 subsequent siblings)
7 siblings, 0 replies; 28+ messages in thread
From: Jani Nikula @ 2012-03-31 22:17 UTC (permalink / raw)
To: notmuch
From: David Bremner <bremner@debian.org>
The character set is chosen to be suitable for pathnames, and the same
as that used by contrib/nmbug
---
util/Makefile.local | 2 +-
util/hex-escape.c | 156 +++++++++++++++++++++++++++++++++++++++++++++++++++
util/hex-escape.h | 32 +++++++++++
3 files changed, 189 insertions(+), 1 deletions(-)
create mode 100644 util/hex-escape.c
create mode 100644 util/hex-escape.h
diff --git a/util/Makefile.local b/util/Makefile.local
index c7cae61..3ca623e 100644
--- a/util/Makefile.local
+++ b/util/Makefile.local
@@ -3,7 +3,7 @@
dir := util
extra_cflags += -I$(srcdir)/$(dir)
-libutil_c_srcs := $(dir)/xutil.c $(dir)/error_util.c
+libutil_c_srcs := $(dir)/xutil.c $(dir)/error_util.c $(dir)/hex-escape.c
libutil_modules := $(libutil_c_srcs:.c=.o)
diff --git a/util/hex-escape.c b/util/hex-escape.c
new file mode 100644
index 0000000..6c1260b
--- /dev/null
+++ b/util/hex-escape.c
@@ -0,0 +1,156 @@
+/* hex-escape.c - Manage encoding and decoding of byte strings into path names
+ *
+ * Copyright (c) 2011 David Bremner
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see http://www.gnu.org/licenses/ .
+ *
+ * Author: David Bremner <david@tethera.net>
+ */
+
+#include <assert.h>
+#include <string.h>
+#include <talloc.h>
+#include "error_util.h"
+#include "hex-escape.h"
+
+static const size_t default_buf_size = 1024;
+
+static const char *output_charset =
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+-_@=.:,";
+
+static const int escape_char = '%';
+
+static int
+is_output (char c)
+{
+ return (strchr (output_charset, c) != NULL);
+}
+
+static int
+maybe_realloc (void *ctx, size_t needed, char **out, size_t *out_size)
+{
+ if (*out_size < needed) {
+
+ if (*out == NULL)
+ *out = talloc_size (ctx, needed);
+ else
+ *out = talloc_realloc (ctx, *out, char, needed);
+
+ if (*out == NULL)
+ return 0;
+
+ *out_size = needed;
+ }
+ return 1;
+}
+
+hex_status_t
+hex_encode (void *ctx, const char *in, char **out, size_t *out_size)
+{
+
+ const unsigned char *p;
+ char *q;
+
+ size_t escape_count = 0;
+ size_t len = 0;
+ size_t needed;
+
+ assert (ctx); assert (in); assert (out); assert (out_size);
+
+ for (p = (unsigned char *) in; *p; p++) {
+ escape_count += (!is_output (*p));
+ len++;
+ }
+
+ needed = len + escape_count * 2 + 1;
+
+ if (*out == NULL)
+ *out_size = 0;
+
+ if (!maybe_realloc (ctx, needed, out, out_size))
+ return HEX_OUT_OF_MEMORY;
+
+ q = *out;
+ p = (unsigned char *) in;
+
+ while (*p) {
+ if (is_output (*p)) {
+ *q++ = *p++;
+ } else {
+ sprintf (q, "%%%02x", *p++);
+ q += 3;
+ }
+ }
+
+ *q = '\0';
+ return HEX_SUCCESS;
+}
+
+hex_status_t
+hex_decode (void *ctx, const char *in, char **out, size_t * out_size)
+{
+
+ char buf[3];
+
+ const char *p;
+ unsigned char *q;
+
+ size_t escape_count = 0;
+ size_t needed = 0;
+
+ assert (ctx); assert (in); assert (out); assert (out_size);
+
+ size_t len = strlen (in);
+
+ for (p = in; *p; p++)
+ escape_count += (*p == escape_char);
+
+ needed = len - escape_count * 2 + 1;
+
+ if (!maybe_realloc (ctx, needed, out, out_size))
+ return HEX_OUT_OF_MEMORY;
+
+ p = in;
+ q = (unsigned char *) *out;
+ buf[2] = 0;
+
+ while (*p) {
+
+ if (*p == escape_char) {
+
+ char *endp;
+
+ if (len < 3)
+ return HEX_SYNTAX_ERROR;
+
+ buf[0] = p[1];
+ buf[1] = p[2];
+
+ *q = strtol (buf, &endp, 16);
+
+ if (endp != buf + 2)
+ return HEX_SYNTAX_ERROR;
+
+ len -= 3;
+ p += 3;
+ q++;
+ } else {
+ *q++ = *p++;
+ }
+ }
+
+ *q = '\0';
+
+ return HEX_SUCCESS;
+}
diff --git a/util/hex-escape.h b/util/hex-escape.h
new file mode 100644
index 0000000..e409626
--- /dev/null
+++ b/util/hex-escape.h
@@ -0,0 +1,32 @@
+#ifndef _HEX_ESCAPE_H
+#define _HEX_ESCAPE_H
+
+typedef enum hex_status {
+ HEX_SUCCESS = 0,
+ HEX_SYNTAX_ERROR,
+ HEX_OUT_OF_MEMORY
+} hex_status_t;
+
+/*
+ * The API is modelled on that for getline.
+ *
+ * If 'out' points to a NULL pointer a char array of the appropriate
+ * size is allocated using talloc, and out_size is updated.
+ *
+ * If 'out' points to a non-NULL pointer, it assumed to describe an
+ * existing char array, with the size given in *out_size. This array
+ * may be resized by talloc_realloc if needed; in this case *out_size
+ * will also be updated.
+ *
+ * Note that it is an error to pass a NULL pointer for any parameter
+ * of these routines.
+ */
+
+hex_status_t
+hex_encode (void *talloc_ctx, const char *in, char **out,
+ size_t *out_size);
+
+hex_status_t
+hex_decode (void *talloc_ctx, const char *in, char **out,
+ size_t *out_size);
+#endif
--
1.7.5.4
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 2/8] hex-escape: be more strict about the format while decoding
2012-03-31 22:17 [PATCH 0/8] batch tagging support: "notmuch tag --stdin" Jani Nikula
2012-03-31 22:17 ` [PATCH 1/8] hex-escape: (en|de)code strings to/from restricted character set Jani Nikula
@ 2012-03-31 22:17 ` Jani Nikula
2012-04-05 11:33 ` David Bremner
2012-03-31 22:17 ` [PATCH 3/8] hex-escape: add function to decode escaped string in-place Jani Nikula
` (5 subsequent siblings)
7 siblings, 1 reply; 28+ messages in thread
From: Jani Nikula @ 2012-03-31 22:17 UTC (permalink / raw)
To: notmuch
Signed-off-by: Jani Nikula <jani@nikula.org>
---
This could be folded to "hex-escape: (en|de)code strings to/from
restricted character set".
---
util/hex-escape.c | 7 ++++---
1 files changed, 4 insertions(+), 3 deletions(-)
diff --git a/util/hex-escape.c b/util/hex-escape.c
index 6c1260b..9de79df 100644
--- a/util/hex-escape.c
+++ b/util/hex-escape.c
@@ -21,6 +21,7 @@
#include <assert.h>
#include <string.h>
#include <talloc.h>
+#include <ctype.h>
#include "error_util.h"
#include "hex-escape.h"
@@ -131,18 +132,18 @@ hex_decode (void *ctx, const char *in, char **out, size_t * out_size)
char *endp;
- if (len < 3)
+ if (!isxdigit ((unsigned char) p[1]) ||
+ !isxdigit ((unsigned char) p[2]))
return HEX_SYNTAX_ERROR;
buf[0] = p[1];
buf[1] = p[2];
- *q = strtol (buf, &endp, 16);
+ *q = strtoul (buf, &endp, 16);
if (endp != buf + 2)
return HEX_SYNTAX_ERROR;
- len -= 3;
p += 3;
q++;
} else {
--
1.7.5.4
^ permalink raw reply related [flat|nested] 28+ messages in thread
* Re: [PATCH 2/8] hex-escape: be more strict about the format while decoding
2012-03-31 22:17 ` [PATCH 2/8] hex-escape: be more strict about the format while decoding Jani Nikula
@ 2012-04-05 11:33 ` David Bremner
2012-04-05 11:49 ` Jani Nikula
0 siblings, 1 reply; 28+ messages in thread
From: David Bremner @ 2012-04-05 11:33 UTC (permalink / raw)
To: Jani Nikula, notmuch
Jani Nikula <jani@nikula.org> writes:
> Signed-off-by: Jani Nikula <jani@nikula.org>
>
> ---
>
> This could be folded to "hex-escape: (en|de)code strings to/from
> restricted character set".
That's probably a good plan.
> - if (len < 3)
> + if (!isxdigit ((unsigned char) p[1]) ||
> + !isxdigit ((unsigned char) p[2]))
What happens if there are not two characters after the escape? Is this
relying on calling isxdigit on the null terminator?
d
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH 2/8] hex-escape: be more strict about the format while decoding
2012-04-05 11:33 ` David Bremner
@ 2012-04-05 11:49 ` Jani Nikula
2012-04-05 11:59 ` David Bremner
0 siblings, 1 reply; 28+ messages in thread
From: Jani Nikula @ 2012-04-05 11:49 UTC (permalink / raw)
To: David Bremner, notmuch
On Thu, 05 Apr 2012 08:33:23 -0300, David Bremner <david@tethera.net> wrote:
> Jani Nikula <jani@nikula.org> writes:
>
> > Signed-off-by: Jani Nikula <jani@nikula.org>
> >
> > ---
> >
> > This could be folded to "hex-escape: (en|de)code strings to/from
> > restricted character set".
>
> That's probably a good plan.
>
> > - if (len < 3)
> > + if (!isxdigit ((unsigned char) p[1]) ||
> > + !isxdigit ((unsigned char) p[2]))
>
> What happens if there are not two characters after the escape? Is this
> relying on calling isxdigit on the null terminator?
It is, and technically there's nothing wrong with that. Would you prefer
explicit checks for '\0' in the if condition, for clarity? Or a comment
about it?
Jani.
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH 2/8] hex-escape: be more strict about the format while decoding
2012-04-05 11:49 ` Jani Nikula
@ 2012-04-05 11:59 ` David Bremner
0 siblings, 0 replies; 28+ messages in thread
From: David Bremner @ 2012-04-05 11:59 UTC (permalink / raw)
To: Jani Nikula, notmuch
Jani Nikula <jani@nikula.org> writes:
> On Thu, 05 Apr 2012 08:33:23 -0300, David Bremner <david@tethera.net> wrote:
>>
>> > - if (len < 3)
>> > + if (!isxdigit ((unsigned char) p[1]) ||
>> > + !isxdigit ((unsigned char) p[2]))
>>
>> What happens if there are not two characters after the escape? Is this
>> relying on calling isxdigit on the null terminator?
>
> It is, and technically there's nothing wrong with that. Would you prefer
> explicit checks for '\0' in the if condition, for clarity? Or a comment
> about it?
I think a comment would do. Or the checks if you prefer.
^ permalink raw reply [flat|nested] 28+ messages in thread
* [PATCH 3/8] hex-escape: add function to decode escaped string in-place
2012-03-31 22:17 [PATCH 0/8] batch tagging support: "notmuch tag --stdin" Jani Nikula
2012-03-31 22:17 ` [PATCH 1/8] hex-escape: (en|de)code strings to/from restricted character set Jani Nikula
2012-03-31 22:17 ` [PATCH 2/8] hex-escape: be more strict about the format while decoding Jani Nikula
@ 2012-03-31 22:17 ` Jani Nikula
2012-04-05 11:56 ` David Bremner
2012-03-31 22:17 ` [PATCH 4/8] test/hex-xcode: new test binary Jani Nikula
` (4 subsequent siblings)
7 siblings, 1 reply; 28+ messages in thread
From: Jani Nikula @ 2012-03-31 22:17 UTC (permalink / raw)
To: notmuch
Add function hex_decode_inplace() to decode the input string onto
itself.
Signed-off-by: Jani Nikula <jani@nikula.org>
---
This could be folded to "hex-escape: (en|de)code strings to/from
restricted character set".
---
util/hex-escape.c | 62 ++++++++++++++++++++++++++++++----------------------
util/hex-escape.h | 6 +++++
2 files changed, 42 insertions(+), 26 deletions(-)
diff --git a/util/hex-escape.c b/util/hex-escape.c
index 9de79df..e794f98 100644
--- a/util/hex-escape.c
+++ b/util/hex-escape.c
@@ -98,38 +98,15 @@ hex_encode (void *ctx, const char *in, char **out, size_t *out_size)
return HEX_SUCCESS;
}
-hex_status_t
-hex_decode (void *ctx, const char *in, char **out, size_t * out_size)
+/* Note: This must succeed for p == q to support hex_decode_inplace(). */
+static hex_status_t
+hex_decode_internal (const char *p, unsigned char *q)
{
-
char buf[3];
-
- const char *p;
- unsigned char *q;
-
- size_t escape_count = 0;
- size_t needed = 0;
-
- assert (ctx); assert (in); assert (out); assert (out_size);
-
- size_t len = strlen (in);
-
- for (p = in; *p; p++)
- escape_count += (*p == escape_char);
-
- needed = len - escape_count * 2 + 1;
-
- if (!maybe_realloc (ctx, needed, out, out_size))
- return HEX_OUT_OF_MEMORY;
-
- p = in;
- q = (unsigned char *) *out;
buf[2] = 0;
while (*p) {
-
if (*p == escape_char) {
-
char *endp;
if (!isxdigit ((unsigned char) p[1]) ||
@@ -155,3 +132,36 @@ hex_decode (void *ctx, const char *in, char **out, size_t * out_size)
return HEX_SUCCESS;
}
+
+hex_status_t
+hex_decode_inplace (char *p)
+{
+ return hex_decode_internal (p, (unsigned char *) p);
+}
+
+hex_status_t
+hex_decode (void *ctx, const char *in, char **out, size_t * out_size)
+{
+ const char *p;
+ unsigned char *q;
+
+ size_t escape_count = 0;
+ size_t needed = 0;
+
+ assert (ctx); assert (in); assert (out); assert (out_size);
+
+ size_t len = strlen (in);
+
+ for (p = in; *p; p++)
+ escape_count += (*p == escape_char);
+
+ needed = len - escape_count * 2 + 1;
+
+ if (!maybe_realloc (ctx, needed, out, out_size))
+ return HEX_OUT_OF_MEMORY;
+
+ p = in;
+ q = (unsigned char *) *out;
+
+ return hex_decode_internal (p, q);
+}
diff --git a/util/hex-escape.h b/util/hex-escape.h
index e409626..be70ad2 100644
--- a/util/hex-escape.h
+++ b/util/hex-escape.h
@@ -29,4 +29,10 @@ hex_encode (void *talloc_ctx, const char *in, char **out,
hex_status_t
hex_decode (void *talloc_ctx, const char *in, char **out,
size_t *out_size);
+
+/*
+ * Decode 'in' onto itself.
+ */
+hex_status_t
+hex_decode_inplace (char *in);
#endif
--
1.7.5.4
^ permalink raw reply related [flat|nested] 28+ messages in thread
* Re: [PATCH 3/8] hex-escape: add function to decode escaped string in-place
2012-03-31 22:17 ` [PATCH 3/8] hex-escape: add function to decode escaped string in-place Jani Nikula
@ 2012-04-05 11:56 ` David Bremner
2012-04-05 12:00 ` Jani Nikula
0 siblings, 1 reply; 28+ messages in thread
From: David Bremner @ 2012-04-05 11:56 UTC (permalink / raw)
To: Jani Nikula, notmuch
Jani Nikula <jani@nikula.org> writes:
> Add function hex_decode_inplace() to decode the input string onto
> itself.
>
> Signed-off-by: Jani Nikula <jani@nikula.org>
>
> ---
>
> This could be folded to "hex-escape: (en|de)code strings to/from
> restricted character set".
Probably. It's a bit hard to follow as is; somehow the code movement is
a bit noisy.
> while (*p) {
> -
> if (*p == escape_char) {
> -
unrelated whitespace changes, naughty naughty.
> +hex_status_t
> +hex_decode_inplace (char *p)
> +{
> + return hex_decode_internal (p, (unsigned char *) p);
this could probably use a comment to the effect that there _will_ be
enough space.
> +
> + p = in;
> + q = (unsigned char *) *out;
I understand trying to minimize changes, but do p and q serve any
purpose here?
> +
> + return hex_decode_internal (p, q);
> +}
> +/*
> + * Decode 'in' onto itself.
> + */
This is just a bit terse for my taste.
d
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH 3/8] hex-escape: add function to decode escaped string in-place
2012-04-05 11:56 ` David Bremner
@ 2012-04-05 12:00 ` Jani Nikula
0 siblings, 0 replies; 28+ messages in thread
From: Jani Nikula @ 2012-04-05 12:00 UTC (permalink / raw)
To: David Bremner, notmuch
Thanks for the review, David. Agreed on all points.
J.
On Thu, 05 Apr 2012 08:56:24 -0300, David Bremner <bremner@unb.ca> wrote:
> Jani Nikula <jani@nikula.org> writes:
>
> > Add function hex_decode_inplace() to decode the input string onto
> > itself.
> >
> > Signed-off-by: Jani Nikula <jani@nikula.org>
> >
> > ---
> >
> > This could be folded to "hex-escape: (en|de)code strings to/from
> > restricted character set".
>
> Probably. It's a bit hard to follow as is; somehow the code movement is
> a bit noisy.
>
> > while (*p) {
> > -
> > if (*p == escape_char) {
> > -
> unrelated whitespace changes, naughty naughty.
>
> > +hex_status_t
> > +hex_decode_inplace (char *p)
> > +{
> > + return hex_decode_internal (p, (unsigned char *) p);
> this could probably use a comment to the effect that there _will_ be
> enough space.
>
> > +
> > + p = in;
> > + q = (unsigned char *) *out;
>
> I understand trying to minimize changes, but do p and q serve any
> purpose here?
>
> > +
> > + return hex_decode_internal (p, q);
> > +}
>
>
> > +/*
> > + * Decode 'in' onto itself.
> > + */
>
> This is just a bit terse for my taste.
>
> d
^ permalink raw reply [flat|nested] 28+ messages in thread
* [PATCH 4/8] test/hex-xcode: new test binary
2012-03-31 22:17 [PATCH 0/8] batch tagging support: "notmuch tag --stdin" Jani Nikula
` (2 preceding siblings ...)
2012-03-31 22:17 ` [PATCH 3/8] hex-escape: add function to decode escaped string in-place Jani Nikula
@ 2012-03-31 22:17 ` Jani Nikula
2012-03-31 22:17 ` [PATCH 5/8] test/hex-escaping: new test for hex escaping routines Jani Nikula
` (3 subsequent siblings)
7 siblings, 0 replies; 28+ messages in thread
From: Jani Nikula @ 2012-03-31 22:17 UTC (permalink / raw)
To: notmuch
From: David Bremner <bremner@debian.org>
This program is used both as a test-bed/unit-tester for
../util/hex-escape.c, and also as a utility in future tests of dump
and restore.
---
test/.gitignore | 1 +
test/Makefile.local | 6 ++-
test/basic | 2 +-
test/hex-xcode.c | 103 +++++++++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 110 insertions(+), 2 deletions(-)
create mode 100644 test/hex-xcode.c
diff --git a/test/.gitignore b/test/.gitignore
index e63c689..be7ab5e 100644
--- a/test/.gitignore
+++ b/test/.gitignore
@@ -3,4 +3,5 @@ corpus.mail
smtp-dummy
symbol-test
arg-test
+hex-xcode
tmp.*
diff --git a/test/Makefile.local b/test/Makefile.local
index 4a6a4b1..9cecd28 100644
--- a/test/Makefile.local
+++ b/test/Makefile.local
@@ -13,6 +13,9 @@ smtp_dummy_modules = $(smtp_dummy_srcs:.c=.o)
$(dir)/arg-test: $(dir)/arg-test.o command-line-arguments.o util/libutil.a
$(call quiet,CC) -I. $^ -o $@
+$(dir)/hex-xcode: $(dir)/hex-xcode.o command-line-arguments.o util/libutil.a
+ $(call quiet,CC) -I. $^ -o $@ -ltalloc
+
$(dir)/smtp-dummy: $(smtp_dummy_modules)
$(call quiet,CC) $^ -o $@
@@ -21,7 +24,8 @@ $(dir)/symbol-test: $(dir)/symbol-test.o
.PHONY: test check
-test-binaries: $(dir)/arg-test $(dir)/smtp-dummy $(dir)/symbol-test
+test-binaries: $(dir)/arg-test $(dir)/hex-xcode \
+ $(dir)/smtp-dummy $(dir)/symbol-test
test: all test-binaries
@${dir}/notmuch-test $(OPTIONS)
diff --git a/test/basic b/test/basic
index d6aed24..af57026 100755
--- a/test/basic
+++ b/test/basic
@@ -54,7 +54,7 @@ test_begin_subtest 'Ensure that all available tests will be run by notmuch-test'
eval $(sed -n -e '/^TESTS="$/,/^"$/p' $TEST_DIRECTORY/notmuch-test)
tests_in_suite=$(for i in $TESTS; do echo $i; done | sort)
available=$(find "$TEST_DIRECTORY" -maxdepth 1 -type f -executable -printf '%f\n' | \
- sed -r -e "/^(aggregate-results.sh|notmuch-test|smtp-dummy|test-verbose|symbol-test|arg-test)$/d" | \
+ sed -r -e "/^(aggregate-results.sh|notmuch-test|smtp-dummy|test-verbose|symbol-test|arg-test|hex-xcode)$/d" | \
sort)
test_expect_equal "$tests_in_suite" "$available"
diff --git a/test/hex-xcode.c b/test/hex-xcode.c
new file mode 100644
index 0000000..eec6541
--- /dev/null
+++ b/test/hex-xcode.c
@@ -0,0 +1,103 @@
+/* No, nothing to to with IDE from Apple Inc.
+ testbed for ../util/hex-escape.c.
+
+ usage:
+ hex-xcode [--direction=(encode|decode)] [--omit-newline] < file
+ hex-xcode [--direction=(encode|decode)] [--omit-newline] arg1 arg2 arg3 ...
+
+ */
+
+#include "notmuch-client.h"
+#include "hex-escape.h"
+#include <assert.h>
+
+
+enum direction {
+ ENCODE,
+ DECODE
+};
+
+static int
+xcode (void *ctx, enum direction dir, char *in, char **buf_p, size_t *size_p)
+{
+ hex_status_t status;
+
+ if (dir == ENCODE)
+ status = hex_encode (ctx, in, buf_p, size_p);
+ else
+ status = hex_decode (ctx, in, buf_p, size_p);
+
+ if (status == HEX_SUCCESS)
+ fputs (*buf_p, stdout);
+
+ return status;
+}
+
+
+int
+main (int argc, char **argv)
+{
+
+
+ enum direction dir = DECODE;
+ int omit_newline = FALSE;
+
+ notmuch_opt_desc_t options[] = {
+ { NOTMUCH_OPT_KEYWORD, &dir, "direction", 'd',
+ (notmuch_keyword_t []){ { "encode", ENCODE },
+ { "decode", DECODE },
+ { 0, 0 } } },
+ { NOTMUCH_OPT_BOOLEAN, &omit_newline, "omit-newline", 'n', 0 },
+ { 0, 0, 0, 0, 0 }
+ };
+
+ int opt_index = parse_arguments (argc, argv, options, 1);
+
+ if (opt_index < 0)
+ exit (1);
+
+ void *ctx = talloc_new (NULL);
+
+ char *line = NULL;
+ size_t line_size;
+ ssize_t line_len;
+
+ char *buffer = NULL;
+ size_t buf_size = 0;
+
+ notmuch_bool_t read_stdin = TRUE;
+
+ for (; opt_index < argc; opt_index++) {
+
+ if (xcode (ctx, dir, argv[opt_index],
+ &buffer, &buf_size) != HEX_SUCCESS)
+ return 1;
+
+ if (!omit_newline)
+ putchar ('\n');
+
+ read_stdin = FALSE;
+ }
+
+ if (!read_stdin)
+ return 0;
+
+ while ((line_len = getline (&line, &line_size, stdin)) != -1) {
+
+ chomp_newline (line);
+
+ if (xcode (ctx, dir, line, &buffer, &buf_size) != HEX_SUCCESS)
+ return 1;
+
+ if (!omit_newline)
+ putchar ('\n');
+
+ }
+
+ if (line)
+ free (line);
+
+ talloc_free (ctx);
+
+ return 0;
+}
--
1.7.5.4
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 5/8] test/hex-escaping: new test for hex escaping routines
2012-03-31 22:17 [PATCH 0/8] batch tagging support: "notmuch tag --stdin" Jani Nikula
` (3 preceding siblings ...)
2012-03-31 22:17 ` [PATCH 4/8] test/hex-xcode: new test binary Jani Nikula
@ 2012-03-31 22:17 ` Jani Nikula
2012-03-31 22:17 ` [PATCH 6/8] cli: add support for batch tagging operations to "notmuch tag" Jani Nikula
` (2 subsequent siblings)
7 siblings, 0 replies; 28+ messages in thread
From: Jani Nikula @ 2012-03-31 22:17 UTC (permalink / raw)
To: notmuch
From: David Bremner <bremner@debian.org>
These are more like unit tests, to (try to) make sure the library
functionality is working before building more complicated things on
top of it.
---
test/hex-escaping | 26 ++++++++++++++++++++++++++
test/notmuch-test | 1 +
2 files changed, 27 insertions(+), 0 deletions(-)
create mode 100755 test/hex-escaping
diff --git a/test/hex-escaping b/test/hex-escaping
new file mode 100755
index 0000000..f34cc8c
--- /dev/null
+++ b/test/hex-escaping
@@ -0,0 +1,26 @@
+#!/usr/bin/env bash
+test_description="hex encoding and decoding"
+. ./test-lib.sh
+
+test_begin_subtest "round trip"
+find $TEST_DIRECTORY/corpus -type f -print | sort | xargs cat > EXPECTED
+$TEST_DIRECTORY/hex-xcode --direction=encode < EXPECTED | $TEST_DIRECTORY/hex-xcode --direction=decode > OUTPUT
+test_expect_equal_file OUTPUT EXPECTED
+
+test_begin_subtest "punctuation"
+tag1='comic_swear=$&^%$^%\\//-+$^%$'
+tag_enc1=$($TEST_DIRECTORY/hex-xcode --direction=encode "$tag1")
+test_expect_equal "$tag_enc1" "comic_swear=%24%26%5e%25%24%5e%25%5c%5c%2f%2f-+%24%5e%25%24"
+
+test_begin_subtest "round trip newlines"
+printf 'this\n tag\t has\n spaces\n' > EXPECTED.$test_count
+$TEST_DIRECTORY/hex-xcode --direction=encode < EXPECTED.$test_count |\
+ $TEST_DIRECTORY/hex-xcode --direction=decode > OUTPUT.$test_count
+test_expect_equal_file EXPECTED.$test_count OUTPUT.$test_count
+
+test_begin_subtest "round trip 8bit chars"
+echo '%c3%91%c3%a5%c3%b0%c3%a3%c3%a5%c3%a9-%c3%8f%c3%8a' > EXPECTED.$test_count
+$TEST_DIRECTORY/hex-xcode --direction=decode < EXPECTED.$test_count |\
+ $TEST_DIRECTORY/hex-xcode --direction=encode > OUTPUT.$test_count
+test_expect_equal_file EXPECTED.$test_count OUTPUT.$test_count
+test_done
diff --git a/test/notmuch-test b/test/notmuch-test
index f03b594..d667fbd 100755
--- a/test/notmuch-test
+++ b/test/notmuch-test
@@ -56,6 +56,7 @@ TESTS="
emacs-address-cleaning
emacs-hello
emacs-show
+ hex-escaping
"
TESTS=${NOTMUCH_TESTS:=$TESTS}
--
1.7.5.4
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 6/8] cli: add support for batch tagging operations to "notmuch tag"
2012-03-31 22:17 [PATCH 0/8] batch tagging support: "notmuch tag --stdin" Jani Nikula
` (4 preceding siblings ...)
2012-03-31 22:17 ` [PATCH 5/8] test/hex-escaping: new test for hex escaping routines Jani Nikula
@ 2012-03-31 22:17 ` Jani Nikula
2012-04-02 17:51 ` Jameson Graef Rollins
2012-04-02 17:54 ` Jameson Graef Rollins
2012-03-31 22:17 ` [PATCH 7/8] test: add test for notmuch tag --stdin option Jani Nikula
2012-03-31 22:17 ` [PATCH 8/8] man: document " Jani Nikula
7 siblings, 2 replies; 28+ messages in thread
From: Jani Nikula @ 2012-03-31 22:17 UTC (permalink / raw)
To: notmuch
Add support for batch tagging operations through stdin to "notmuch
tag". This can be enabled with the new --stdin command line option to
"notmuch new". The input must consist of lines of the format:
T +<tag>|-<tag> [...] [--] <search-terms>
Each line is interpreted similarly to "notmuch tag" command line
arguments. The delimiter is one or more spaces ' '. Any characters in
<tag> and <search-terms> MAY be hex encoded with %NN where NN is the
hexadecimal ASCII value of the character. Any ' ' and '%' characters
in <tag> and <search-terms> MUST be hex encoded (using %20 and %25,
respectively). Any characters that are not part of <tag> or
<search-terms> MUST NOT be hex encoded.
Leading and trailing space ' ' is ignored. Empty lines and lines
beginning with '#' are ignored.
Signed-off-by: Jani Nikula <jani@nikula.org>
---
notmuch-tag.c | 244 +++++++++++++++++++++++++++++++++++++++++++++++++-------
1 files changed, 213 insertions(+), 31 deletions(-)
diff --git a/notmuch-tag.c b/notmuch-tag.c
index 05feed3..32c7167 100644
--- a/notmuch-tag.c
+++ b/notmuch-tag.c
@@ -19,6 +19,7 @@
*/
#include "notmuch-client.h"
+#include "hex-escape.h"
static volatile sig_atomic_t interrupted;
@@ -167,17 +168,181 @@ tag_query (void *ctx, notmuch_database_t *notmuch, const char *query_string,
return interrupted;
}
+/* like strtok(3), but without state, and doesn't modify s. usage pattern:
+ *
+ * const char *tok = input;
+ * const char *delim = " \t";
+ * size_t tok_len = 0;
+ *
+ * while ((tok = strtok_len (tok + tok_len, delim, &tok_len)) != NULL) {
+ * // do stuff with string tok of length tok_len
+ * }
+ */
+static
+char *strtok_len(char *s, const char *delim, size_t *len)
+{
+ /* skip initial delims */
+ s += strspn (s, delim);
+
+ /* length of token */
+ *len = strcspn (s, delim);
+
+ return *len ? s : NULL;
+}
+
+/* Tag messages according to 'input', which must consist of lines of
+ * the format:
+ *
+ * T +<tag>|-<tag> [...] [--] <search-terms>
+ *
+ * Each line is interpreted similarly to "notmuch tag" command line
+ * arguments. The delimiter is one or more spaces ' '. Any characters
+ * in <tag> and <search-terms> MAY be hex encoded with %NN where NN is
+ * the hexadecimal ASCII value of the character. Any ' ' and '%'
+ * characters in <tag> and <search-terms> MUST be hex encoded (using
+ * %20 and %25, respectively). Any characters that are not part of
+ * <tag> or <search-terms> MUST NOT be hex encoded.
+ *
+ * Leading and trailing space ' ' is ignored. Empty lines and lines
+ * beginning with '#' are ignored.
+ */
+static int
+tag_file (void *ctx, notmuch_database_t *notmuch, FILE *input,
+ notmuch_bool_t synchronize_flags)
+{
+ char *line = NULL;
+ size_t line_size;
+ ssize_t line_len;
+ tag_operation_t *tag_ops;
+ int tag_ops_array_size = 10;
+ int ret = 0;
+
+ /* Array of tagging operations (add or remove), terminated with an
+ * empty element. Size will be increased as necessary. */
+ tag_ops = talloc_array (ctx, tag_operation_t, tag_ops_array_size);
+ if (tag_ops == NULL) {
+ fprintf (stderr, "Out of memory.\n");
+ return 1;
+ }
+
+ while ((line_len = getline (&line, &line_size, input)) != -1 &&
+ !interrupted) {
+ char *tok;
+ size_t tok_len;
+ int tag_ops_count = 0;
+
+ chomp_newline (line);
+
+ tok = strtok_len (line, " ", &tok_len);
+
+ /* Skip empty and comment lines. */
+ if (tok == NULL || *tok == '#')
+ continue;
+
+ /* T for tagging is the only recognized action for now. */
+ if (strncmp (tok, "T", tok_len) != 0) {
+ fprintf (stderr, "Warning: Ignoring invalid input line: %s\n",
+ line);
+ continue;
+ }
+
+ /* Parse tags. */
+ while ((tok = strtok_len (tok + tok_len, " ", &tok_len)) != NULL) {
+ notmuch_bool_t remove;
+ char *tag;
+
+ /* Optional explicit end of tags marker. */
+ if (strncmp (tok, "--", tok_len) == 0) {
+ tok = strtok_len (tok + tok_len, " ", &tok_len);
+ break;
+ }
+
+ /* Implicit end of tags. */
+ if (*tok != '-' && *tok != '+')
+ break;
+
+ /* If tag is terminated by NUL, there's no query string. */
+ if (*(tok + tok_len) == '\0') {
+ tok = NULL;
+ break;
+ }
+
+ /* Terminate, and start next token after terminator. */
+ *(tok + tok_len++) = '\0';
+
+ remove = (*tok == '-');
+ tag = tok + 1;
+
+ /* Refuse empty tags. */
+ if (*tag == '\0') {
+ tok = NULL;
+ break;
+ }
+
+ /* Decode tag. */
+ if (hex_decode_inplace (tag) != HEX_SUCCESS) {
+ tok = NULL;
+ break;
+ }
+
+ tag_ops[tag_ops_count].tag = tag;
+ tag_ops[tag_ops_count].remove = remove;
+ tag_ops_count++;
+
+ /* Make room for terminating empty element and potential
+ * new tags, if necessary. This should be a fairly rare
+ * case, considering the initial array size. */
+ if (tag_ops_count == tag_ops_array_size) {
+ tag_ops_array_size *= 2;
+ tag_ops = talloc_realloc (ctx, tag_ops, tag_operation_t,
+ tag_ops_array_size);
+ if (tag_ops == NULL) {
+ fprintf (stderr, "Out of memory.\n");
+ return 1;
+ }
+ }
+ }
+
+ if (tok == NULL || tag_ops_count == 0) {
+ /* FIXME: line has been modified! */
+ fprintf (stderr, "Warning: Ignoring invalid input line: %s\n",
+ line);
+ continue;
+ }
+
+ tag_ops[tag_ops_count].tag = NULL;
+
+ /* tok now points to the query string */
+ if (hex_decode_inplace (tok) != HEX_SUCCESS) {
+ /* FIXME: line has been modified! */
+ fprintf (stderr, "Warning: Ignoring invalid input line: %s\n",
+ line);
+ continue;
+ }
+
+ ret = tag_query (ctx, notmuch, tok, tag_ops, synchronize_flags);
+ if (ret)
+ break;
+ }
+
+ if (line)
+ free (line);
+
+ return ret || interrupted;
+}
+
int
notmuch_tag_command (void *ctx, int argc, char *argv[])
{
- tag_operation_t *tag_ops;
+ tag_operation_t *tag_ops = NULL;
int tag_ops_count = 0;
- char *query_string;
+ char *query_string = NULL;
notmuch_config_t *config;
notmuch_database_t *notmuch;
struct sigaction action;
notmuch_bool_t synchronize_flags;
- int i;
+ notmuch_bool_t use_stdin = FALSE;
+ int i, opt_index;
int ret;
/* Setup our handler for SIGINT */
@@ -187,42 +352,56 @@ notmuch_tag_command (void *ctx, int argc, char *argv[])
action.sa_flags = SA_RESTART;
sigaction (SIGINT, &action, NULL);
- argc--; argv++; /* skip subcommand argument */
+ notmuch_opt_desc_t options[] = {
+ { NOTMUCH_OPT_BOOLEAN, &use_stdin, "stdin", 0, 0 },
+ { 0, 0, 0, 0, 0 }
+ };
- /* Array of tagging operations (add or remove), terminated with an
- * empty element. */
- tag_ops = talloc_array (ctx, tag_operation_t, argc + 1);
- if (tag_ops == NULL) {
- fprintf (stderr, "Out of memory.\n");
+ opt_index = parse_arguments (argc, argv, options, 1);
+ if (opt_index < 0)
return 1;
- }
- for (i = 0; i < argc; i++) {
- if (strcmp (argv[i], "--") == 0) {
- i++;
- break;
+ if (use_stdin) {
+ if (opt_index != argc) {
+ fprintf (stderr, "Can't specify both cmdline and stdin!\n");
+ return 1;
}
- if (argv[i][0] == '+' || argv[i][0] == '-') {
- tag_ops[tag_ops_count].tag = argv[i] + 1;
- tag_ops[tag_ops_count].remove = (argv[i][0] == '-');
- tag_ops_count++;
- } else {
- break;
+ } else {
+ /* Array of tagging operations (add or remove), terminated with an
+ * empty element. */
+ tag_ops = talloc_array (ctx, tag_operation_t, argc - opt_index + 1);
+ if (tag_ops == NULL) {
+ fprintf (stderr, "Out of memory.\n");
+ return 1;
}
- }
- tag_ops[tag_ops_count].tag = NULL;
+ for (i = opt_index; i < argc; i++) {
+ if (strcmp (argv[i], "--") == 0) {
+ i++;
+ break;
+ }
+ if (argv[i][0] == '+' || argv[i][0] == '-') {
+ tag_ops[tag_ops_count].tag = argv[i] + 1;
+ tag_ops[tag_ops_count].remove = (argv[i][0] == '-');
+ tag_ops_count++;
+ } else {
+ break;
+ }
+ }
- if (tag_ops_count == 0) {
- fprintf (stderr, "Error: 'notmuch tag' requires at least one tag to add or remove.\n");
- return 1;
- }
+ tag_ops[tag_ops_count].tag = NULL;
- query_string = query_string_from_args (ctx, argc - i, &argv[i]);
+ if (tag_ops_count == 0) {
+ fprintf (stderr, "Error: 'notmuch tag' requires at least one tag to add or remove.\n");
+ return 1;
+ }
- if (*query_string == '\0') {
- fprintf (stderr, "Error: notmuch tag requires at least one search term.\n");
- return 1;
+ query_string = query_string_from_args (ctx, argc - i, &argv[i]);
+
+ if (*query_string == '\0') {
+ fprintf (stderr, "Error: notmuch tag requires at least one search term.\n");
+ return 1;
+ }
}
config = notmuch_config_open (ctx, NULL, NULL);
@@ -236,7 +415,10 @@ notmuch_tag_command (void *ctx, int argc, char *argv[])
synchronize_flags = notmuch_config_get_maildir_synchronize_flags (config);
- ret = tag_query (ctx, notmuch, query_string, tag_ops, synchronize_flags);
+ if (use_stdin)
+ ret = tag_file (ctx, notmuch, stdin, synchronize_flags);
+ else
+ ret = tag_query (ctx, notmuch, query_string, tag_ops, synchronize_flags);
notmuch_database_close (notmuch);
--
1.7.5.4
^ permalink raw reply related [flat|nested] 28+ messages in thread
* Re: [PATCH 6/8] cli: add support for batch tagging operations to "notmuch tag"
2012-03-31 22:17 ` [PATCH 6/8] cli: add support for batch tagging operations to "notmuch tag" Jani Nikula
@ 2012-04-02 17:51 ` Jameson Graef Rollins
2012-04-02 19:46 ` Jani Nikula
2012-04-02 17:54 ` Jameson Graef Rollins
1 sibling, 1 reply; 28+ messages in thread
From: Jameson Graef Rollins @ 2012-04-02 17:51 UTC (permalink / raw)
To: Jani Nikula, notmuch
[-- Attachment #1: Type: text/plain, Size: 1227 bytes --]
On Sat, Mar 31 2012, Jani Nikula <jani@nikula.org> wrote:
> Add support for batch tagging operations through stdin to "notmuch
> tag". This can be enabled with the new --stdin command line option to
> "notmuch new". The input must consist of lines of the format:
>
> T +<tag>|-<tag> [...] [--] <search-terms>
Hey, Jani. I can understand why you're going for this form, since it
mimics the command line arguments for tag and you want to be able to tag
for arbitrary searches, but I must say that I find it unappealing that
this functionality is *so* similar to that of notmuch restore, but the
file format is totally different. Can't we unify all of this in a
better way?
This patch series seems to beg that we actually just unify the tag and
restore functions in to one thing. They're really just doing the same
thing. If we extended restore to accept a search-term instead of a
message id they would in fact be identical.
The more I think about it the more it makes sense to me that we just
merge tag and restore, and extend the input file format to be able to
accept search terms. It just doesn't make sense to have these two
interfaces that do basically the exact same thing but in a slightly
divergent way.
jamie.
[-- Attachment #2: Type: application/pgp-signature, Size: 835 bytes --]
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH 6/8] cli: add support for batch tagging operations to "notmuch tag"
2012-04-02 17:51 ` Jameson Graef Rollins
@ 2012-04-02 19:46 ` Jani Nikula
0 siblings, 0 replies; 28+ messages in thread
From: Jani Nikula @ 2012-04-02 19:46 UTC (permalink / raw)
To: Jameson Graef Rollins, notmuch
Jameson Graef Rollins <jrollins@finestructure.net> writes:
> On Sat, Mar 31 2012, Jani Nikula <jani@nikula.org> wrote:
>> Add support for batch tagging operations through stdin to "notmuch
>> tag". This can be enabled with the new --stdin command line option to
>> "notmuch new". The input must consist of lines of the format:
>>
>> T +<tag>|-<tag> [...] [--] <search-terms>
>
> Hey, Jani. I can understand why you're going for this form, since it
> mimics the command line arguments for tag and you want to be able to tag
> for arbitrary searches, but I must say that I find it unappealing that
> this functionality is *so* similar to that of notmuch restore, but the
> file format is totally different. Can't we unify all of this in a
> better way?
Hi Jameson -
Thanks for your comments. The intent is to converge notmuch tag and
restore file formats, and reuse the code between them. The above is the
proposed format for both, but I see that I failed to mention the future
plans.
This actually started from David's dump/restore rework [1]. I wanted to
have batch tagging, and realized his proposed format wouldn't work for
that. There was some discussion about this on IRC, and we settled on the
above format as a starting point. And now would be the time to comment
on the format, if you have any issues with it! ;)
> This patch series seems to beg that we actually just unify the tag and
> restore functions in to one thing. They're really just doing the same
> thing. If we extended restore to accept a search-term instead of a
> message id they would in fact be identical.
>
> The more I think about it the more it makes sense to me that we just
> merge tag and restore, and extend the input file format to be able to
> accept search terms. It just doesn't make sense to have these two
> interfaces that do basically the exact same thing but in a slightly
> divergent way.
I totally agree on this goal. The dump/restore follow-up part is a
work-in-progress in my local tree. At least initially, the format would
be the same for tag and restore, but with the following subtle
differences:
1) notmuch restore will only accept an id:message-id style query to be
able to warn about messages present in the dump file but missing in
the database. This is because dump/restore is primarily a
backup/restore style operation.
2) Partly because of the above, and partly because of notmuch restore
--accumulate vs. not, the query/tagging optimizations will have to be
different.
The rough idea is that both notmuch tag and restore would use the same
file parsing (tag_file() introduced in this series), but notmuch tag
would use tag_query() in notmuch-tag.c for tagging, and notmuch restore
would use tag_message() in notmuch-restore.c for tagging.
> and btw, according to comments in the code 'T' is supposed to stand for
> the action, "tag" in this case. What other actions do you imagine?
This was actually David's idea. It allows extensibility in the format.
Thanks again for your interest, and sorry about not opening up the
future plans up front.
BR,
Jani.
[1] id:"1324214111-32079-1-git-send-email-david@tethera.net"
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH 6/8] cli: add support for batch tagging operations to "notmuch tag"
2012-03-31 22:17 ` [PATCH 6/8] cli: add support for batch tagging operations to "notmuch tag" Jani Nikula
2012-04-02 17:51 ` Jameson Graef Rollins
@ 2012-04-02 17:54 ` Jameson Graef Rollins
2012-04-02 20:26 ` David Bremner
1 sibling, 1 reply; 28+ messages in thread
From: Jameson Graef Rollins @ 2012-04-02 17:54 UTC (permalink / raw)
To: Jani Nikula, notmuch
[-- Attachment #1: Type: text/plain, Size: 458 bytes --]
On Sat, Mar 31 2012, Jani Nikula <jani@nikula.org> wrote:
> Add support for batch tagging operations through stdin to "notmuch
> tag". This can be enabled with the new --stdin command line option to
> "notmuch new". The input must consist of lines of the format:
>
> T +<tag>|-<tag> [...] [--] <search-terms>
and btw, according to comments in the code 'T' is supposed to stand for
the action, "tag" in this case. What other actions do you imagine?
jamie.
[-- Attachment #2: Type: application/pgp-signature, Size: 835 bytes --]
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH 6/8] cli: add support for batch tagging operations to "notmuch tag"
2012-04-02 17:54 ` Jameson Graef Rollins
@ 2012-04-02 20:26 ` David Bremner
2012-04-02 20:42 ` Jameson Graef Rollins
0 siblings, 1 reply; 28+ messages in thread
From: David Bremner @ 2012-04-02 20:26 UTC (permalink / raw)
To: Jameson Graef Rollins, Jani Nikula, notmuch
Jameson Graef Rollins <jrollins@finestructure.net> writes:
> On Sat, Mar 31 2012, Jani Nikula <jani@nikula.org> wrote:
>> Add support for batch tagging operations through stdin to "notmuch
>> tag". This can be enabled with the new --stdin command line option to
>> "notmuch new". The input must consist of lines of the format:
>>
>> T +<tag>|-<tag> [...] [--] <search-terms>
>
I think that's my fault. I was imagining a possible future line-oriented
notmuch server and having various actions/queries possible. It seems a
bit blue sky at this point, but it does give extesibility fairly
cheaply.
d
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH 6/8] cli: add support for batch tagging operations to "notmuch tag"
2012-04-02 20:26 ` David Bremner
@ 2012-04-02 20:42 ` Jameson Graef Rollins
2012-04-02 21:07 ` Jani Nikula
0 siblings, 1 reply; 28+ messages in thread
From: Jameson Graef Rollins @ 2012-04-02 20:42 UTC (permalink / raw)
To: David Bremner, Jani Nikula, notmuch
[-- Attachment #1: Type: text/plain, Size: 939 bytes --]
On Mon, Apr 02 2012, David Bremner <david@tethera.net> wrote:
> Jameson Graef Rollins <jrollins@finestructure.net> writes:
>> On Sat, Mar 31 2012, Jani Nikula <jani@nikula.org> wrote:
>>> Add support for batch tagging operations through stdin to "notmuch
>>> tag". This can be enabled with the new --stdin command line option to
>>> "notmuch new". The input must consist of lines of the format:
>>>
>>> T +<tag>|-<tag> [...] [--] <search-terms>
>>
>
> I think that's my fault. I was imagining a possible future line-oriented
> notmuch server and having various actions/queries possible. It seems a
> bit blue sky at this point, but it does give extesibility fairly
> cheaply.
But then why not just make the command explicit, and just have the first
field be "tag"?
But then I wonder why do we even need any of this at all? Isn't it this
just exactly equivalent to:
xargs -l notmuch <commands.txt
??
jamie.
[-- Attachment #2: Type: application/pgp-signature, Size: 835 bytes --]
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH 6/8] cli: add support for batch tagging operations to "notmuch tag"
2012-04-02 20:42 ` Jameson Graef Rollins
@ 2012-04-02 21:07 ` Jani Nikula
2012-04-02 21:20 ` Jameson Graef Rollins
0 siblings, 1 reply; 28+ messages in thread
From: Jani Nikula @ 2012-04-02 21:07 UTC (permalink / raw)
To: Jameson Graef Rollins; +Cc: Notmuch Mail
[-- Attachment #1: Type: text/plain, Size: 1238 bytes --]
On Apr 2, 2012 11:42 PM, "Jameson Graef Rollins" <jrollins@finestructure.net>
wrote:
>
> On Mon, Apr 02 2012, David Bremner <david@tethera.net> wrote:
> > Jameson Graef Rollins <jrollins@finestructure.net> writes:
> >> On Sat, Mar 31 2012, Jani Nikula <jani@nikula.org> wrote:
> >>> Add support for batch tagging operations through stdin to "notmuch
> >>> tag". This can be enabled with the new --stdin command line option to
> >>> "notmuch new". The input must consist of lines of the format:
> >>>
> >>> T +<tag>|-<tag> [...] [--] <search-terms>
> >>
> >
> > I think that's my fault. I was imagining a possible future line-oriented
> > notmuch server and having various actions/queries possible. It seems a
> > bit blue sky at this point, but it does give extesibility fairly
> > cheaply.
>
> But then why not just make the command explicit, and just have the first
> field be "tag"?
>
> But then I wonder why do we even need any of this at all? Isn't it this
> just exactly equivalent to:
>
> xargs -l notmuch <commands.txt
>
> ??
Batch tagging brings performance and atomicity by opening and closing the
db only once. The hex encoding handles insane message ids and tags.
Otherwise there should be no difference.
Jani.
>
> jamie.
[-- Attachment #2: Type: text/html, Size: 1831 bytes --]
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH 6/8] cli: add support for batch tagging operations to "notmuch tag"
2012-04-02 21:07 ` Jani Nikula
@ 2012-04-02 21:20 ` Jameson Graef Rollins
2012-04-02 21:31 ` Jani Nikula
0 siblings, 1 reply; 28+ messages in thread
From: Jameson Graef Rollins @ 2012-04-02 21:20 UTC (permalink / raw)
To: Jani Nikula; +Cc: Notmuch Mail
[-- Attachment #1: Type: text/plain, Size: 549 bytes --]
On Mon, Apr 02 2012, Jani Nikula <jani@nikula.org> wrote:
> Batch tagging brings performance and atomicity by opening and closing the
> db only once. The hex encoding handles insane message ids and tags.
> Otherwise there should be no difference.
I can maybe see performance in terms of opening and closing the db only
once, but atomicity? Not sure I see how that is improved.
And shouldn't we be able to improve the handling of command line
arguments so that we get the same encoding handling on the command line
as we would from stdin?
jamie.
[-- Attachment #2: Type: application/pgp-signature, Size: 835 bytes --]
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH 6/8] cli: add support for batch tagging operations to "notmuch tag"
2012-04-02 21:20 ` Jameson Graef Rollins
@ 2012-04-02 21:31 ` Jani Nikula
2012-04-02 21:43 ` Jameson Graef Rollins
0 siblings, 1 reply; 28+ messages in thread
From: Jani Nikula @ 2012-04-02 21:31 UTC (permalink / raw)
To: Jameson Graef Rollins; +Cc: Notmuch Mail
[-- Attachment #1: Type: text/plain, Size: 762 bytes --]
On Apr 3, 2012 12:20 AM, "Jameson Graef Rollins" <jrollins@finestructure.net>
wrote:
>
> On Mon, Apr 02 2012, Jani Nikula <jani@nikula.org> wrote:
> > Batch tagging brings performance and atomicity by opening and closing
the
> > db only once. The hex encoding handles insane message ids and tags.
> > Otherwise there should be no difference.
>
> I can maybe see performance in terms of opening and closing the db only
> once, but atomicity? Not sure I see how that is improved.
Err, I meant locking, sorry. Nobody else is able to alter the db while
batch tagging is in progress.
>
> And shouldn't we be able to improve the handling of command line
> arguments so that we get the same encoding handling on the command line
> as we would from stdin?
>
> jamie.
[-- Attachment #2: Type: text/html, Size: 1024 bytes --]
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH 6/8] cli: add support for batch tagging operations to "notmuch tag"
2012-04-02 21:31 ` Jani Nikula
@ 2012-04-02 21:43 ` Jameson Graef Rollins
2012-04-03 8:21 ` Jani Nikula
0 siblings, 1 reply; 28+ messages in thread
From: Jameson Graef Rollins @ 2012-04-02 21:43 UTC (permalink / raw)
To: Jani Nikula; +Cc: Notmuch Mail
[-- Attachment #1: Type: text/plain, Size: 512 bytes --]
So what if we instead modified the top level binary ("notmuch") to:
* add an option to specify that commands are to be processed from stdin
(--batch)
* when in batch mode the db is opened once at the beginning and locked
* commands are processed from stdin in the exact same form they are
specified on the command line ("tag +foo -- from:bar", "search
tag:foo", etc.).
* db is closed when EOF is reached.
That seems like it would be a generally much cleaner interface, and much
more flexible.
jamie.
[-- Attachment #2: Type: application/pgp-signature, Size: 835 bytes --]
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH 6/8] cli: add support for batch tagging operations to "notmuch tag"
2012-04-02 21:43 ` Jameson Graef Rollins
@ 2012-04-03 8:21 ` Jani Nikula
2012-04-03 22:51 ` Jameson Graef Rollins
0 siblings, 1 reply; 28+ messages in thread
From: Jani Nikula @ 2012-04-03 8:21 UTC (permalink / raw)
To: Jameson Graef Rollins; +Cc: Notmuch Mail
On Mon, 02 Apr 2012 14:43:16 -0700, Jameson Graef Rollins <jrollins@finestructure.net> wrote:
> So what if we instead modified the top level binary ("notmuch") to:
>
> * add an option to specify that commands are to be processed from stdin
> (--batch)
>
> * when in batch mode the db is opened once at the beginning and locked
>
> * commands are processed from stdin in the exact same form they are
> specified on the command line ("tag +foo -- from:bar", "search
> tag:foo", etc.).
>
> * db is closed when EOF is reached.
>
> That seems like it would be a generally much cleaner interface, and much
> more flexible.
Yes, it's more flexible, but what are the real benefits of such
flexibility? What other commands than tag/restore would truly benefit
from this? I might add that nobody has asked for such flexibility.
As to the cleanliness of the interface, all is well as long as no
characters needing escaping are used. With them, things get hairy. On
the command line, the shell provides the necessary escaping and quoting
mechanisms, and parsing to subcommand argv. I think it would be
confusing to require %NN style hex escaping on the command line where
shell mechanisms can be used, but also it would be a lot of work to
duplicate the shell escaping and quoting mechanisms to the stdin
parsing. For simplicity, I'd use hex escaping for stdin, and leave the
command line to the shell.
Finally, I'm not sure how the config and database open/close could be
cleanly duplicated in top level "notmuch" and the subcommands. Should
there be logic in the subcommands not to open the config/database if
they're already opened in top level? All of notmuch cli should also be
reworked to pass the config and database on to the subcommands. Not all
the commands need to open the database in read/write mode, and some
commands don't need to open the database at all, so it would not make
sense to move config/database open/close to top level completely.
In summary, I don't see enough value in this to put all the work
required into it. I think it's way more trouble than it's worth.
BR,
Jani.
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH 6/8] cli: add support for batch tagging operations to "notmuch tag"
2012-04-03 8:21 ` Jani Nikula
@ 2012-04-03 22:51 ` Jameson Graef Rollins
2012-04-04 2:09 ` David Bremner
0 siblings, 1 reply; 28+ messages in thread
From: Jameson Graef Rollins @ 2012-04-03 22:51 UTC (permalink / raw)
To: Jani Nikula; +Cc: Notmuch Mail
[-- Attachment #1: Type: text/plain, Size: 1100 bytes --]
On Tue, Apr 03 2012, Jani Nikula <jani@nikula.org> wrote:
> Yes, it's more flexible, but what are the real benefits of such
> flexibility? What other commands than tag/restore would truly benefit
> from this? I might add that nobody has asked for such flexibility.
But I do believe it was your patch that introduced the idea of batch
processing of other commands to begin with. I think you're just turning
my original question to you back on me. There's no point in adding and
initial flag to the beginning of the line to specify the command to
execute if you have no intention of ever supporting other commands. Or
vice versa, if you ultimately want to support multiple commands then the
place to do it is in the top level interface, and not in one of the
subcommands.
Ultimately, I think this patch series suffers from being simultaneously
too close to existing functionality (restore), yet tantalizingly close
to a much more generally useful concept of batch command processing. I
would rather see it go one way or the other rather than add something
fairly redundant in the middle.
jamie.
[-- Attachment #2: Type: application/pgp-signature, Size: 835 bytes --]
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH 6/8] cli: add support for batch tagging operations to "notmuch tag"
2012-04-03 22:51 ` Jameson Graef Rollins
@ 2012-04-04 2:09 ` David Bremner
2012-04-04 7:55 ` Jameson Graef Rollins
0 siblings, 1 reply; 28+ messages in thread
From: David Bremner @ 2012-04-04 2:09 UTC (permalink / raw)
To: Jameson Graef Rollins, Jani Nikula; +Cc: Notmuch Mail
Jameson Graef Rollins <jrollins@finestructure.net> writes:
> There's no point in adding and initial flag to the beginning of the
> line to specify the command to execute if you have no intention of
> ever supporting other commands.
My thinking was that it was useful for the disk format to have a bit
more information in it so that we could more easily change the interface
in an upwardly compatible way. If at some point in the future we do have
more general batch command processing, it would be nice not have to
change the file format again, particularly for dump files.
> Ultimately, I think this patch series suffers from being
> simultaneously too close to existing functionality (restore),
Well, as Jani mentioned there is a fair amount of code overlap with an
in-progress improved dump/restore facility.
> yet tantalizingly close to a much more generally useful concept of
> batch command processing.
Well, opinions seem to differ about how close we are to that in terms of
real code, but I think it is worth thinking about as we define file
formats.
d
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH 6/8] cli: add support for batch tagging operations to "notmuch tag"
2012-04-04 2:09 ` David Bremner
@ 2012-04-04 7:55 ` Jameson Graef Rollins
2012-04-04 10:55 ` David Bremner
0 siblings, 1 reply; 28+ messages in thread
From: Jameson Graef Rollins @ 2012-04-04 7:55 UTC (permalink / raw)
To: David Bremner, Jani Nikula; +Cc: Notmuch Mail
[-- Attachment #1: Type: text/plain, Size: 1197 bytes --]
On Tue, Apr 03 2012, David Bremner <david@tethera.net> wrote:
> My thinking was that it was useful for the disk format to have a bit
> more information in it so that we could more easily change the interface
> in an upwardly compatible way. If at some point in the future we do have
> more general batch command processing, it would be nice not have to
> change the file format again, particularly for dump files.
I concede that it's possible to move forward with this idea in a way
that satisfies an immediate need while still being flexible going
forward.
With that in mind, I think I stand by my suggestion that the form should
match exactly the notmuch subcommand format. Even considering the
technical issues that Jani brought up, I still think it makes the most
sense to imagine generic batch processing handled by the top level
binary. And in that case the most logical format for the input is
probably just that of the CLI arguments.
Just out of curiosity and for the sake of argument, if we were going to
design a server/batch processor from the ground up would it make sense
to use a format like this, or would we better off opting for some other
more established protocol?
jamie.
[-- Attachment #2: Type: application/pgp-signature, Size: 835 bytes --]
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH 6/8] cli: add support for batch tagging operations to "notmuch tag"
2012-04-04 7:55 ` Jameson Graef Rollins
@ 2012-04-04 10:55 ` David Bremner
0 siblings, 0 replies; 28+ messages in thread
From: David Bremner @ 2012-04-04 10:55 UTC (permalink / raw)
To: Jameson Graef Rollins, Jani Nikula; +Cc: Notmuch Mail
Jameson Graef Rollins <jrollins@finestructure.net> writes:
> With that in mind, I think I stand by my suggestion that the form should
> match exactly the notmuch subcommand format. Even considering the
> technical issues that Jani brought up, I still think it makes the most
> sense to imagine generic batch processing handled by the top level
> binary. And in that case the most logical format for the input is
> probably just that of the CLI arguments.
One thing that worries me about this (and to be honest it worries me a
bit about the single character command tag) is the potential increase in
size of a dump file, if we use exactly a list of commands as a dump
format. The SQL/XML-like argument that it will all compress well is
true; nontheless for applications involving version control, it does
seem useful to have an uncompressed version around.
A very rough estimate suggests for my about 250k messages, appending
"tag " to the front of each line bloats a dump file by about 5%. Maybe
that is not worth worrying about. I'd be curious to see how 4 * #lines /
(total dump size) works out for other people. I thought that the bloat
from having + in front of every tag would be larger, but it seems that
my messages average something like one tag per message (many messages
with no tags). I'm not sure how universal that is.
We could also give up on marking the command on each line, and insert
some kind of simple header at the top. This idea came up in the context
of restore formats before.
> Just out of curiosity and for the sake of argument, if we were going to
> design a server/batch processor from the ground up would it make sense
> to use a format like this, or would we better off opting for some other
> more established protocol?
I guess it depends how much work it is to support the established
protocol, and how good the fit is with notmuch. Are there candidates
other than IMAP?
As far as implementation effort, as a totally unscientific experiment, I
grabbed Net::IMAP::Server from CPAN, it is almost 7000 lines of
perl. I'm not suggesting we use Perl ;), but I doubt C is
shorter. Hopefully we wouldn't write such a library from scratch. A
quick search did not lead me to "the canonical imap server library",
unless that is the UW one, which I have bad, if non-specific memories
about.
I think we'd need to use a fair number of extensions to basic IMAP. What
might work well is the GMail extensions to IMAP. I have no idea about
the difficulty of implementing those; I suspect there are not solid C
libraries supporting them.
d
^ permalink raw reply [flat|nested] 28+ messages in thread
* [PATCH 7/8] test: add test for notmuch tag --stdin option
2012-03-31 22:17 [PATCH 0/8] batch tagging support: "notmuch tag --stdin" Jani Nikula
` (5 preceding siblings ...)
2012-03-31 22:17 ` [PATCH 6/8] cli: add support for batch tagging operations to "notmuch tag" Jani Nikula
@ 2012-03-31 22:17 ` Jani Nikula
2012-03-31 22:17 ` [PATCH 8/8] man: document " Jani Nikula
7 siblings, 0 replies; 28+ messages in thread
From: Jani Nikula @ 2012-03-31 22:17 UTC (permalink / raw)
To: notmuch
---
test/tagging | 13 +++++++++++++
1 files changed, 13 insertions(+), 0 deletions(-)
diff --git a/test/tagging b/test/tagging
index e4782ed..15eb42d 100755
--- a/test/tagging
+++ b/test/tagging
@@ -46,4 +46,17 @@ test_expect_equal "$output" "\
thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; One (:\" inbox tag1 unread)
thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; Two (inbox tag1 tag4 unread)"
+test_begin_subtest "Batch stdin"
+notmuch tag --stdin <<EOF
+# %20 is a space in tag
+T -:"%20 -tag1 +tag5 +tag6 -- One
+T +tag1 -tag1 -tag4 +tag4 -- Two
+T -tag6 One
+T +tag5 Two
+EOF
+output=$(notmuch search \* | notmuch_search_sanitize)
+test_expect_equal "$output" "\
+thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; One (inbox tag5 unread)
+thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; Two (inbox tag4 tag5 unread)"
+
test_done
--
1.7.5.4
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 8/8] man: document notmuch tag --stdin option
2012-03-31 22:17 [PATCH 0/8] batch tagging support: "notmuch tag --stdin" Jani Nikula
` (6 preceding siblings ...)
2012-03-31 22:17 ` [PATCH 7/8] test: add test for notmuch tag --stdin option Jani Nikula
@ 2012-03-31 22:17 ` Jani Nikula
7 siblings, 0 replies; 28+ messages in thread
From: Jani Nikula @ 2012-03-31 22:17 UTC (permalink / raw)
To: notmuch
---
man/man1/notmuch-tag.1 | 44 +++++++++++++++++++++++++++++++++++++++++++-
1 files changed, 43 insertions(+), 1 deletions(-)
diff --git a/man/man1/notmuch-tag.1 b/man/man1/notmuch-tag.1
index aa4546e..30693cf 100644
--- a/man/man1/notmuch-tag.1
+++ b/man/man1/notmuch-tag.1
@@ -4,7 +4,11 @@ notmuch-tag \- Add/remove tags for all messages matching the search terms.
.SH SYNOPSIS
.B notmuch tag
-.RI "+<" tag> "|\-<" tag "> [...] [\-\-] <" search-term ">..."
+.RI "+<" tag ">|\-<" tag "> [...] [\-\-] <" search-terms ">"
+
+.B notmuch tag
+.RI "--stdin"
+
.SH DESCRIPTION
@@ -29,6 +33,44 @@ updates the maildir flags according to tag changes if the
configuration option is enabled. See \fBnotmuch-config\fR(1) for
details.
+Supported options for
+.B tag
+include
+.RS 4
+.TP 4
+.BR \-\-stdin
+
+Read batch tagging operations from standard input. This is more
+efficient than repeated
+.B notmuch tag
+invocations. See
+.B TAG FILE FORMAT
+below for the input format. This option is not compatible with
+specifying tagging on the command line.
+.RE
+
+.SH TAG FILE FORMAT
+
+The input must consist of lines of the format:
+
+.RI "T +<" tag ">|\-<" tag "> [...] [\-\-] <" search-terms ">"
+
+Each line is interpreted similarly to
+.B notmuch tag
+command line arguments. The delimiter is one or more spaces ' '. Any
+characters in <tag> and <search-terms>
+.B may
+be hex encoded with %NN where NN is the hexadecimal value of the
+character. Any ' ' and '%' characters in <tag> and <search-terms>
+.B must
+be hex encoded (using %20 and %25, respectively). Any characters that
+are not part of <tag> or <search-terms>
+.B must not
+be hex encoded.
+
+Leading and trailing space ' ' is ignored. Empty lines and lines
+beginning with '#' are ignored.
+
.SH SEE ALSO
\fBnotmuch\fR(1), \fBnotmuch-config\fR(1), \fBnotmuch-count\fR(1),
--
1.7.5.4
^ permalink raw reply related [flat|nested] 28+ messages in thread