* [patch v3 1/6] hex-escape: (en|de)code strings to/from restricted character set
2012-12-02 13:33 v3 of testing for new tagging/dump/restore david
@ 2012-12-02 13:33 ` david
2012-12-02 13:33 ` [patch v3 2/6] test/hex-xcode: new test binary david
` (6 subsequent siblings)
7 siblings, 0 replies; 14+ messages in thread
From: david @ 2012-12-02 13:33 UTC (permalink / raw)
To: notmuch; +Cc: David Bremner
From: David Bremner <bremner@debian.org>
The character set is chosen to be suitable for pathnames, and the same
as that used by contrib/nmbug
[With additions by Jani Nikula]
---
util/Makefile.local | 2 +-
util/hex-escape.c | 161 +++++++++++++++++++++++++++++++++++++++++++++++++++
util/hex-escape.h | 41 +++++++++++++
3 files changed, 203 insertions(+), 1 deletion(-)
create mode 100644 util/hex-escape.c
create mode 100644 util/hex-escape.h
diff --git a/util/Makefile.local b/util/Makefile.local
index c7cae61..3ca623e 100644
--- a/util/Makefile.local
+++ b/util/Makefile.local
@@ -3,7 +3,7 @@
dir := util
extra_cflags += -I$(srcdir)/$(dir)
-libutil_c_srcs := $(dir)/xutil.c $(dir)/error_util.c
+libutil_c_srcs := $(dir)/xutil.c $(dir)/error_util.c $(dir)/hex-escape.c
libutil_modules := $(libutil_c_srcs:.c=.o)
diff --git a/util/hex-escape.c b/util/hex-escape.c
new file mode 100644
index 0000000..b7e2e07
--- /dev/null
+++ b/util/hex-escape.c
@@ -0,0 +1,161 @@
+/* hex-escape.c - Manage encoding and decoding of byte strings into path names
+ *
+ * Copyright (c) 2011 David Bremner
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see http://www.gnu.org/licenses/ .
+ *
+ * Author: David Bremner <david@tethera.net>
+ */
+
+#include <assert.h>
+#include <string.h>
+#include <talloc.h>
+#include <ctype.h>
+#include "error_util.h"
+#include "hex-escape.h"
+
+static const size_t default_buf_size = 1024; /* not referenced by any function in this file */
+
+static const char *output_charset = /* bytes that hex_encode() copies through unescaped */
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+-_@=.,";
+
+static const char escape_char = '%'; /* introduces a two-hex-digit escape when decoding */
+
+/* Return 1 when 'c' occurs in output_charset and may therefore be
+ * emitted verbatim, 0 otherwise. Note that strchr() also matches the
+ * terminating NUL of output_charset. */
+static int
+is_output (char c)
+{
+    const char *hit = strchr (output_charset, c);
+
+    return hit ? 1 : 0;
+}
+
+/* Ensure that *out can hold at least 'needed' bytes.
+ *
+ * When *out_size is already large enough nothing happens. Otherwise
+ * *out is allocated (if NULL) or resized with talloc under 'ctx' and
+ * *out_size is set to 'needed'.
+ *
+ * Returns 1 on success and 0 if the allocation failed; on failure
+ * *out and *out_size are left untouched, so the caller keeps a valid
+ * description of whatever buffer it already had.
+ */
+static int
+maybe_realloc (void *ctx, size_t needed, char **out, size_t *out_size)
+{
+    char *grown;
+
+    if (*out_size >= needed)
+        return 1;
+
+    if (*out == NULL)
+        grown = talloc_size (ctx, needed);
+    else
+        grown = talloc_realloc (ctx, *out, char, needed);
+
+    /* Do not store a failed result in *out: a failed talloc_realloc
+     * leaves the old block in place, and overwriting the pointer would
+     * lose it while *out_size still claimed the old capacity. */
+    if (grown == NULL)
+        return 0;
+
+    *out = grown;
+    *out_size = needed;
+    return 1;
+}
+
+/* Encode the NUL-terminated string 'in' into *out (see hex-escape.h
+ * for the buffer protocol). Bytes found in output_charset are copied
+ * unchanged; every other byte becomes '%' followed by two lowercase
+ * hex digits.
+ */
+hex_status_t
+hex_encode (void *ctx, const char *in, char **out, size_t *out_size)
+{
+    static const char hex_digits[] = "0123456789abcdef";
+    const char *src;
+    char *dst;
+    size_t needed = 1; /* for the NUL */
+
+    assert (ctx); assert (in); assert (out); assert (out_size);
+
+    /* First pass: one byte for a literal, three for an escape. */
+    for (src = in; *src != '\0'; src++)
+        needed += is_output (*src) ? 1 : 3;
+
+    if (*out == NULL)
+        *out_size = 0;
+
+    if (!maybe_realloc (ctx, needed, out, out_size))
+        return HEX_OUT_OF_MEMORY;
+
+    /* Second pass: emit the encoded bytes. */
+    dst = *out;
+    for (src = in; *src != '\0'; src++) {
+        unsigned char byte = (unsigned char) *src;
+
+        if (is_output (*src)) {
+            *dst++ = *src;
+            continue;
+        }
+
+        *dst++ = '%';
+        *dst++ = hex_digits[byte >> 4];
+        *dst++ = hex_digits[byte & 0x0f];
+    }
+
+    *dst = '\0';
+    return HEX_SUCCESS;
+}
+
+/* Hex decode 'in' to 'out'.
+ *
+ * Every '%' must be followed by two hex digits, which are replaced by
+ * the byte they denote; all other bytes are copied as-is. Returns
+ * HEX_SYNTAX_ERROR at the first malformed escape (no terminating NUL
+ * is written in that case), HEX_SUCCESS otherwise.
+ *
+ * This must succeed for in == out to support hex_decode_inplace().
+ */
+static hex_status_t
+hex_decode_internal (const char *in, unsigned char *out)
+{
+    const char *src = in;
+    unsigned char *dst = out;
+
+    for (;;) {
+        char digits[3];
+        char *end;
+
+        if (*src == '\0')
+            break;
+
+        if (*src != escape_char) {
+            *dst++ = *src++;
+            continue;
+        }
+
+        /* The second test is skipped when the first fails, so a string
+         * that ends right after '%' is never read past its NUL. */
+        if (! (isxdigit ((unsigned char) src[1]) &&
+               isxdigit ((unsigned char) src[2])))
+            return HEX_SYNTAX_ERROR;
+
+        /* strtoul() needs a NUL-terminated copy of just the two digits. */
+        digits[0] = src[1];
+        digits[1] = src[2];
+        digits[2] = '\0';
+
+        *dst = strtoul (digits, &end, 16);
+        if (end != digits + 2)
+            return HEX_SYNTAX_ERROR;
+
+        dst++;
+        src += 3;
+    }
+
+    *dst = '\0';
+    return HEX_SUCCESS;
+}
+
+hex_status_t
+hex_decode_inplace (char *s)
+{
+ /* A decoded string is never longer than the encoded one, so it is
+ * safe to decode a string onto itself. The status comes straight
+ * from hex_decode_internal(); on HEX_SYNTAX_ERROR 's' may already
+ * have been partially overwritten. */
+ return hex_decode_internal (s, (unsigned char *) s);
+}
+
+/* Decode the NUL-terminated, hex-escaped string 'in' into *out (see
+ * hex-escape.h for the buffer protocol).
+ *
+ * Returns HEX_OUT_OF_MEMORY if the output buffer cannot be (re)sized,
+ * otherwise whatever hex_decode_internal() reports: HEX_SYNTAX_ERROR
+ * for a '%' not followed by two hex digits, else HEX_SUCCESS.
+ */
+hex_status_t
+hex_decode (void *ctx, const char *in, char **out, size_t * out_size)
+{
+    const char *p;
+    size_t needed = 1; /* for the NUL */
+
+    assert (ctx); assert (in); assert (out); assert (out_size);
+
+    /* A well-formed escape is three input bytes but one output byte:
+     * subtract one for the '%' and let its two digits add one each.
+     * Malformed escapes are simply over-counted. The casts keep
+     * isxdigit() well defined for bytes >= 0x80 when char is signed. */
+    for (p = in; *p; p++) {
+        if ((p[0] == escape_char) &&
+            isxdigit ((unsigned char) p[1]) &&
+            isxdigit ((unsigned char) p[2]))
+            needed -= 1;
+        else
+            needed += 1;
+    }
+
+    /* As in hex_encode(): a NULL *out means "allocate for me", so a
+     * stale *out_size must not be mistaken for existing capacity. */
+    if (*out == NULL)
+        *out_size = 0;
+
+    if (!maybe_realloc (ctx, needed, out, out_size))
+        return HEX_OUT_OF_MEMORY;
+
+    return hex_decode_internal (in, (unsigned char *) *out);
+}
diff --git a/util/hex-escape.h b/util/hex-escape.h
new file mode 100644
index 0000000..5182042
--- /dev/null
+++ b/util/hex-escape.h
@@ -0,0 +1,41 @@
+#ifndef _HEX_ESCAPE_H
+#define _HEX_ESCAPE_H
+
+typedef enum hex_status {
+ HEX_SUCCESS = 0, /* conversion completed */
+ HEX_SYNTAX_ERROR, /* decode only: '%' not followed by two hex digits */
+ HEX_OUT_OF_MEMORY /* the output buffer could not be allocated or resized */
+} hex_status_t;
+
+/*
+ * The API for hex_encode() and hex_decode() is modelled on that for
+ * getline.
+ *
+ * If 'out' points to a NULL pointer a char array of the appropriate
+ * size is allocated using talloc, and out_size is updated.
+ *
+ * If 'out' points to a non-NULL pointer, it is assumed to describe an
+ * existing char array, with the size given in *out_size. This array
+ * may be resized by talloc_realloc if needed; in this case *out_size
+ * will also be updated.
+ *
+ * Note that it is an error to pass a NULL pointer for any parameter
+ * of these routines.
+ *
+ * Both routines return HEX_OUT_OF_MEMORY if the output array cannot
+ * be allocated or resized. hex_decode() additionally returns
+ * HEX_SYNTAX_ERROR when a '%' in 'in' is not followed by two hex
+ * digits. Otherwise HEX_SUCCESS is returned and *out holds a
+ * NUL-terminated result.
+ */
+
+hex_status_t
+hex_encode (void *talloc_ctx, const char *in, char **out,
+ size_t *out_size);
+
+hex_status_t
+hex_decode (void *talloc_ctx, const char *in, char **out,
+ size_t *out_size);
+
+/*
+ * Non-allocating hex decode to decode 's' in-place. The length of the
+ * result is always equal to or shorter than the length of the
+ * original.
+ *
+ * Returns HEX_SYNTAX_ERROR when a '%' is not followed by two hex
+ * digits (in which case 's' may already be partially overwritten),
+ * HEX_SUCCESS otherwise.
+ */
+hex_status_t
+hex_decode_inplace (char *s);
+#endif
--
1.7.10.4
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [patch v3 2/6] test/hex-xcode: new test binary
2012-12-02 13:33 v3 of testing for new tagging/dump/restore david
2012-12-02 13:33 ` [patch v3 1/6] hex-escape: (en|de)code strings to/from restricted character set david
@ 2012-12-02 13:33 ` david
2012-12-02 13:33 ` [patch v3 3/6] test/hex-escaping: new test for hex escaping routines david
` (5 subsequent siblings)
7 siblings, 0 replies; 14+ messages in thread
From: david @ 2012-12-02 13:33 UTC (permalink / raw)
To: notmuch; +Cc: David Bremner
From: David Bremner <bremner@debian.org>
This program is used both as a test-bed/unit-tester for
../util/hex-escape.c, and also as a utility in future tests of dump
and restore.
---
test/.gitignore | 1 +
test/Makefile.local | 13 +++++-
test/basic | 1 +
test/hex-xcode.c | 109 +++++++++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 122 insertions(+), 2 deletions(-)
create mode 100644 test/hex-xcode.c
diff --git a/test/.gitignore b/test/.gitignore
index e63c689..be7ab5e 100644
--- a/test/.gitignore
+++ b/test/.gitignore
@@ -3,4 +3,5 @@ corpus.mail
smtp-dummy
symbol-test
arg-test
+hex-xcode
tmp.*
diff --git a/test/Makefile.local b/test/Makefile.local
index 9ae130a..8da4c56 100644
--- a/test/Makefile.local
+++ b/test/Makefile.local
@@ -13,6 +13,9 @@ smtp_dummy_modules = $(smtp_dummy_srcs:.c=.o)
$(dir)/arg-test: $(dir)/arg-test.o command-line-arguments.o util/libutil.a
$(call quiet,CC) -I. $^ -o $@
+$(dir)/hex-xcode: $(dir)/hex-xcode.o command-line-arguments.o util/libutil.a
+ $(call quiet,CC) -I. $^ -o $@ -ltalloc
+
$(dir)/smtp-dummy: $(smtp_dummy_modules)
$(call quiet,CC) $^ -o $@
@@ -24,8 +27,13 @@ $(dir)/parse-time: $(dir)/parse-time.o parse-time-string/parse-time-string.o
.PHONY: test check
-test-binaries: $(dir)/arg-test $(dir)/smtp-dummy $(dir)/symbol-test \
- $(dir)/parse-time
+TEST_BINARIES=$(dir)/arg-test \
+ $(dir)/hex-xcode \
+ $(dir)/parse-time \
+ $(dir)/smtp-dummy \
+ $(dir)/symbol-test
+
+test-binaries: $(TEST_BINARIES)
test: all test-binaries
@${dir}/notmuch-test $(OPTIONS)
@@ -36,5 +44,6 @@ SRCS := $(SRCS) $(smtp_dummy_srcs)
CLEAN := $(CLEAN) $(dir)/smtp-dummy $(dir)/smtp-dummy.o \
$(dir)/symbol-test $(dir)/symbol-test.o \
$(dir)/arg-test $(dir)/arg-test.o \
+ $(dir)/hex-xcode $(dir)/hex-xcode.o \
$(dir)/parse-time $(dir)/parse-time.o \
$(dir)/corpus.mail $(dir)/test-results $(dir)/tmp.*
diff --git a/test/basic b/test/basic
index b7feb07..c448ef8 100755
--- a/test/basic
+++ b/test/basic
@@ -56,6 +56,7 @@ tests_in_suite=$(for i in $TESTS; do echo $i; done | sort)
available=$(find "$TEST_DIRECTORY" -maxdepth 1 -type f -perm +111 \
! -name aggregate-results.sh \
! -name arg-test \
+ ! -name hex-xcode \
! -name notmuch-test \
! -name parse-time \
! -name smtp-dummy \
diff --git a/test/hex-xcode.c b/test/hex-xcode.c
new file mode 100644
index 0000000..65d4956
--- /dev/null
+++ b/test/hex-xcode.c
@@ -0,0 +1,109 @@
+/* No, nothing to do with the IDE from Apple Inc.
+ * testbed for ../util/hex-escape.c.
+ *
+ * usage:
+ * hex-xcode [--direction=(encode|decode)] [--omit-newline] < file
+ * hex-xcode [--direction=(encode|decode)] [--omit-newline] [--in-place] arg1 arg2 arg3 ...
+ *
+ */
+
+#include "notmuch-client.h"
+#include "hex-escape.h"
+#include <assert.h>
+
+enum direction {
+ ENCODE, /* selected by --direction=encode */
+ DECODE /* selected by --direction=decode; also the default in main() */
+};
+
+static int inplace = FALSE; /* set by --in-place; only consulted by xcode() when decoding */
+
+/* Encode or decode 'in' according to 'dir' and write the result to
+ * stdout (without a trailing newline).
+ *
+ * 'buf_p' / 'size_p' describe a talloc-managed scratch buffer that is
+ * reused across calls by hex_encode() and hex_decode(). When decoding
+ * with --in-place, 'in' itself is overwritten and the scratch buffer
+ * is left alone.
+ *
+ * Returns the hex_status_t of the conversion; nothing is printed
+ * unless it is HEX_SUCCESS.
+ */
+static int
+xcode (void *ctx, enum direction dir, char *in, char **buf_p, size_t *size_p)
+{
+    hex_status_t status;
+    const char *result;
+
+    if (dir == ENCODE) {
+        status = hex_encode (ctx, in, buf_p, size_p);
+        result = *buf_p;
+    } else if (inplace) {
+        /* Do not store 'in' in *buf_p: it is not talloc memory, and a
+         * later hex_encode()/hex_decode() through the same pointer
+         * would hand it to talloc_realloc(). */
+        status = hex_decode_inplace (in);
+        result = in;
+    } else {
+        status = hex_decode (ctx, in, buf_p, size_p);
+        result = *buf_p;
+    }
+
+    if (status == HEX_SUCCESS)
+        fputs (result, stdout);
+
+    return status;
+}
+
+/* Entry point: hex-encode or hex-decode each non-option argument or,
+ * when there are none, each line of stdin. Every result is followed
+ * by a newline unless --omit-newline is given. The exit status is 1
+ * for a bad command line or as soon as one conversion fails, else 0.
+ */
+int
+main (int argc, char **argv)
+{
+
+    enum direction dir = DECODE;
+    int omit_newline = FALSE;
+
+    notmuch_opt_desc_t options[] = {
+        { NOTMUCH_OPT_KEYWORD, &dir, "direction", 'd',
+          (notmuch_keyword_t []){ { "encode", ENCODE },
+                                  { "decode", DECODE },
+                                  { 0, 0 } } },
+        { NOTMUCH_OPT_BOOLEAN, &omit_newline, "omit-newline", 'n', 0 },
+        { NOTMUCH_OPT_BOOLEAN, &inplace, "in-place", 'i', 0 },
+        { 0, 0, 0, 0, 0 }
+    };
+
+    int opt_index = parse_arguments (argc, argv, options, 1);
+
+    if (opt_index < 0)
+        exit (1);
+
+    void *ctx = talloc_new (NULL);
+
+    char *line = NULL;
+    /* getline() is handed the address of this capacity; give it a
+     * defined value instead of an indeterminate one. */
+    size_t line_size = 0;
+
+    char *buffer = NULL;
+    size_t buf_size = 0;
+
+    int ret = 0;
+
+    if (opt_index < argc) {
+        /* Arguments were given: convert them and ignore stdin. */
+        for (; opt_index < argc; opt_index++) {
+
+            if (xcode (ctx, dir, argv[opt_index],
+                       &buffer, &buf_size) != HEX_SUCCESS) {
+                ret = 1;
+                break;
+            }
+
+            if (! omit_newline)
+                putchar ('\n');
+        }
+    } else {
+        while (getline (&line, &line_size, stdin) != -1) {
+
+            chomp_newline (line);
+
+            if (xcode (ctx, dir, line, &buffer, &buf_size) != HEX_SUCCESS) {
+                ret = 1;
+                break;
+            }
+
+            if (! omit_newline)
+                putchar ('\n');
+        }
+    }
+
+    /* Single exit path so both buffers are released on success and on
+     * failure alike; free (NULL) is a no-op. */
+    free (line);
+
+    talloc_free (ctx);
+
+    return ret;
+}
--
1.7.10.4
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [patch v3 3/6] test/hex-escaping: new test for hex escaping routines
2012-12-02 13:33 v3 of testing for new tagging/dump/restore david
2012-12-02 13:33 ` [patch v3 1/6] hex-escape: (en|de)code strings to/from restricted character set david
2012-12-02 13:33 ` [patch v3 2/6] test/hex-xcode: new test binary david
@ 2012-12-02 13:33 ` david
2012-12-02 15:29 ` Jani Nikula
2012-12-02 13:33 ` [patch v3 4/6] test: add database routines for testing david
` (4 subsequent siblings)
7 siblings, 1 reply; 14+ messages in thread
From: david @ 2012-12-02 13:33 UTC (permalink / raw)
To: notmuch; +Cc: David Bremner
From: David Bremner <bremner@debian.org>
These are more like unit tests, to (try to) make sure the library
functionality is working before building more complicated things on
top of it.
---
test/hex-escaping | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++
test/notmuch-test | 1 +
2 files changed, 51 insertions(+)
create mode 100755 test/hex-escaping
diff --git a/test/hex-escaping b/test/hex-escaping
new file mode 100755
index 0000000..3f107dd
--- /dev/null
+++ b/test/hex-escaping
@@ -0,0 +1,50 @@
+#!/usr/bin/env bash
+test_description="hex encoding and decoding"
+. ./test-lib.sh
+
+test_begin_subtest "round trip"
+find $TEST_DIRECTORY/corpus -type f -print | sort | xargs cat > EXPECTED
+$TEST_DIRECTORY/hex-xcode --direction=encode < EXPECTED | $TEST_DIRECTORY/hex-xcode --direction=decode > OUTPUT
+test_expect_equal_file OUTPUT EXPECTED
+
+test_begin_subtest "punctuation"
+tag1='comic_swear=$&^%$^%\\//-+$^%$'
+tag_enc1=$($TEST_DIRECTORY/hex-xcode --direction=encode "$tag1")
+test_expect_equal "$tag_enc1" "comic_swear=%24%26%5e%25%24%5e%25%5c%5c%2f%2f-+%24%5e%25%24"
+
+test_begin_subtest "round trip newlines"
+printf 'this\n tag\t has\n spaces\n' > EXPECTED.$test_count
+$TEST_DIRECTORY/hex-xcode --direction=encode < EXPECTED.$test_count |\
+ $TEST_DIRECTORY/hex-xcode --direction=decode > OUTPUT.$test_count
+test_expect_equal_file EXPECTED.$test_count OUTPUT.$test_count
+
+test_begin_subtest "round trip 8bit chars"
+echo '%c3%91%c3%a5%c3%b0%c3%a3%c3%a5%c3%a9-%c3%8f%c3%8a' > EXPECTED.$test_count
+$TEST_DIRECTORY/hex-xcode --in-place --direction=decode < EXPECTED.$test_count |\
+ $TEST_DIRECTORY/hex-xcode --direction=encode > OUTPUT.$test_count
+test_expect_equal_file EXPECTED.$test_count OUTPUT.$test_count
+
+test_begin_subtest "round trip (in-place)"
+find $TEST_DIRECTORY/corpus -type f -print | sort | xargs cat > EXPECTED
+$TEST_DIRECTORY/hex-xcode --in-place --direction=encode < EXPECTED |\
+ $TEST_DIRECTORY/hex-xcode --in-place --direction=decode > OUTPUT
+test_expect_equal_file OUTPUT EXPECTED
+
+test_begin_subtest "punctuation (in-place)"
+tag1='comic_swear=$&^%$^%\\//-+$^%$'
+tag_enc1=$($TEST_DIRECTORY/hex-xcode --in-place --direction=encode "$tag1")
+test_expect_equal "$tag_enc1" "comic_swear=%24%26%5e%25%24%5e%25%5c%5c%2f%2f-+%24%5e%25%24"
+
+test_begin_subtest "round trip newlines (in-place)"
+printf 'this\n tag\t has\n spaces\n' > EXPECTED.$test_count
+$TEST_DIRECTORY/hex-xcode --in-place --direction=encode < EXPECTED.$test_count |\
+ $TEST_DIRECTORY/hex-xcode --in-place --direction=decode > OUTPUT.$test_count
+test_expect_equal_file EXPECTED.$test_count OUTPUT.$test_count
+
+test_begin_subtest "round trip 8bit chars (in-place)"
+echo '%c3%91%c3%a5%c3%b0%c3%a3%c3%a5%c3%a9-%c3%8f%c3%8a' > EXPECTED.$test_count
+$TEST_DIRECTORY/hex-xcode --in-place --direction=decode < EXPECTED.$test_count |\
+ $TEST_DIRECTORY/hex-xcode --in-place --direction=encode > OUTPUT.$test_count
+test_expect_equal_file EXPECTED.$test_count OUTPUT.$test_count
+
+test_done
diff --git a/test/notmuch-test b/test/notmuch-test
index f275439..a6ef34f 100755
--- a/test/notmuch-test
+++ b/test/notmuch-test
@@ -60,6 +60,7 @@ TESTS="
emacs-hello
emacs-show
missing-headers
+ hex-escaping
parse-time-string
search-date
"
--
1.7.10.4
^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [patch v3 3/6] test/hex-escaping: new test for hex escaping routines
2012-12-02 13:33 ` [patch v3 3/6] test/hex-escaping: new test for hex escaping routines david
@ 2012-12-02 15:29 ` Jani Nikula
2012-12-02 23:57 ` David Bremner
0 siblings, 1 reply; 14+ messages in thread
From: Jani Nikula @ 2012-12-02 15:29 UTC (permalink / raw)
To: david, notmuch; +Cc: David Bremner
On Sun, 02 Dec 2012, david@tethera.net wrote:
> From: David Bremner <bremner@debian.org>
>
> These are more like unit tests, to (try to) make sure the library
> functionality is working before building more complicated things on
> top of it.
> ---
> test/hex-escaping | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++
> test/notmuch-test | 1 +
> 2 files changed, 51 insertions(+)
> create mode 100755 test/hex-escaping
>
> diff --git a/test/hex-escaping b/test/hex-escaping
> new file mode 100755
> index 0000000..3f107dd
> --- /dev/null
> +++ b/test/hex-escaping
> @@ -0,0 +1,50 @@
> +#!/usr/bin/env bash
> +test_description="hex encoding and decoding"
> +. ./test-lib.sh
> +
> +test_begin_subtest "round trip"
> +find $TEST_DIRECTORY/corpus -type f -print | sort | xargs cat > EXPECTED
> +$TEST_DIRECTORY/hex-xcode --direction=encode < EXPECTED | $TEST_DIRECTORY/hex-xcode --direction=decode > OUTPUT
> +test_expect_equal_file OUTPUT EXPECTED
> +
> +test_begin_subtest "punctuation"
> +tag1='comic_swear=$&^%$^%\\//-+$^%$'
> +tag_enc1=$($TEST_DIRECTORY/hex-xcode --direction=encode "$tag1")
> +test_expect_equal "$tag_enc1" "comic_swear=%24%26%5e%25%24%5e%25%5c%5c%2f%2f-+%24%5e%25%24"
> +
> +test_begin_subtest "round trip newlines"
> +printf 'this\n tag\t has\n spaces\n' > EXPECTED.$test_count
> +$TEST_DIRECTORY/hex-xcode --direction=encode < EXPECTED.$test_count |\
> + $TEST_DIRECTORY/hex-xcode --direction=decode > OUTPUT.$test_count
> +test_expect_equal_file EXPECTED.$test_count OUTPUT.$test_count
> +
> +test_begin_subtest "round trip 8bit chars"
> +echo '%c3%91%c3%a5%c3%b0%c3%a3%c3%a5%c3%a9-%c3%8f%c3%8a' > EXPECTED.$test_count
> +$TEST_DIRECTORY/hex-xcode --in-place --direction=decode < EXPECTED.$test_count |\
Did you intend to use --in-place here?
BR,
Jani.
> + $TEST_DIRECTORY/hex-xcode --direction=encode > OUTPUT.$test_count
> +test_expect_equal_file EXPECTED.$test_count OUTPUT.$test_count
> +
> +test_begin_subtest "round trip (in-place)"
> +find $TEST_DIRECTORY/corpus -type f -print | sort | xargs cat > EXPECTED
> +$TEST_DIRECTORY/hex-xcode --in-place --direction=encode < EXPECTED |\
> + $TEST_DIRECTORY/hex-xcode --in-place --direction=decode > OUTPUT
> +test_expect_equal_file OUTPUT EXPECTED
> +
> +test_begin_subtest "punctuation (in-place)"
> +tag1='comic_swear=$&^%$^%\\//-+$^%$'
> +tag_enc1=$($TEST_DIRECTORY/hex-xcode --in-place --direction=encode "$tag1")
> +test_expect_equal "$tag_enc1" "comic_swear=%24%26%5e%25%24%5e%25%5c%5c%2f%2f-+%24%5e%25%24"
> +
> +test_begin_subtest "round trip newlines (in-place)"
> +printf 'this\n tag\t has\n spaces\n' > EXPECTED.$test_count
> +$TEST_DIRECTORY/hex-xcode --in-place --direction=encode < EXPECTED.$test_count |\
> + $TEST_DIRECTORY/hex-xcode --in-place --direction=decode > OUTPUT.$test_count
> +test_expect_equal_file EXPECTED.$test_count OUTPUT.$test_count
> +
> +test_begin_subtest "round trip 8bit chars (in-place)"
> +echo '%c3%91%c3%a5%c3%b0%c3%a3%c3%a5%c3%a9-%c3%8f%c3%8a' > EXPECTED.$test_count
> +$TEST_DIRECTORY/hex-xcode --in-place --direction=decode < EXPECTED.$test_count |\
> + $TEST_DIRECTORY/hex-xcode --in-place --direction=encode > OUTPUT.$test_count
> +test_expect_equal_file EXPECTED.$test_count OUTPUT.$test_count
> +
> +test_done
> diff --git a/test/notmuch-test b/test/notmuch-test
> index f275439..a6ef34f 100755
> --- a/test/notmuch-test
> +++ b/test/notmuch-test
> @@ -60,6 +60,7 @@ TESTS="
> emacs-hello
> emacs-show
> missing-headers
> + hex-escaping
> parse-time-string
> search-date
> "
> --
> 1.7.10.4
>
> _______________________________________________
> notmuch mailing list
> notmuch@notmuchmail.org
> http://notmuchmail.org/mailman/listinfo/notmuch
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [patch v3 3/6] test/hex-escaping: new test for hex escaping routines
2012-12-02 15:29 ` Jani Nikula
@ 2012-12-02 23:57 ` David Bremner
0 siblings, 0 replies; 14+ messages in thread
From: David Bremner @ 2012-12-02 23:57 UTC (permalink / raw)
To: Jani Nikula, notmuch
Jani Nikula <jani@nikula.org> writes:
>> +test_begin_subtest "round trip 8bit chars"
>> +echo '%c3%91%c3%a5%c3%b0%c3%a3%c3%a5%c3%a9-%c3%8f%c3%8a' > EXPECTED.$test_count
>> +$TEST_DIRECTORY/hex-xcode --in-place --direction=decode < EXPECTED.$test_count |\
>
> Did you intend to use --in-place here?
Good catch. Fixed in git.
d
^ permalink raw reply [flat|nested] 14+ messages in thread
* [patch v3 4/6] test: add database routines for testing
2012-12-02 13:33 v3 of testing for new tagging/dump/restore david
` (2 preceding siblings ...)
2012-12-02 13:33 ` [patch v3 3/6] test/hex-escaping: new test for hex escaping routines david
@ 2012-12-02 13:33 ` david
2012-12-02 13:33 ` [patch v3 5/6] test: add generator for random "stub" messages david
` (3 subsequent siblings)
7 siblings, 0 replies; 14+ messages in thread
From: david @ 2012-12-02 13:33 UTC (permalink / raw)
To: notmuch; +Cc: David Bremner
From: David Bremner <bremner@debian.org>
Initially, provide a way to create "stub" messages in the notmuch
database without corresponding files. This is essentially cut and
paste from lib/database.cc. This is a separate file since we don't
want to export these symbols from libnotmuch or bloat the library with
non-exported code.
---
test/Makefile.local | 1 +
test/database-test.c | 71 ++++++++++++++++++++++++++++++++++++++++++++++++++
test/database-test.h | 21 +++++++++++++++
3 files changed, 93 insertions(+)
create mode 100644 test/database-test.c
create mode 100644 test/database-test.h
diff --git a/test/Makefile.local b/test/Makefile.local
index 8da4c56..8479f91 100644
--- a/test/Makefile.local
+++ b/test/Makefile.local
@@ -45,5 +45,6 @@ CLEAN := $(CLEAN) $(dir)/smtp-dummy $(dir)/smtp-dummy.o \
$(dir)/symbol-test $(dir)/symbol-test.o \
$(dir)/arg-test $(dir)/arg-test.o \
$(dir)/hex-xcode $(dir)/hex-xcode.o \
+ $(dir)/database-test.o \
$(dir)/parse-time $(dir)/parse-time.o \
$(dir)/corpus.mail $(dir)/test-results $(dir)/tmp.*
diff --git a/test/database-test.c b/test/database-test.c
new file mode 100644
index 0000000..b8c3a67
--- /dev/null
+++ b/test/database-test.c
@@ -0,0 +1,71 @@
+/*
+ * Database routines intended only for testing, not exported from
+ * library.
+ *
+ * Copyright (c) 2012 David Bremner
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see http://www.gnu.org/licenses/ .
+ *
+ * Author: David Bremner <david@tethera.net>
+ */
+
+#include "notmuch-private.h"
+#include "database-test.h"
+
+/* Create a stub message (one without a corresponding file) with the
+ * given message ID and tags; see database-test.h for the documented
+ * return values.
+ *
+ * 'tags' is either NULL or a NULL-terminated array of tag names.
+ */
+notmuch_status_t
+notmuch_database_add_stub_message (notmuch_database_t *notmuch,
+                                   const char *message_id,
+                                   const char **tags)
+{
+    notmuch_private_status_t private_status;
+    notmuch_message_t *message;
+    notmuch_status_t status;
+    const char **cursor;
+
+    status = _notmuch_database_ensure_writable (notmuch);
+    if (status)
+        return status;
+
+    message = _notmuch_message_create_for_message_id (notmuch,
+                                                      message_id,
+                                                      &private_status);
+    if (! message)
+        return COERCE_STATUS (private_status,
+                              "Unexpected status value from _notmuch_message_create_for_message_id");
+
+    /* Any outcome other than "no document found" is reported as a
+     * duplicate message ID. */
+    if (private_status != NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND)
+        return NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID;
+
+    _notmuch_message_add_term (message, "type", "mail");
+
+    /* Without a tag list there is nothing further to do. */
+    if (tags == NULL)
+        return NOTMUCH_STATUS_SUCCESS;
+
+    /* Add all tags between one freeze/thaw pair. */
+    status = notmuch_message_freeze (message);
+    if (status)
+        return status;
+
+    cursor = tags;
+    while (*cursor != NULL) {
+        status = notmuch_message_add_tag (message, *cursor);
+        if (status)
+            return status;
+        cursor++;
+    }
+
+    status = notmuch_message_thaw (message);
+    if (status)
+        return status;
+
+    return NOTMUCH_STATUS_SUCCESS;
+}
diff --git a/test/database-test.h b/test/database-test.h
new file mode 100644
index 0000000..84f7988
--- /dev/null
+++ b/test/database-test.h
@@ -0,0 +1,21 @@
+#ifndef _DATABASE_TEST_H
+#define _DATABASE_TEST_H
+/* Add a new stub message to the given notmuch database.
+ *
+ * 'message_id' is the ID of the message to create. 'tag_list' is
+ * either NULL, in which case no tags are added, or a NULL-terminated
+ * array of tag names to add to the new message.
+ *
+ * At least the following return values are possible:
+ *
+ * NOTMUCH_STATUS_SUCCESS: Message successfully added to database.
+ *
+ * NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID: Message has the same message
+ * ID as another message already in the database.
+ *
+ * NOTMUCH_STATUS_READ_ONLY_DATABASE: Database was opened in read-only
+ * mode so no message can be added.
+ *
+ * A failure status from freezing, tagging or thawing the message is
+ * returned unchanged.
+ */
+
+notmuch_status_t
+notmuch_database_add_stub_message (notmuch_database_t *database,
+ const char *message_id,
+ const char **tag_list);
+
+#endif
--
1.7.10.4
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [patch v3 5/6] test: add generator for random "stub" messages
2012-12-02 13:33 v3 of testing for new tagging/dump/restore david
` (3 preceding siblings ...)
2012-12-02 13:33 ` [patch v3 4/6] test: add database routines for testing david
@ 2012-12-02 13:33 ` david
2012-12-02 13:33 ` [patch v3 6/6] test: add broken roundtrip test david
` (2 subsequent siblings)
7 siblings, 0 replies; 14+ messages in thread
From: david @ 2012-12-02 13:33 UTC (permalink / raw)
To: notmuch; +Cc: David Bremner
From: David Bremner <bremner@debian.org>
Initial use case is testing dump and restore, so we only have
message-ids and tags.
The message ID's are nothing like RFC compliant, but it doesn't seem
any harder to roundtrip random UTF-8 strings than RFC-compliant ones.
Tags are UTF-8, even though notmuch is in principle more generous than
that.
updated for id:m2wr04ocro.fsf@guru.guru-group.fi
- talk about Unicode values rather than some specific encoding
- call talloc_realloc fewer times
---
test/.gitignore | 1 +
test/Makefile.local | 10 +++
test/basic | 1 +
test/random-corpus.c | 209 ++++++++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 221 insertions(+)
create mode 100644 test/random-corpus.c
diff --git a/test/.gitignore b/test/.gitignore
index be7ab5e..1eff7ce 100644
--- a/test/.gitignore
+++ b/test/.gitignore
@@ -4,4 +4,5 @@ smtp-dummy
symbol-test
arg-test
hex-xcode
+random-corpus
tmp.*
diff --git a/test/Makefile.local b/test/Makefile.local
index 8479f91..6a9f15e 100644
--- a/test/Makefile.local
+++ b/test/Makefile.local
@@ -16,6 +16,14 @@ $(dir)/arg-test: $(dir)/arg-test.o command-line-arguments.o util/libutil.a
$(dir)/hex-xcode: $(dir)/hex-xcode.o command-line-arguments.o util/libutil.a
$(call quiet,CC) -I. $^ -o $@ -ltalloc
+random_corpus_deps = $(dir)/random-corpus.o $(dir)/database-test.o \
+ notmuch-config.o command-line-arguments.o \
+ lib/libnotmuch.a util/libutil.a \
+ parse-time-string/libparse-time-string.a
+
+$(dir)/random-corpus: $(random_corpus_deps)
+ $(call quiet,CC) $(CFLAGS_FINAL) $^ -o $@ $(CONFIGURE_LDFLAGS)
+
$(dir)/smtp-dummy: $(smtp_dummy_modules)
$(call quiet,CC) $^ -o $@
@@ -29,6 +37,7 @@ $(dir)/parse-time: $(dir)/parse-time.o parse-time-string/parse-time-string.o
TEST_BINARIES=$(dir)/arg-test \
$(dir)/hex-xcode \
+ $(dir)/random-corpus \
$(dir)/parse-time \
$(dir)/smtp-dummy \
$(dir)/symbol-test
@@ -46,5 +55,6 @@ CLEAN := $(CLEAN) $(dir)/smtp-dummy $(dir)/smtp-dummy.o \
$(dir)/arg-test $(dir)/arg-test.o \
$(dir)/hex-xcode $(dir)/hex-xcode.o \
$(dir)/database-test.o \
+ $(dir)/random-corpus $(dir)/random-corpus.o \
$(dir)/parse-time $(dir)/parse-time.o \
$(dir)/corpus.mail $(dir)/test-results $(dir)/tmp.*
diff --git a/test/basic b/test/basic
index c448ef8..1b2a7d2 100755
--- a/test/basic
+++ b/test/basic
@@ -59,6 +59,7 @@ available=$(find "$TEST_DIRECTORY" -maxdepth 1 -type f -perm +111 \
! -name hex-xcode \
! -name notmuch-test \
! -name parse-time \
+ ! -name random-corpus \
! -name smtp-dummy \
! -name symbol-test \
! -name test-verbose \
diff --git a/test/random-corpus.c b/test/random-corpus.c
new file mode 100644
index 0000000..f354d4b
--- /dev/null
+++ b/test/random-corpus.c
@@ -0,0 +1,209 @@
+/*
+ * Generate a random corpus of stub messages.
+ *
+ * Initial use case is testing dump and restore, so we only have
+ * message-ids and tags.
+ *
+ * Generated message-id's and tags are intentionally nasty.
+ *
+ * Copyright (c) 2012 David Bremner
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see http://www.gnu.org/licenses/ .
+ *
+ * Author: David Bremner <david@tethera.net>
+ */
+
+#include <stdlib.h>
+#include <assert.h>
+#include <talloc.h>
+#include <string.h>
+#include <glib.h>
+#include <math.h>
+
+#include "notmuch-client.h"
+#include "command-line-arguments.h"
+#include "database-test.h"
+
+/* Current largest Unicode value defined. Note that most of these will
+ * be printed as boxes in most fonts.
+ */
+
+#define GLYPH_MAX 0x10FFFE
+
+
+typedef struct {
+ int weight; /* cumulative threshold compared against a draw in [0, GLYPH_MAX) */
+ int start; /* first code point of the class (inclusive) */
+ int stop; /* last code point of the class (inclusive) */
+} char_class_t;
+
+/*
+ * Choose about half ascii as test characters, as ascii
+ * punctuation and whitespace is the main cause of problems for
+ * the (old) restore parser.
+ *
+ * We then favour code points with 2 byte encodings. Note that
+ * code points 0xD800-0xDFFF are forbidden in UTF-8.
+ *
+ * The weights are cumulative: random_unichar() selects the first
+ * entry whose weight is not below its draw, so the last entry must
+ * carry GLYPH_MAX for that search to stay inside the table.
+ */
+
+static const
+char_class_t char_class[] = { { 0.50 * GLYPH_MAX, 0x0001, 0x007f },
+ { 0.75 * GLYPH_MAX, 0x0080, 0x07ff },
+ { 0.88 * GLYPH_MAX, 0x0800, 0xd7ff },
+ { 0.90 * GLYPH_MAX, 0xE000, 0xffff },
+ { GLYPH_MAX, 0x10000, GLYPH_MAX } };
+
+/* Draw one code point: pick a class by comparing a draw in
+ * [0, GLYPH_MAX) against the cumulative weights in char_class, then
+ * pick an offset inside that class's [start, stop] range.
+ */
+static gunichar
+random_unichar ()
+{
+    const char_class_t *chosen = char_class;
+    int draw = random () % GLYPH_MAX;
+    int span;
+
+    /* The final weight is GLYPH_MAX, so this stops within the table. */
+    while (chosen->weight < draw)
+        chosen++;
+
+    span = chosen->stop - chosen->start + 1;
+
+    return chosen->start + (random () % span);
+}
+
+/* Build a NUL-terminated UTF-8 string of 'char_count' random code
+ * points (from random_unichar()), allocated with talloc under 'ctx'.
+ *
+ * A 'char_count' of 0 yields an empty string rather than a NULL
+ * dereference. The process exits with status 1 if memory runs out or
+ * a code point cannot be converted.
+ */
+static char *
+random_utf8_string (void *ctx, size_t char_count)
+{
+    size_t offset = 0;
+    size_t i;
+    gchar *buf = NULL;
+    size_t buf_size = 0;
+
+    for (i = 0; ; i++) {
+        gunichar randomchar;
+        gint written;   /* signed, like g_unichar_to_utf8()'s result */
+
+        /* 6 for one glyph, one for null, one for luck. Doing this
+         * before the loop-bound test below guarantees that buf is
+         * allocated even when char_count is 0. */
+        while (buf_size <= offset + 8) {
+            gchar *grown;
+
+            buf_size = 2 * buf_size + 8;
+            grown = talloc_realloc (ctx, buf, gchar, buf_size);
+            if (grown == NULL) {
+                fprintf (stderr, "out of memory\n");
+                exit (1);
+            }
+            buf = grown;
+        }
+
+        if (i == char_count)
+            break;
+
+        randomchar = random_unichar ();
+
+        written = g_unichar_to_utf8 (randomchar, buf + offset);
+
+        if (written <= 0) {
+            fprintf (stderr, "error converting to utf8\n");
+            exit (1);
+        }
+
+        offset += written;
+    }
+
+    buf[offset] = 0;
+    return buf;
+}
+
+
+/* Entry point: add --num-messages stub messages with random UTF-8
+ * message IDs and tags to the database named by the configuration
+ * file given with --config-path. The random generator is seeded from
+ * --seed.
+ *
+ * Exits non-zero on a bad command line, if the configuration or the
+ * database cannot be opened, or if adding a message fails for any
+ * reason other than a duplicate message ID (those are retried with
+ * fresh random data).
+ */
+int
+main (int argc, char **argv)
+{
+
+    void *ctx = talloc_new (NULL);
+
+    char *config_path = NULL;
+    notmuch_config_t *config;
+    notmuch_database_t *notmuch;
+
+    int num_messages = 500;
+    int max_tags = 10;
+    // leave room for UTF-8 encoding.
+    int tag_len = NOTMUCH_TAG_MAX / 6;
+    // NOTMUCH_MESSAGE_ID_MAX is not exported, so we make a
+    // conservative guess.
+    int message_id_len = (NOTMUCH_TAG_MAX - 20) / 6;
+
+    int seed = 734569;
+
+    notmuch_opt_desc_t options[] = {
+        { NOTMUCH_OPT_STRING, &config_path, "config-path", 'c', 0 },
+        { NOTMUCH_OPT_INT, &num_messages, "num-messages", 'n', 0 },
+        { NOTMUCH_OPT_INT, &max_tags, "max-tags", 'm', 0 },
+        { NOTMUCH_OPT_INT, &message_id_len, "message-id-len", 'M', 0 },
+        { NOTMUCH_OPT_INT, &tag_len, "tag-len", 't', 0 },
+        { NOTMUCH_OPT_INT, &seed, "seed", 's', 0 },
+        { 0, 0, 0, 0, 0 }
+    };
+
+    int opt_index = parse_arguments (argc, argv, options, 1);
+
+    if (opt_index < 0)
+        exit (1);
+
+    if (message_id_len < 1) {
+        fprintf (stderr, "message id's must be least length 1\n");
+        exit (1);
+    }
+
+    /* Both values are used as divisors of random () below; reject the
+     * settings that would make those divisors zero or negative. */
+    if (max_tags < 0) {
+        fprintf (stderr, "max-tags must not be negative\n");
+        exit (1);
+    }
+
+    if (max_tags > 0 && tag_len < 1) {
+        fprintf (stderr, "tag-len must be at least 1\n");
+        exit (1);
+    }
+
+    if (config_path == NULL) {
+        fprintf (stderr, "configuration path must be specified\n");
+        exit (1);
+    }
+
+    config = notmuch_config_open (ctx, config_path, NULL);
+    if (config == NULL)
+        return 1;
+
+    if (notmuch_database_open (notmuch_config_get_database_path (config),
+                               NOTMUCH_DATABASE_MODE_READ_WRITE, &notmuch))
+        return 1;
+
+    srandom (seed);
+
+    int count;
+    for (count = 0; count < num_messages; count++) {
+        int j;
+        /* explicitly allow zero tags */
+        int num_tags = random () % (max_tags + 1);
+        /* message ids should be non-empty */
+        int this_mid_len = (random () % message_id_len) + 1;
+        const char **tag_list;
+        char *mid;
+        notmuch_status_t status;
+
+        /* Retry with a fresh ID and fresh tags while the generated
+         * message ID collides with one already in the database. */
+        do {
+            mid = random_utf8_string (ctx, this_mid_len);
+
+            tag_list = talloc_realloc (ctx, NULL, const char *, num_tags + 1);
+            if (tag_list == NULL) {
+                fprintf (stderr, "out of memory\n");
+                exit (1);
+            }
+
+            for (j = 0; j < num_tags; j++) {
+                int this_tag_len = random () % tag_len + 1;
+
+                tag_list[j] = random_utf8_string (ctx, this_tag_len);
+            }
+
+            tag_list[j] = NULL;
+
+            status = notmuch_database_add_stub_message (notmuch, mid, tag_list);
+        } while (status == NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID);
+
+        if (status != NOTMUCH_STATUS_SUCCESS) {
+            fprintf (stderr, "error %d adding message\n", status);
+            exit (status);
+        }
+    }
+
+    notmuch_database_destroy (notmuch);
+
+    talloc_free (ctx);
+
+    return 0;
+}
--
1.7.10.4
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [patch v3 6/6] test: add broken roundtrip test
2012-12-02 13:33 v3 of testing for new tagging/dump/restore david
` (4 preceding siblings ...)
2012-12-02 13:33 ` [patch v3 5/6] test: add generator for random "stub" messages david
@ 2012-12-02 13:33 ` david
2012-12-02 23:19 ` David Bremner
2012-12-02 15:44 ` v3 of testing for new tagging/dump/restore Jani Nikula
2012-12-03 11:38 ` David Bremner
7 siblings, 1 reply; 14+ messages in thread
From: david @ 2012-12-02 13:33 UTC (permalink / raw)
To: notmuch; +Cc: David Bremner
From: David Bremner <bremner@debian.org>
We demonstrate the current notmuch restore parser being confused by
message-ids and tags containing non-alphanumeric characters
(particularly space and parentheses are problematic because they are
not escaped by notmuch dump).
We save the files as hex escaped on disk so that terminal emulators
will not get confused if the test fails (as we mostly expect it to do).
---
test/dump-restore | 22 ++++++++++++++++++++++
1 file changed, 22 insertions(+)
diff --git a/test/dump-restore b/test/dump-restore
index b05399c..adf0647 100755
--- a/test/dump-restore
+++ b/test/dump-restore
@@ -85,4 +85,26 @@ test_begin_subtest "dump --output=outfile -- from:cworth"
notmuch dump --output=dump-outfile-dash-inbox.actual -- from:cworth
test_expect_equal_file dump-cworth.expected dump-outfile-dash-inbox.actual
+test_begin_subtest 'roundtripping random message-ids and tags'
+ test_subtest_known_broken
+ ${TEST_DIRECTORY}/random-corpus --config-path=${NOTMUCH_CONFIG} \
+ --num-messages=100
+
+ notmuch dump| \
+ ${TEST_DIRECTORY}/hex-xcode --direction=encode| \
+ sort > EXPECTED.$test_count
+
+ # delete every second tag
+ notmuch tag $(notmuch search --output=tags '*' | \
+ awk '{ if (NR % 2 == 0) printf " -'%s'", $1 }') '*'
+
+ ${TEST_DIRECTORY}/hex-xcode --direction=decode < EXPECTED.$test_count | \
+ notmuch restore 2>/dev/null
+
+ notmuch dump| \
+ ${TEST_DIRECTORY}/hex-xcode --direction=encode| \
+ sort > OUTPUT.$test_count
+
+test_expect_equal_file EXPECTED.$test_count OUTPUT.$test_count
+
test_done
--
1.7.10.4
^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: v3 of testing for new tagging/dump/restore
2012-12-02 13:33 v3 of testing for new tagging/dump/restore david
` (5 preceding siblings ...)
2012-12-02 13:33 ` [patch v3 6/6] test: add broken roundtrip test david
@ 2012-12-02 15:44 ` Jani Nikula
2012-12-03 11:38 ` David Bremner
7 siblings, 0 replies; 14+ messages in thread
From: Jani Nikula @ 2012-12-02 15:44 UTC (permalink / raw)
To: david, notmuch
LGTM,
Jani.
On Sun, 02 Dec 2012, david@tethera.net wrote:
> Here is round 3 of the batch tagging stuff, but just the first six
> patches, which are more or less useful without the remaining one
> (although a bit epic if the only goal is add a single test).
>
> Detailed list of changes follows, the first one is a change to the
> encoding format. Looking forward to maybe doing tricks with maildirs
> and hardlinks, it seems like we might want to encode ':'. The fact
> that it might help MacOS X users we can consider neutral ;).
>
> - hex-escape.c: escape ':'
> - hex-escape.c: simpler buffer size calculations.
> - hex-xcode.c: uncrustify + delete extra blank lines
> - hex-xcode.c: provide inplace mode for test coverage
> - test/hex-escaping, copy tests for --in-place version of test
> - test/database-test.c: only thaw if we froze
> - random-corpus.c: use a table based scheme to favour shorter code points
> - random-corpus.c: uncrustify
> - random-corpus.c: delete blank line, reorder while loop
> - update roundtrip test to avoid need for random-corpus tag.
> - do not add tag "random-corpus" in random-corpus.c
>
> Things that I (knowingly) ignored
>
> - Jani suggested an extra talloc in random-corpus.c
> - I left the argument format of hex-xcode alone
> _______________________________________________
> notmuch mailing list
> notmuch@notmuchmail.org
> http://notmuchmail.org/mailman/listinfo/notmuch
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: v3 of testing for new tagging/dump/restore
2012-12-02 13:33 v3 of testing for new tagging/dump/restore david
` (6 preceding siblings ...)
2012-12-02 15:44 ` v3 of testing for new tagging/dump/restore Jani Nikula
@ 2012-12-03 11:38 ` David Bremner
7 siblings, 0 replies; 14+ messages in thread
From: David Bremner @ 2012-12-03 11:38 UTC (permalink / raw)
To: notmuch
david@tethera.net writes:
> Here is round 3 of the batch tagging stuff, but just the first six
> patches, which are more or less useful without the remaining one
> (although a bit epic if the only goal is add a single test).
I have pushed this series, with the two small amendments discussed in
this thread.
^ permalink raw reply [flat|nested] 14+ messages in thread
* [Patch v3 5/6] test: add generator for random "stub" messages
2012-08-19 13:18 random corpus generator, v3 david
@ 2012-08-19 13:18 ` david
2012-09-08 13:38 ` Tomi Ollila
0 siblings, 1 reply; 14+ messages in thread
From: david @ 2012-08-19 13:18 UTC (permalink / raw)
To: notmuch; +Cc: David Bremner
From: David Bremner <bremner@debian.org>
Initial use case is testing dump and restore, so we only have
message-ids and tags.
The message ID's are nothing like RFC compliant, but it doesn't seem
any harder to roundtrip random UTF-8 strings than RFC-compliant ones.
Tags are UTF-8, even though notmuch is in principle more generous than
that.
---
test/.gitignore | 1 +
test/Makefile.local | 9 +++
test/basic | 2 +-
test/random-corpus.c | 202 ++++++++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 213 insertions(+), 1 deletion(-)
create mode 100644 test/random-corpus.c
diff --git a/test/.gitignore b/test/.gitignore
index be7ab5e..1eff7ce 100644
--- a/test/.gitignore
+++ b/test/.gitignore
@@ -4,4 +4,5 @@ smtp-dummy
symbol-test
arg-test
hex-xcode
+random-corpus
tmp.*
diff --git a/test/Makefile.local b/test/Makefile.local
index bb67ded..ad0fd91 100644
--- a/test/Makefile.local
+++ b/test/Makefile.local
@@ -16,6 +16,13 @@ $(dir)/arg-test: $(dir)/arg-test.o command-line-arguments.o util/libutil.a
$(dir)/hex-xcode: $(dir)/hex-xcode.o command-line-arguments.o util/libutil.a
$(call quiet,CC) -I. $^ -o $@ -ltalloc
+random_corpus_deps = $(dir)/random-corpus.o $(dir)/database-test.o \
+ notmuch-config.o command-line-arguments.o \
+ lib/libnotmuch.a util/libutil.a
+
+$(dir)/random-corpus: $(random_corpus_deps)
+ $(call quiet,CC) $(CFLAGS_FINAL) $^ -o $@ $(CONFIGURE_LDFLAGS)
+
$(dir)/smtp-dummy: $(smtp_dummy_modules)
$(call quiet,CC) $^ -o $@
@@ -26,6 +33,7 @@ $(dir)/symbol-test: $(dir)/symbol-test.o
TEST_BINARIES=$(dir)/arg-test \
$(dir)/hex-xcode \
+ $(dir)/random-corpus \
$(dir)/smtp-dummy \
$(dir)/symbol-test
@@ -42,4 +50,5 @@ CLEAN := $(CLEAN) $(dir)/smtp-dummy $(dir)/smtp-dummy.o \
$(dir)/arg-test $(dir)/arg-test.o \
$(dir)/hex-xcode $(dir)/hex-xcode.o \
$(dir)/database-test.o \
+ $(dir)/random-corpus $(dir)/random-corpus.o \
$(dir)/corpus.mail $(dir)/test-results $(dir)/tmp.*
diff --git a/test/basic b/test/basic
index af57026..5f8ed7a 100755
--- a/test/basic
+++ b/test/basic
@@ -54,7 +54,7 @@ test_begin_subtest 'Ensure that all available tests will be run by notmuch-test'
eval $(sed -n -e '/^TESTS="$/,/^"$/p' $TEST_DIRECTORY/notmuch-test)
tests_in_suite=$(for i in $TESTS; do echo $i; done | sort)
available=$(find "$TEST_DIRECTORY" -maxdepth 1 -type f -executable -printf '%f\n' | \
- sed -r -e "/^(aggregate-results.sh|notmuch-test|smtp-dummy|test-verbose|symbol-test|arg-test|hex-xcode)$/d" | \
+ sed -r -e "/^(aggregate-results.sh|notmuch-test|smtp-dummy|test-verbose|symbol-test|arg-test|hex-xcode|random-corpus)$/d" | \
sort)
test_expect_equal "$tests_in_suite" "$available"
diff --git a/test/random-corpus.c b/test/random-corpus.c
new file mode 100644
index 0000000..8c5b559
--- /dev/null
+++ b/test/random-corpus.c
@@ -0,0 +1,202 @@
+/*
+ * Generate a random corpus of stub messages.
+ *
+ * Initial use case is testing dump and restore, so we only have
+ * message-ids and tags.
+ *
+ * Generated message-id's and tags are intentionally nasty.
+ *
+ * Copyright (c) 2012 David Bremner
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see http://www.gnu.org/licenses/ .
+ *
+ * Author: David Bremner <david@tethera.net>
+ */
+
+#include <stdlib.h>
+#include <assert.h>
+#include <talloc.h>
+#include <string.h>
+#include <glib.h>
+#include <math.h>
+
+#include "notmuch-client.h"
+#include "command-line-arguments.h"
+#include "database-test.h"
+
+/* Current largest UTF-32 value defined. Note that most of these will
+ * be printed as boxes in most fonts.
+ */
+
+#define GLYPH_MAX 0x10FFFE
+
+static gunichar
+random_unichar ()
+{
+ int start = 1, stop = GLYPH_MAX;
+ int class = random() % 2;
+
+ /*
+ * Choose about half ascii as test characters, as ascii
+ * punctation and whitespace is the main cause of problems for
+ * the (old) restore parser
+ */
+ switch (class) {
+ case 0:
+ /* ascii */
+ start = 0x01;
+ stop = 0x7f;
+ break;
+ case 1:
+ /* the rest of unicode */
+ start = 0x80;
+ stop = GLYPH_MAX;
+ }
+
+ if (start == stop)
+ return start;
+ else
+ return start + (random() % (stop - start + 1));
+}
+
+static char *
+random_utf8_string (void *ctx, size_t char_count)
+{
+
+ gchar *buf = NULL;
+ size_t buf_size = 0;
+
+ size_t offset = 0;
+
+ size_t i;
+
+ buf = talloc_realloc (ctx, NULL, gchar, char_count);
+ buf_size = char_count;
+
+ for (i = 0; i < char_count; i++) {
+ gunichar randomchar;
+ size_t written;
+
+ /* 6 for one glyph, one for null */
+ if (buf_size - offset < 8) {
+ buf_size += 16;
+ buf = talloc_realloc (ctx, buf, gchar, buf_size);
+ }
+
+ randomchar = random_unichar();
+
+ written = g_unichar_to_utf8 (randomchar, buf + offset);
+
+ if (written <= 0) {
+ fprintf (stderr, "error converting to utf8\n");
+ exit (1);
+ }
+
+ offset += written;
+
+ }
+ buf[offset] = 0;
+ return buf;
+}
+
+
+int
+main (int argc, char **argv)
+{
+
+ void *ctx = talloc_new (NULL);
+
+ char *config_path = NULL;
+ notmuch_config_t *config;
+ notmuch_database_t *notmuch;
+
+ int num_messages = 500;
+ int max_tags = 10;
+ // leave room for UTF-8 encoding.
+ int tag_len = NOTMUCH_TAG_MAX / 6;
+ // NOTMUCH_MESSAGE_ID_MAX is not exported, so we make a
+ // conservative guess.
+ int message_id_len = (NOTMUCH_TAG_MAX - 20) / 6;
+
+ int seed = 734569;
+
+ notmuch_opt_desc_t options[] = {
+ { NOTMUCH_OPT_STRING, &config_path, "config-path", 'c', 0 },
+ { NOTMUCH_OPT_INT, &num_messages, "num-messages", 'n', 0 },
+ { NOTMUCH_OPT_INT, &max_tags, "max-tags", 'm', 0 },
+ { NOTMUCH_OPT_INT, &message_id_len, "message-id-len", 'M', 0 },
+ { NOTMUCH_OPT_INT, &tag_len, "tag-len", 't', 0 },
+ { NOTMUCH_OPT_INT, &seed, "seed", 's', 0 },
+ { 0, 0, 0, 0, 0 }
+ };
+
+ int opt_index = parse_arguments (argc, argv, options, 1);
+
+ if (opt_index < 0)
+ exit (1);
+
+ if (config_path == NULL) {
+ fprintf (stderr, "configuration path must be specified");
+ exit (1);
+ }
+
+ config = notmuch_config_open (ctx, config_path, NULL);
+ if (config == NULL)
+ return 1;
+
+ if (notmuch_database_open (notmuch_config_get_database_path (config),
+ NOTMUCH_DATABASE_MODE_READ_WRITE, &notmuch))
+ return 1;
+
+ srandom (seed);
+
+ int count;
+ for (count = 0; count < num_messages; count++) {
+ int j;
+ int num_tags = random () % (max_tags + 1);
+ int this_mid_len = random () % message_id_len + 1;
+ const char **tag_list;
+ char *mid;
+ notmuch_status_t status;
+
+ do {
+ mid = random_utf8_string (ctx, this_mid_len);
+
+ tag_list = talloc_realloc (ctx, NULL, const char *, num_tags + 2);
+
+ tag_list[0] = "random-corpus";
+
+ for (j = 0; j < num_tags; j++) {
+ int this_tag_len = random () % tag_len + 1;
+
+ tag_list[j + 1] = random_utf8_string (ctx, this_tag_len);
+ }
+
+ tag_list[j + 1] = NULL;
+
+ status = notmuch_database_add_stub_message (notmuch, mid, tag_list);
+ } while (status == NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID);
+
+ if (status != NOTMUCH_STATUS_SUCCESS) {
+ fprintf (stderr, "error %d adding message", status);
+ exit (status);
+ }
+ }
+
+ notmuch_database_destroy (notmuch);
+
+ talloc_free (ctx);
+
+ return 0;
+}
--
1.7.10.4
^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [Patch v3 5/6] test: add generator for random "stub" messages
2012-08-19 13:18 ` [Patch v3 5/6] test: add generator for random "stub" messages david
@ 2012-09-08 13:38 ` Tomi Ollila
0 siblings, 0 replies; 14+ messages in thread
From: Tomi Ollila @ 2012-09-08 13:38 UTC (permalink / raw)
To: david, notmuch; +Cc: David Bremner
On Sun, Aug 19 2012, david@tethera.net wrote:
> From: David Bremner <bremner@debian.org>
>
> Initial use case is testing dump and restore, so we only have
> message-ids and tags.
>
> The message ID's are nothing like RFC compliant, but it doesn't seem
> any harder to roundtrip random UTF-8 strings than RFC-compliant ones.
>
> Tags are UTF-8, even though notmuch is in principle more generous than
> that.
> ---
Mostly LGTM (the whole series). Few comments inline...
Finally, 6/6 adds known broken test -- when will we see this code
taken into use and the broken test fixed :)
> test/.gitignore | 1 +
> test/Makefile.local | 9 +++
> test/basic | 2 +-
> test/random-corpus.c | 202 ++++++++++++++++++++++++++++++++++++++++++++++++++
> 4 files changed, 213 insertions(+), 1 deletion(-)
> create mode 100644 test/random-corpus.c
[ ... ]
>
> diff --git a/test/random-corpus.c b/test/random-corpus.c
> new file mode 100644
> index 0000000..8c5b559
> --- /dev/null
> +++ b/test/random-corpus.c
[ ... ]
> +
> +/* Current largest UTF-32 value defined. Note that most of these will
> + * be printed as boxes in most fonts.
> + */
Should we be talking about UTF-8 values? UTF-8 (currently) has the same
limit.
> +
> +#define GLYPH_MAX 0x10FFFE
> +
> +static gunichar
> +random_unichar ()
> +{
> + int start = 1, stop = GLYPH_MAX;
> + int class = random() % 2;
> +
> + /*
> + * Choose about half ascii as test characters, as ascii
> + * punctation and whitespace is the main cause of problems for
> + * the (old) restore parser
> + */
> + switch (class) {
> + case 0:
> + /* ascii */
> + start = 0x01;
> + stop = 0x7f;
> + break;
> + case 1:
> + /* the rest of unicode */
> + start = 0x80;
> + stop = GLYPH_MAX;
> + }
> +
> + if (start == stop)
> + return start;
> + else
> + return start + (random() % (stop - start + 1));
> +}
> +
> +static char *
> +random_utf8_string (void *ctx, size_t char_count)
> +{
> +
> + gchar *buf = NULL;
> + size_t buf_size = 0;
> +
> + size_t offset = 0;
> +
> + size_t i;
> +
> + buf = talloc_realloc (ctx, NULL, gchar, char_count);
> + buf_size = char_count;
> +
> + for (i = 0; i < char_count; i++) {
> + gunichar randomchar;
> + size_t written;
> +
> + /* 6 for one glyph, one for null */
> + if (buf_size - offset < 8) {
> + buf_size += 16;
> + buf = talloc_realloc (ctx, buf, gchar, buf_size);
This reallocation will hit many times, as originally there were just
char_count bytes allocated -- this limit will probably get hit before
halfway through the creation of the random string (half use 1 byte, the other
half 2, 3 or 4 bytes, mostly 4, even though only half of the 4-byte range is used).
Maybe originally allocating char_count * 2 + 8 and if realloc required
(char_count - i) * 2 + 8... or maybe better, just doing the latter
realloc and replacing first with buf = NULL; buf_size = 0;
Alternatively you could play with random states; calculate size,
reset random state, alloc size + 1 and write chars.
> + }
> +
> + randomchar = random_unichar();
> +
> + written = g_unichar_to_utf8 (randomchar, buf + offset);
> +
> + if (written <= 0) {
> + fprintf (stderr, "error converting to utf8\n");
> + exit (1);
> + }
> +
> + offset += written;
> +
> + }
Above there is extra newline. There are a few others in other
files (at least after opening and before closing brace).
Maybe uncrustify your source :)
> + buf[offset] = 0;
> + return buf;
> +}
> +
[ ... ]
Tomi
^ permalink raw reply [flat|nested] 14+ messages in thread