unofficial mirror of notmuch@notmuchmail.org
 help / color / mirror / code / Atom feed
* random corpus generator, v3
@ 2012-08-19 13:18 david
  2012-08-19 13:18 ` [Patch v3 1/6] hex-escape: (en|de)code strings to/from restricted character set david
                   ` (6 more replies)
  0 siblings, 7 replies; 9+ messages in thread
From: david @ 2012-08-19 13:18 UTC (permalink / raw)
  To: notmuch

This obsoletes the series at:

     id:"1344888831-4301-1-git-send-email-bremner@debian.org"

Changes since v2:

- clean up new test-binaries and objects

- remove the "set -o pipefail" leftover from debugging.  Possibly this
  makes sense as a global setting, but in a separate patch.

- add hex-escape to test/basic

- rebase against updated master.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [Patch v3 1/6] hex-escape: (en|de)code strings to/from restricted character set
  2012-08-19 13:18 random corpus generator, v3 david
@ 2012-08-19 13:18 ` david
  2012-08-19 13:18 ` [Patch v3 2/6] test/hex-xcode: new test binary david
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 9+ messages in thread
From: david @ 2012-08-19 13:18 UTC (permalink / raw)
  To: notmuch; +Cc: David Bremner

From: David Bremner <bremner@debian.org>

The character set is chosen to be suitable for pathnames, and the same
as that used by contrib/nmbug

[With additions by Jani Nikula]
---
 util/Makefile.local |    2 +-
 util/hex-escape.c   |  168 +++++++++++++++++++++++++++++++++++++++++++++++++++
 util/hex-escape.h   |   41 +++++++++++++
 3 files changed, 210 insertions(+), 1 deletion(-)
 create mode 100644 util/hex-escape.c
 create mode 100644 util/hex-escape.h

diff --git a/util/Makefile.local b/util/Makefile.local
index c7cae61..3ca623e 100644
--- a/util/Makefile.local
+++ b/util/Makefile.local
@@ -3,7 +3,7 @@
 dir := util
 extra_cflags += -I$(srcdir)/$(dir)
 
-libutil_c_srcs := $(dir)/xutil.c $(dir)/error_util.c
+libutil_c_srcs := $(dir)/xutil.c $(dir)/error_util.c $(dir)/hex-escape.c
 
 libutil_modules := $(libutil_c_srcs:.c=.o)
 
diff --git a/util/hex-escape.c b/util/hex-escape.c
new file mode 100644
index 0000000..d8905d0
--- /dev/null
+++ b/util/hex-escape.c
@@ -0,0 +1,168 @@
+/* hex-escape.c -  Manage encoding and decoding of byte strings into path names
+ *
+ * Copyright (c) 2011 David Bremner
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see http://www.gnu.org/licenses/ .
+ *
+ * Author: David Bremner <david@tethera.net>
+ */
+
+#include <assert.h>
+#include <string.h>
+#include <talloc.h>
+#include <ctype.h>
+#include "error_util.h"
+#include "hex-escape.h"
+
+static const size_t default_buf_size = 1024;
+
+static const char *output_charset =
+    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+-_@=.:,";
+
+static const char escape_char = '%';
+
+/* Return non-zero if 'c' may appear unescaped in encoded output.
+ *
+ * strchr() treats the terminating NUL of the string it searches as a
+ * match, so NUL has to be rejected explicitly: it is not a member of
+ * the output character set.
+ */
+static int
+is_output (char c)
+{
+    return c != '\0' && strchr (output_charset, c) != NULL;
+}
+
+/* Make sure the buffer described by *out / *out_size can hold at
+ * least 'needed' bytes, allocating it or growing it under the talloc
+ * context 'ctx' when it is too small.
+ *
+ * Returns 1 on success and 0 if the allocation failed.  On failure
+ * *out and *out_size are left untouched, so a buffer the caller
+ * already holds is neither lost nor misdescribed.
+ */
+static int
+maybe_realloc (void *ctx, size_t needed, char **out, size_t *out_size)
+{
+    char *grown;
+
+    if (*out_size >= needed)
+	return 1;
+
+    if (*out == NULL)
+	grown = talloc_size (ctx, needed);
+    else
+	grown = talloc_realloc (ctx, *out, char, needed);
+
+    if (grown == NULL)
+	return 0;
+
+    *out = grown;
+    *out_size = needed;
+    return 1;
+}
+
+/* Encode the NUL-terminated string 'in' into *out, copying bytes that
+ * belong to the output character set and writing every other byte as
+ * '%' followed by two lower-case hex digits.
+ *
+ * *out is (re)allocated under 'ctx' when it is NULL or smaller than
+ * the encoded result, and *out_size is updated to match.
+ *
+ * Returns HEX_SUCCESS, or HEX_OUT_OF_MEMORY if the buffer could not
+ * be obtained.
+ */
+hex_status_t
+hex_encode (void *ctx, const char *in, char **out, size_t *out_size)
+{
+    const unsigned char *src;
+    char *dst;
+    size_t n_plain = 0;
+    size_t n_escaped = 0;
+
+    assert (ctx); assert (in); assert (out); assert (out_size);
+
+    /* First pass: measure, so the output can be sized exactly. */
+    for (src = (const unsigned char *) in; *src != '\0'; src++) {
+	if (is_output (*src))
+	    n_plain++;
+	else
+	    n_escaped++;
+    }
+
+    /* A NULL buffer has no size, whatever the caller passed in. */
+    if (*out == NULL)
+	*out_size = 0;
+
+    /* Three output bytes per escaped input byte, plus the terminator. */
+    if (!maybe_realloc (ctx, n_plain + 3 * n_escaped + 1, out, out_size))
+	return HEX_OUT_OF_MEMORY;
+
+    /* Second pass: emit. */
+    dst = *out;
+    for (src = (const unsigned char *) in; *src != '\0'; src++) {
+	if (is_output (*src)) {
+	    *dst++ = *src;
+	} else {
+	    sprintf (dst, "%%%02x", *src);
+	    dst += 3;
+	}
+    }
+    *dst = '\0';
+
+    return HEX_SUCCESS;
+}
+
+/* Decode the hex-escaped string 'in' into 'out'.
+ *
+ * 'out' may be the same buffer as 'in' (hex_decode_inplace() relies
+ * on that): each escape is read in full before the single byte it
+ * stands for is stored, and the write position never passes the read
+ * position.
+ *
+ * Returns HEX_SYNTAX_ERROR when an escape character is not followed
+ * by two hex digits, HEX_SUCCESS otherwise.  The result is
+ * NUL-terminated on success.
+ */
+static hex_status_t
+hex_decode_internal (const char *in, unsigned char *out)
+{
+    char digits[3];
+    char *end;
+
+    for (; *in != '\0'; out++) {
+	if (*in != escape_char) {
+	    /* Ordinary byte: copy it through unchanged. */
+	    *out = *in++;
+	    continue;
+	}
+
+	/* NUL is not a hex digit, so these checks also catch a string
+	 * that ends in the middle of an escape. */
+	if (!isxdigit ((unsigned char) in[1]))
+	    return HEX_SYNTAX_ERROR;
+	if (!isxdigit ((unsigned char) in[2]))
+	    return HEX_SYNTAX_ERROR;
+
+	digits[0] = in[1];
+	digits[1] = in[2];
+	digits[2] = '\0';
+
+	*out = strtoul (digits, &end, 16);
+
+	if (end != digits + 2)
+	    return HEX_SYNTAX_ERROR;
+
+	in += 3;
+    }
+
+    *out = '\0';
+
+    return HEX_SUCCESS;
+}
+
+/* Decode 's' over itself.
+ *
+ * Decoding never produces more bytes than it consumes, so the output
+ * cannot overtake the input that is still to be read.
+ */
+hex_status_t
+hex_decode_inplace (char *s)
+{
+    unsigned char *dest = (unsigned char *) s;
+
+    return hex_decode_internal (s, dest);
+}
+
+/* Decode the hex-escaped string 'in' into *out.
+ *
+ * *out is (re)allocated under 'ctx' when it is NULL or smaller than
+ * the decoded result, and *out_size is updated to match.
+ *
+ * Returns HEX_SUCCESS on success, HEX_SYNTAX_ERROR if 'in' contains a
+ * malformed escape, or HEX_OUT_OF_MEMORY if the buffer could not be
+ * obtained.
+ */
+hex_status_t
+hex_decode (void *ctx, const char *in, char **out, size_t * out_size)
+{
+    const char *p;
+    size_t escape_count = 0;
+    size_t len;
+
+    assert (ctx); assert (in); assert (out); assert (out_size);
+
+    len = strlen (in);
+
+    for (p = in; *p; p++)
+	escape_count += (*p == escape_char);
+
+    /* Every well-formed escape occupies three bytes, and its two
+     * digits cannot themselves be escape characters, so more than
+     * len / 3 escapes means the input is malformed.  Rejecting it
+     * here also keeps the size computation below from wrapping
+     * around to a huge value. */
+    if (escape_count > len / 3)
+	return HEX_SYNTAX_ERROR;
+
+    /* A NULL buffer has no size, whatever the caller passed in. */
+    if (*out == NULL)
+	*out_size = 0;
+
+    /* Each escape shrinks from three bytes to one; add the terminator. */
+    if (!maybe_realloc (ctx, len - escape_count * 2 + 1, out, out_size))
+	return HEX_OUT_OF_MEMORY;
+
+    return hex_decode_internal (in, (unsigned char *) *out);
+}
diff --git a/util/hex-escape.h b/util/hex-escape.h
new file mode 100644
index 0000000..5182042
--- /dev/null
+++ b/util/hex-escape.h
@@ -0,0 +1,41 @@
+#ifndef _HEX_ESCAPE_H
+#define _HEX_ESCAPE_H
+
+typedef enum hex_status {
+    HEX_SUCCESS = 0,
+    HEX_SYNTAX_ERROR,
+    HEX_OUT_OF_MEMORY
+} hex_status_t;
+
+/*
+ * The API for hex_encode() and hex_decode() is modelled on that for
+ * getline.
+ *
+ * If 'out' points to a NULL pointer a char array of the appropriate
+ * size is allocated using talloc, and out_size is updated.
+ *
+ * If 'out' points to a non-NULL pointer, it is assumed to describe an
+ * existing char array, with the size given in *out_size.  This array
+ * may be resized by talloc_realloc if needed; in this case *out_size
+ * will also be updated.
+ *
+ * Note that it is an error to pass a NULL pointer for any parameter
+ * of these routines.
+ */
+
+hex_status_t
+hex_encode (void *talloc_ctx, const char *in, char **out,
+            size_t *out_size);
+
+hex_status_t
+hex_decode (void *talloc_ctx, const char *in, char **out,
+            size_t *out_size);
+
+/*
+ * Non-allocating hex decode to decode 's' in-place. The length of the
+ * result is always equal to or shorter than the length of the
+ * original.
+ */
+hex_status_t
+hex_decode_inplace (char *s);
+#endif
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [Patch v3 2/6] test/hex-xcode: new test binary
  2012-08-19 13:18 random corpus generator, v3 david
  2012-08-19 13:18 ` [Patch v3 1/6] hex-escape: (en|de)code strings to/from restricted character set david
@ 2012-08-19 13:18 ` david
  2012-08-19 13:18 ` [Patch v3 3/6] test/hex-escaping: new test for hex escaping routines david
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 9+ messages in thread
From: david @ 2012-08-19 13:18 UTC (permalink / raw)
  To: notmuch; +Cc: David Bremner

From: David Bremner <bremner@debian.org>

This program is used both as a test-bed/unit-tester for
../util/hex-escape.c, and also as a utility in future tests of dump
and restore.
---
 test/.gitignore     |    1 +
 test/Makefile.local |   11 +++++-
 test/basic          |    2 +-
 test/hex-xcode.c    |  103 +++++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 115 insertions(+), 2 deletions(-)
 create mode 100644 test/hex-xcode.c

diff --git a/test/.gitignore b/test/.gitignore
index e63c689..be7ab5e 100644
--- a/test/.gitignore
+++ b/test/.gitignore
@@ -3,4 +3,5 @@ corpus.mail
 smtp-dummy
 symbol-test
 arg-test
+hex-xcode
 tmp.*
diff --git a/test/Makefile.local b/test/Makefile.local
index c7f1435..2622185 100644
--- a/test/Makefile.local
+++ b/test/Makefile.local
@@ -13,6 +13,9 @@ smtp_dummy_modules = $(smtp_dummy_srcs:.c=.o)
 $(dir)/arg-test: $(dir)/arg-test.o command-line-arguments.o util/libutil.a
 	$(call quiet,CC) -I. $^ -o $@
 
+$(dir)/hex-xcode: $(dir)/hex-xcode.o command-line-arguments.o util/libutil.a
+	$(call quiet,CC) -I. $^ -o $@ -ltalloc
+
 $(dir)/smtp-dummy: $(smtp_dummy_modules)
 	$(call quiet,CC) $^ -o $@
 
@@ -21,7 +24,12 @@ $(dir)/symbol-test: $(dir)/symbol-test.o
 
 .PHONY: test check
 
-test-binaries: $(dir)/arg-test $(dir)/smtp-dummy $(dir)/symbol-test
+TEST_BINARIES=$(dir)/arg-test \
+	      $(dir)/hex-xcode \
+	      $(dir)/smtp-dummy \
+	      $(dir)/symbol-test
+
+test-binaries: $(TEST_BINARIES)
 
 test:	all test-binaries
 	@${dir}/notmuch-test $(OPTIONS)
@@ -32,4 +40,5 @@ SRCS := $(SRCS) $(smtp_dummy_srcs)
 CLEAN := $(CLEAN) $(dir)/smtp-dummy $(dir)/smtp-dummy.o \
 	 $(dir)/symbol-test $(dir)/symbol-test.o \
 	 $(dir)/arg-test $(dir)/arg-test.o \
+	 $(dir)/hex-xcode $(dir)/hex-xcode.o \
 	 $(dir)/corpus.mail $(dir)/test-results $(dir)/tmp.*
diff --git a/test/basic b/test/basic
index d6aed24..af57026 100755
--- a/test/basic
+++ b/test/basic
@@ -54,7 +54,7 @@ test_begin_subtest 'Ensure that all available tests will be run by notmuch-test'
 eval $(sed -n -e '/^TESTS="$/,/^"$/p' $TEST_DIRECTORY/notmuch-test)
 tests_in_suite=$(for i in $TESTS; do echo $i; done | sort)
 available=$(find "$TEST_DIRECTORY" -maxdepth 1 -type f -executable -printf '%f\n' | \
-    sed -r -e "/^(aggregate-results.sh|notmuch-test|smtp-dummy|test-verbose|symbol-test|arg-test)$/d" | \
+    sed -r -e "/^(aggregate-results.sh|notmuch-test|smtp-dummy|test-verbose|symbol-test|arg-test|hex-xcode)$/d" | \
     sort)
 test_expect_equal "$tests_in_suite" "$available"
 
diff --git a/test/hex-xcode.c b/test/hex-xcode.c
new file mode 100644
index 0000000..eec6541
--- /dev/null
+++ b/test/hex-xcode.c
@@ -0,0 +1,103 @@
+/* No, nothing to do with the IDE from Apple Inc.
+   testbed for ../util/hex-escape.c.
+
+   usage:
+   hex-xcode [--direction=(encode|decode)] [--omit-newline] < file
+   hex-xcode [--direction=(encode|decode)] [--omit-newline] arg1 arg2 arg3 ...
+
+ */
+
+#include "notmuch-client.h"
+#include "hex-escape.h"
+#include <assert.h>
+
+
+/* Which conversion xcode() applies to each input string. */
+enum direction {
+    ENCODE,
+    DECODE
+};
+
+/* Run 'in' through hex_encode() or hex_decode(), as selected by
+ * 'dir', reusing the buffer described by *buf_p / *size_p, and write
+ * the result to stdout when the conversion succeeds.
+ *
+ * Returns the hex_status_t reported by the conversion.
+ */
+static int
+xcode (void *ctx, enum direction dir, char *in, char **buf_p, size_t *size_p)
+{
+    hex_status_t result = (dir == ENCODE)
+	? hex_encode (ctx, in, buf_p, size_p)
+	: hex_decode (ctx, in, buf_p, size_p);
+
+    if (result != HEX_SUCCESS)
+	return result;
+
+    fputs (*buf_p, stdout);
+    return HEX_SUCCESS;
+}
+
+
+/* Convert each non-option argument, or each line of stdin when no
+ * such arguments are given, with hex_encode() or hex_decode() and
+ * print the result, followed by a newline unless --omit-newline was
+ * given.  The direction defaults to decoding.
+ *
+ * Exits with status 1 if option parsing or any conversion fails, and
+ * 0 otherwise.  The line buffer and the talloc context are released
+ * on every path that leaves through DONE.
+ */
+int
+main (int argc, char **argv)
+{
+
+
+    enum direction dir = DECODE;
+    int omit_newline = FALSE;
+
+    notmuch_opt_desc_t options[] = {
+	{ NOTMUCH_OPT_KEYWORD, &dir, "direction", 'd',
+	  (notmuch_keyword_t []){ { "encode", ENCODE },
+				  { "decode", DECODE },
+				  { 0, 0 } } },
+	{ NOTMUCH_OPT_BOOLEAN, &omit_newline, "omit-newline", 'n', 0 },
+	{ 0, 0, 0, 0, 0 }
+    };
+
+    int opt_index = parse_arguments (argc, argv, options, 1);
+
+    if (opt_index < 0)
+	exit (1);
+
+    void *ctx = talloc_new (NULL);
+
+    /* getline() manages this buffer; start it out empty. */
+    char *line = NULL;
+    size_t line_size = 0;
+
+    /* Output buffer shared by all conversions; owned by ctx. */
+    char *buffer = NULL;
+    size_t buf_size = 0;
+
+    int ret = 0;
+
+    if (opt_index < argc) {
+	/* Arguments were given: convert those and leave stdin alone. */
+	for (; opt_index < argc; opt_index++) {
+
+	    if (xcode (ctx, dir, argv[opt_index],
+		       &buffer, &buf_size) != HEX_SUCCESS) {
+		ret = 1;
+		goto DONE;
+	    }
+
+	    if (!omit_newline)
+		putchar ('\n');
+	}
+    } else {
+	while (getline (&line, &line_size, stdin) != -1) {
+
+	    chomp_newline (line);
+
+	    if (xcode (ctx, dir, line, &buffer, &buf_size) != HEX_SUCCESS) {
+		ret = 1;
+		goto DONE;
+	    }
+
+	    if (!omit_newline)
+		putchar ('\n');
+	}
+    }
+
+  DONE:
+    free (line);
+    talloc_free (ctx);
+
+    return ret;
+}
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [Patch v3 3/6] test/hex-escaping: new test for hex escaping routines
  2012-08-19 13:18 random corpus generator, v3 david
  2012-08-19 13:18 ` [Patch v3 1/6] hex-escape: (en|de)code strings to/from restricted character set david
  2012-08-19 13:18 ` [Patch v3 2/6] test/hex-xcode: new test binary david
@ 2012-08-19 13:18 ` david
  2012-08-19 13:18 ` [Patch v3 4/6] test: add database routines for testing david
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 9+ messages in thread
From: david @ 2012-08-19 13:18 UTC (permalink / raw)
  To: notmuch; +Cc: David Bremner

From: David Bremner <bremner@debian.org>

These are more like unit tests, to (try to) make sure the library
functionality is working before building more complicated things on
top of it.
---
 test/hex-escaping |   26 ++++++++++++++++++++++++++
 test/notmuch-test |    1 +
 2 files changed, 27 insertions(+)
 create mode 100755 test/hex-escaping

diff --git a/test/hex-escaping b/test/hex-escaping
new file mode 100755
index 0000000..f34cc8c
--- /dev/null
+++ b/test/hex-escaping
@@ -0,0 +1,26 @@
+#!/usr/bin/env bash
+test_description="hex encoding and decoding"
+. ./test-lib.sh
+
+test_begin_subtest "round trip"
+find $TEST_DIRECTORY/corpus -type f -print | sort | xargs cat > EXPECTED
+$TEST_DIRECTORY/hex-xcode --direction=encode < EXPECTED | $TEST_DIRECTORY/hex-xcode --direction=decode > OUTPUT
+test_expect_equal_file OUTPUT EXPECTED
+
+test_begin_subtest "punctuation"
+tag1='comic_swear=$&^%$^%\\//-+$^%$'
+tag_enc1=$($TEST_DIRECTORY/hex-xcode --direction=encode "$tag1")
+test_expect_equal "$tag_enc1" "comic_swear=%24%26%5e%25%24%5e%25%5c%5c%2f%2f-+%24%5e%25%24"
+
+test_begin_subtest "round trip newlines"
+printf 'this\n tag\t has\n spaces\n' > EXPECTED.$test_count
+$TEST_DIRECTORY/hex-xcode --direction=encode  < EXPECTED.$test_count |\
+	$TEST_DIRECTORY/hex-xcode --direction=decode > OUTPUT.$test_count
+test_expect_equal_file EXPECTED.$test_count OUTPUT.$test_count
+
+test_begin_subtest "round trip 8bit chars"
+echo '%c3%91%c3%a5%c3%b0%c3%a3%c3%a5%c3%a9-%c3%8f%c3%8a' > EXPECTED.$test_count
+$TEST_DIRECTORY/hex-xcode --direction=decode  < EXPECTED.$test_count |\
+	$TEST_DIRECTORY/hex-xcode --direction=encode > OUTPUT.$test_count
+test_expect_equal_file EXPECTED.$test_count OUTPUT.$test_count
+test_done
diff --git a/test/notmuch-test b/test/notmuch-test
index cc732c3..43b5734 100755
--- a/test/notmuch-test
+++ b/test/notmuch-test
@@ -60,6 +60,7 @@ TESTS="
   emacs-hello
   emacs-show
   missing-headers
+  hex-escaping
 "
 TESTS=${NOTMUCH_TESTS:=$TESTS}
 
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [Patch v3 4/6] test: add database routines for testing.
  2012-08-19 13:18 random corpus generator, v3 david
                   ` (2 preceding siblings ...)
  2012-08-19 13:18 ` [Patch v3 3/6] test/hex-escaping: new test for hex escaping routines david
@ 2012-08-19 13:18 ` david
  2012-08-19 13:18 ` [Patch v3 5/6] test: add generator for random "stub" messages david
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 9+ messages in thread
From: david @ 2012-08-19 13:18 UTC (permalink / raw)
  To: notmuch; +Cc: David Bremner

From: David Bremner <bremner@debian.org>

Initially, provide a way to create "stub" messages in the notmuch
database without corresponding files.  This is essentially cut and
paste from lib/database.cc. This is a separate file since we don't
want to export these symbols from libnotmuch or bloat the library with
non-exported code.
---
 test/Makefile.local  |    1 +
 test/database-test.c |   72 ++++++++++++++++++++++++++++++++++++++++++++++++++
 test/database-test.h |   21 +++++++++++++++
 3 files changed, 94 insertions(+)
 create mode 100644 test/database-test.c
 create mode 100644 test/database-test.h

diff --git a/test/Makefile.local b/test/Makefile.local
index 2622185..bb67ded 100644
--- a/test/Makefile.local
+++ b/test/Makefile.local
@@ -41,4 +41,5 @@ CLEAN := $(CLEAN) $(dir)/smtp-dummy $(dir)/smtp-dummy.o \
 	 $(dir)/symbol-test $(dir)/symbol-test.o \
 	 $(dir)/arg-test $(dir)/arg-test.o \
 	 $(dir)/hex-xcode $(dir)/hex-xcode.o \
+	 $(dir)/database-test.o \
 	 $(dir)/corpus.mail $(dir)/test-results $(dir)/tmp.*
diff --git a/test/database-test.c b/test/database-test.c
new file mode 100644
index 0000000..f0f1c8e
--- /dev/null
+++ b/test/database-test.c
@@ -0,0 +1,72 @@
+/*
+ * Database routines intended only for testing, not exported from
+ * library.
+ *
+ * Copyright (c) 2012 David Bremner
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see http://www.gnu.org/licenses/ .
+ *
+ * Author: David Bremner <david@tethera.net>
+ */
+
+#include "notmuch-private.h"
+#include "database-test.h"
+
+/* Add a message with the given message ID, and no corresponding
+ * file, to 'notmuch'.
+ *
+ * 'tags' is either NULL or a NULL-terminated array of tag names to
+ * attach to the new message.
+ *
+ * Returns NOTMUCH_STATUS_SUCCESS when the message was added,
+ * NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID when a message with this ID
+ * already exists, or the status of whichever library call failed.
+ * The message handle is local to this function and is destroyed
+ * before returning.
+ */
+notmuch_status_t
+notmuch_database_add_stub_message (notmuch_database_t *notmuch,
+				   const char *message_id,
+				   const char **tags)
+{
+    const char **tag;
+    notmuch_status_t ret;
+    notmuch_private_status_t private_status;
+    notmuch_message_t *message;
+
+    ret = _notmuch_database_ensure_writable (notmuch);
+    if (ret)
+	return ret;
+
+    message = _notmuch_message_create_for_message_id (notmuch,
+						      message_id,
+						      &private_status);
+    if (message == NULL) {
+	return COERCE_STATUS (private_status,
+			      "Unexpected status value from _notmuch_message_create_for_message_id");
+
+    }
+
+    if (private_status != NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND) {
+	ret = NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID;
+	goto DONE;
+    }
+
+    _notmuch_message_add_term (message, "type", "mail");
+
+    /* Freeze whether or not there are tags: the thaw below must be
+     * balanced by a freeze, and it is the thaw that writes the new
+     * message out to the database. */
+    ret = notmuch_message_freeze (message);
+    if (ret)
+	goto DONE;
+
+    if (tags) {
+	for (tag = tags; *tag; tag++) {
+	    ret = notmuch_message_add_tag (message, *tag);
+	    if (ret)
+		goto DONE;
+	}
+    }
+
+    ret = notmuch_message_thaw (message);
+
+  DONE:
+    notmuch_message_destroy (message);
+
+    return ret;
+}
diff --git a/test/database-test.h b/test/database-test.h
new file mode 100644
index 0000000..84f7988
--- /dev/null
+++ b/test/database-test.h
@@ -0,0 +1,21 @@
+#ifndef _DATABASE_TEST_H
+#define _DATABASE_TEST_H
+/* Add a new stub message to the given notmuch database.
+ *
+ * At least the following return values are possible:
+ *
+ * NOTMUCH_STATUS_SUCCESS: Message successfully added to database.
+ *
+ * NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID: Message has the same message
+ *	ID as another message already in the database.
+ *
+ * NOTMUCH_STATUS_READ_ONLY_DATABASE: Database was opened in read-only
+ *	mode so no message can be added.
+ */
+
+notmuch_status_t
+notmuch_database_add_stub_message (notmuch_database_t *database,
+				   const char *message_id,
+				   const char **tag_list);
+
+#endif
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [Patch v3 5/6] test: add generator for random "stub" messages
  2012-08-19 13:18 random corpus generator, v3 david
                   ` (3 preceding siblings ...)
  2012-08-19 13:18 ` [Patch v3 4/6] test: add database routines for testing david
@ 2012-08-19 13:18 ` david
  2012-09-08 13:38   ` Tomi Ollila
  2012-08-19 13:18 ` [Patch v3 6/6] test: add broken roundtrip test david
  2012-10-20  4:16 ` random corpus generator, v3 Ethan Glasser-Camp
  6 siblings, 1 reply; 9+ messages in thread
From: david @ 2012-08-19 13:18 UTC (permalink / raw)
  To: notmuch; +Cc: David Bremner

From: David Bremner <bremner@debian.org>

Initial use case is testing dump and restore, so we only have
message-ids and tags.

The message ID's are nothing like RFC compliant, but it doesn't seem
any harder to roundtrip random UTF-8 strings than RFC-compliant ones.

Tags are UTF-8, even though notmuch is in principle more generous than
that.
---
 test/.gitignore      |    1 +
 test/Makefile.local  |    9 +++
 test/basic           |    2 +-
 test/random-corpus.c |  202 ++++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 213 insertions(+), 1 deletion(-)
 create mode 100644 test/random-corpus.c

diff --git a/test/.gitignore b/test/.gitignore
index be7ab5e..1eff7ce 100644
--- a/test/.gitignore
+++ b/test/.gitignore
@@ -4,4 +4,5 @@ smtp-dummy
 symbol-test
 arg-test
 hex-xcode
+random-corpus
 tmp.*
diff --git a/test/Makefile.local b/test/Makefile.local
index bb67ded..ad0fd91 100644
--- a/test/Makefile.local
+++ b/test/Makefile.local
@@ -16,6 +16,13 @@ $(dir)/arg-test: $(dir)/arg-test.o command-line-arguments.o util/libutil.a
 $(dir)/hex-xcode: $(dir)/hex-xcode.o command-line-arguments.o util/libutil.a
 	$(call quiet,CC) -I. $^ -o $@ -ltalloc
 
+random_corpus_deps =  $(dir)/random-corpus.o  $(dir)/database-test.o \
+			notmuch-config.o command-line-arguments.o \
+			lib/libnotmuch.a util/libutil.a
+
+$(dir)/random-corpus: $(random_corpus_deps)
+	$(call quiet,CC) $(CFLAGS_FINAL) $^ -o $@ $(CONFIGURE_LDFLAGS)
+
 $(dir)/smtp-dummy: $(smtp_dummy_modules)
 	$(call quiet,CC) $^ -o $@
 
@@ -26,6 +33,7 @@ $(dir)/symbol-test: $(dir)/symbol-test.o
 
 TEST_BINARIES=$(dir)/arg-test \
 	      $(dir)/hex-xcode \
+	      $(dir)/random-corpus \
 	      $(dir)/smtp-dummy \
 	      $(dir)/symbol-test
 
@@ -42,4 +50,5 @@ CLEAN := $(CLEAN) $(dir)/smtp-dummy $(dir)/smtp-dummy.o \
 	 $(dir)/arg-test $(dir)/arg-test.o \
 	 $(dir)/hex-xcode $(dir)/hex-xcode.o \
 	 $(dir)/database-test.o \
+	 $(dir)/random-corpus $(dir)/random-corpus.o \
 	 $(dir)/corpus.mail $(dir)/test-results $(dir)/tmp.*
diff --git a/test/basic b/test/basic
index af57026..5f8ed7a 100755
--- a/test/basic
+++ b/test/basic
@@ -54,7 +54,7 @@ test_begin_subtest 'Ensure that all available tests will be run by notmuch-test'
 eval $(sed -n -e '/^TESTS="$/,/^"$/p' $TEST_DIRECTORY/notmuch-test)
 tests_in_suite=$(for i in $TESTS; do echo $i; done | sort)
 available=$(find "$TEST_DIRECTORY" -maxdepth 1 -type f -executable -printf '%f\n' | \
-    sed -r -e "/^(aggregate-results.sh|notmuch-test|smtp-dummy|test-verbose|symbol-test|arg-test|hex-xcode)$/d" | \
+    sed -r -e "/^(aggregate-results.sh|notmuch-test|smtp-dummy|test-verbose|symbol-test|arg-test|hex-xcode|random-corpus)$/d" | \
     sort)
 test_expect_equal "$tests_in_suite" "$available"
 
diff --git a/test/random-corpus.c b/test/random-corpus.c
new file mode 100644
index 0000000..8c5b559
--- /dev/null
+++ b/test/random-corpus.c
@@ -0,0 +1,202 @@
+/*
+ * Generate a random corpus of stub messages.
+ *
+ * Initial use case is testing dump and restore, so we only have
+ * message-ids and tags.
+ *
+ * Generated message-id's and tags are intentionally nasty.
+ *
+ * Copyright (c) 2012 David Bremner
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see http://www.gnu.org/licenses/ .
+ *
+ * Author: David Bremner <david@tethera.net>
+ */
+
+#include <stdlib.h>
+#include <assert.h>
+#include <talloc.h>
+#include <string.h>
+#include <glib.h>
+#include <math.h>
+
+#include "notmuch-client.h"
+#include "command-line-arguments.h"
+#include "database-test.h"
+
+/* Current largest UTF-32 value defined. Note that most of these will
+ * be printed as boxes in most fonts.
+ */
+
+#define GLYPH_MAX 0x10FFFE
+
+/* Return a random code point in the range 0x01..GLYPH_MAX, never a
+ * UTF-16 surrogate (0xD800..0xDFFF): surrogates are not valid in
+ * UTF-8, and the strings built from these code points are meant to be
+ * UTF-8.
+ *
+ * Exactly two calls to random() are made per code point.
+ */
+static gunichar
+random_unichar (void)
+{
+    const int surrogate_first = 0xD800;
+    const int surrogate_count = 0x800;
+    int value;
+
+    /*
+     *  Choose about half ascii as test characters, as ascii
+     *  punctuation and whitespace is the main cause of problems for
+     *  the (old) restore parser
+     */
+    if (random () % 2 == 0)
+	return 0x01 + random () % 0x7f;
+
+    /* The rest of unicode: draw from a range shortened by the size of
+     * the surrogate block, then step over that block. */
+    value = 0x80 + random () % (GLYPH_MAX - 0x80 + 1 - surrogate_count);
+    if (value >= surrogate_first)
+	value += surrogate_count;
+
+    return value;
+}
+
+/* Return a newly allocated, NUL-terminated UTF-8 string made of
+ * 'char_count' random code points from random_unichar().
+ *
+ * The string is allocated under the talloc context 'ctx'.  The
+ * process exits with status 1 if memory cannot be obtained or a code
+ * point cannot be converted.
+ */
+static char *
+random_utf8_string (void *ctx, size_t char_count)
+{
+    /* g_unichar_to_utf8() needs up to 6 bytes of room per character. */
+    const size_t max_bytes_per_char = 6;
+
+    gchar *buf;
+    size_t offset = 0;
+    size_t i;
+
+    /* Worst case for every character, plus one for the terminator, so
+     * the buffer never has to grow inside the loop. */
+    buf = talloc_array (ctx, gchar, char_count * max_bytes_per_char + 1);
+    if (buf == NULL) {
+	fprintf (stderr, "out of memory\n");
+	exit (1);
+    }
+
+    for (i = 0; i < char_count; i++) {
+	gint written = g_unichar_to_utf8 (random_unichar (), buf + offset);
+
+	if (written <= 0) {
+	    fprintf (stderr, "error converting to utf8\n");
+	    exit (1);
+	}
+
+	offset += written;
+    }
+
+    buf[offset] = '\0';
+    return buf;
+}
+
+
+/* Fill the database named by the given configuration file with
+ * randomly generated stub messages.
+ *
+ * Options: --config-path (required), --num-messages, --max-tags,
+ * --message-id-len, --tag-len and --seed.  Every message gets the tag
+ * "random-corpus" plus between 0 and max-tags random tags.  A
+ * message ID that is already in the database is replaced by a newly
+ * generated one.
+ *
+ * Exits with a non-zero status on invalid options, if the
+ * configuration or database cannot be opened, or if adding a message
+ * fails.
+ */
+int
+main (int argc, char **argv)
+{
+
+    void *ctx = talloc_new (NULL);
+
+    char *config_path  = NULL;
+    notmuch_config_t *config;
+    notmuch_database_t *notmuch;
+
+    int num_messages = 500;
+    int max_tags = 10;
+    // leave room for UTF-8 encoding.
+    int tag_len = NOTMUCH_TAG_MAX / 6;
+    // NOTMUCH_MESSAGE_ID_MAX is not exported, so we make a
+    // conservative guess.
+    int message_id_len = (NOTMUCH_TAG_MAX - 20) / 6;
+
+    int seed = 734569;
+
+    notmuch_opt_desc_t options[] = {
+	{ NOTMUCH_OPT_STRING, &config_path, "config-path", 'c', 0 },
+	{ NOTMUCH_OPT_INT, &num_messages, "num-messages", 'n', 0 },
+	{ NOTMUCH_OPT_INT, &max_tags, "max-tags", 'm', 0 },
+	{ NOTMUCH_OPT_INT, &message_id_len, "message-id-len", 'M', 0 },
+	{ NOTMUCH_OPT_INT, &tag_len, "tag-len", 't', 0 },
+	{ NOTMUCH_OPT_INT, &seed, "seed", 's', 0 },
+	{ 0, 0, 0, 0, 0 }
+    };
+
+    int opt_index = parse_arguments (argc, argv, options, 1);
+
+    if (opt_index < 0)
+	exit (1);
+
+    if (config_path == NULL) {
+	fprintf (stderr, "configuration path must be specified\n");
+	exit (1);
+    }
+
+    /* These values are used as divisors of the '%' operator below, so
+     * zero or negative values must be rejected up front. */
+    if (max_tags < 0 || tag_len < 1 || message_id_len < 1) {
+	fprintf (stderr, "max-tags must not be negative; "
+		 "tag-len and message-id-len must be positive\n");
+	exit (1);
+    }
+
+    config = notmuch_config_open (ctx, config_path, NULL);
+    if (config == NULL)
+	return 1;
+
+    if (notmuch_database_open (notmuch_config_get_database_path (config),
+			       NOTMUCH_DATABASE_MODE_READ_WRITE, &notmuch))
+	return 1;
+
+    srandom (seed);
+
+    int count;
+    for (count = 0; count < num_messages; count++) {
+	int j;
+	int num_tags = random () % (max_tags + 1);
+	int this_mid_len = random () % message_id_len + 1;
+	const char **tag_list;
+	char *mid;
+	notmuch_status_t status;
+
+	/* Generate a fresh message ID and tag list for each attempt,
+	 * and try again if the ID is already taken. */
+	do {
+	    mid = random_utf8_string (ctx, this_mid_len);
+
+	    /* Room for the fixed tag, the random tags and the NULL. */
+	    tag_list = talloc_realloc (ctx, NULL, const char *, num_tags + 2);
+	    if (tag_list == NULL) {
+		fprintf (stderr, "out of memory\n");
+		exit (1);
+	    }
+
+	    tag_list[0] = "random-corpus";
+
+	    for (j = 0; j < num_tags; j++) {
+		int this_tag_len = random () % tag_len + 1;
+
+		tag_list[j + 1] = random_utf8_string (ctx, this_tag_len);
+	    }
+
+	    tag_list[j + 1] = NULL;
+
+	    status = notmuch_database_add_stub_message (notmuch, mid, tag_list);
+	} while (status == NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID);
+
+	if (status != NOTMUCH_STATUS_SUCCESS) {
+	    fprintf (stderr, "error %d adding message\n", status);
+	    exit (status);
+	}
+    }
+
+    notmuch_database_destroy (notmuch);
+
+    talloc_free (ctx);
+
+    return 0;
+}
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [Patch v3 6/6] test: add broken roundtrip test
  2012-08-19 13:18 random corpus generator, v3 david
                   ` (4 preceding siblings ...)
  2012-08-19 13:18 ` [Patch v3 5/6] test: add generator for random "stub" messages david
@ 2012-08-19 13:18 ` david
  2012-10-20  4:16 ` random corpus generator, v3 Ethan Glasser-Camp
  6 siblings, 0 replies; 9+ messages in thread
From: david @ 2012-08-19 13:18 UTC (permalink / raw)
  To: notmuch; +Cc: David Bremner

From: David Bremner <bremner@debian.org>

We demonstrate the current notmuch restore parser being confused by
message-id's and tags containing non alpha numeric characters
(particularly space and parentheses are problematic because they are
not escaped by notmuch dump).

We save the files as hex escaped on disk so that the output from the
failing test will not confuse the terminal emulator of people running
the test.
---
 test/dump-restore |    9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/test/dump-restore b/test/dump-restore
index f25f7cf..218c9b7 100755
--- a/test/dump-restore
+++ b/test/dump-restore
@@ -78,4 +78,13 @@ test_begin_subtest "dump --output=outfile -- from:cworth"
 notmuch dump --output=dump-outfile-dash-inbox.actual -- from:cworth
 test_expect_equal_file dump-cworth.expected dump-outfile-dash-inbox.actual
 
+test_expect_success 'roundtripping random message-ids and tags' \
+    'test_subtest_known_broken &&
+     ${TEST_DIRECTORY}/random-corpus --num-messages=10 --config-path=${NOTMUCH_CONFIG} &&
+     notmuch dump | ${TEST_DIRECTORY}/hex-xcode --direction=encode > EXPECTED.$test_count &&
+     notmuch tag -random-corpus tag:random-corpus &&
+     ${TEST_DIRECTORY}/hex-xcode --direction=decode < EXPECTED.$test_count | notmuch restore 2>/dev/null &&
+     notmuch dump | ${TEST_DIRECTORY}/hex-xcode --direction=encode > OUTPUT.$test_count &&
+     test_cmp EXPECTED.$test_count OUTPUT.$test_count'
+
 test_done
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [Patch v3 5/6] test: add generator for random "stub" messages
  2012-08-19 13:18 ` [Patch v3 5/6] test: add generator for random "stub" messages david
@ 2012-09-08 13:38   ` Tomi Ollila
  0 siblings, 0 replies; 9+ messages in thread
From: Tomi Ollila @ 2012-09-08 13:38 UTC (permalink / raw)
  To: david, notmuch; +Cc: David Bremner

On Sun, Aug 19 2012, david@tethera.net wrote:

> From: David Bremner <bremner@debian.org>
>
> Initial use case is testing dump and restore, so we only have
> message-ids and tags.
>
> The message ID's are nothing like RFC compliant, but it doesn't seem
> any harder to roundtrip random UTF-8 strings than RFC-compliant ones.
>
> Tags are UTF-8, even though notmuch is in principle more generous than
> that.
> ---

Mostly LGTM (the whole series). A few comments inline...

Finally, 6/6 adds known broken test -- when will we see this code
taken into use and the broken test fixed :)

>  test/.gitignore      |    1 +
>  test/Makefile.local  |    9 +++
>  test/basic           |    2 +-
>  test/random-corpus.c |  202 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  4 files changed, 213 insertions(+), 1 deletion(-)
>  create mode 100644 test/random-corpus.c

[ ... ]

>  
> diff --git a/test/random-corpus.c b/test/random-corpus.c
> new file mode 100644
> index 0000000..8c5b559
> --- /dev/null
> +++ b/test/random-corpus.c

[ ... ]

> +
> +/* Current largest UTF-32 value defined. Note that most of these will
> + * be printed as boxes in most fonts.
> + */

Should we be talking about UTF-8 values? (UTF-8 currently has the same
limit.)

> +
> +#define GLYPH_MAX 0x10FFFE
> +
> +static gunichar
> +random_unichar ()
> +{
> +    int start = 1, stop = GLYPH_MAX;
> +    int class = random() % 2;
> +
> +    /*
> +     *  Choose about half ascii as test characters, as ascii
> +     *  punctation and whitespace is the main cause of problems for
> +     *  the (old) restore parser
> +    */
> +    switch (class) {
> +    case 0:
> +	/* ascii */
> +	start = 0x01;
> +	stop = 0x7f;
> +	break;
> +    case 1:
> +	/* the rest of unicode */
> +	start = 0x80;
> +	stop = GLYPH_MAX;
> +    }
> +
> +    if (start == stop)
> +	return start;
> +    else
> +	return start + (random() % (stop - start + 1));
> +}
> +
> +static char *
> +random_utf8_string (void *ctx, size_t char_count)
> +{
> +
> +    gchar *buf = NULL;
> +    size_t buf_size = 0;
> +
> +    size_t offset = 0;
> +
> +    size_t i;
> +
> +    buf = talloc_realloc (ctx, NULL, gchar, char_count);
> +    buf_size = char_count;
> +
> +    for (i = 0; i < char_count; i++) {
> +	gunichar randomchar;
> +	size_t written;
> +
> +	/* 6 for one glyph, one for null */
> +	if (buf_size - offset < 8) {
> +	    buf_size += 16;
> +	    buf = talloc_realloc (ctx, buf, gchar, buf_size);

This reallocation will hit many times, as originally there were just
char_count bytes allocated -- this limit will probably get hit before
halfway through the creation of the random string (half use 1 byte, the
other half 2, 3 or 4 bytes, mostly 4, even though only half of the
4-byte range is used).

Maybe originally allocating char_count * 2 + 8 and if realloc required
(char_count - i) * 2 + 8... or maybe better, just doing the latter
realloc and replacing first with buf = NULL; buf_size = 0;

Alternatively you could play with random states; calculate size,
reset random state, alloc size + 1 and write chars.

> +	}
> +
> +	randomchar = random_unichar();
> +
> +	written = g_unichar_to_utf8 (randomchar, buf + offset);
> +
> +	if (written <= 0) {
> +	    fprintf (stderr, "error converting to utf8\n");
> +	    exit (1);
> +	}
> +
> +	offset += written;
> +
> +    }

Above there is an extra newline. There are a few others in other
files (at least after opening and before closing brace).
Maybe uncrustify your source :)

> +    buf[offset] = 0;
> +    return buf;
> +}
> +

[ ... ]


Tomi

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: random corpus generator, v3
  2012-08-19 13:18 random corpus generator, v3 david
                   ` (5 preceding siblings ...)
  2012-08-19 13:18 ` [Patch v3 6/6] test: add broken roundtrip test david
@ 2012-10-20  4:16 ` Ethan Glasser-Camp
  6 siblings, 0 replies; 9+ messages in thread
From: Ethan Glasser-Camp @ 2012-10-20  4:16 UTC (permalink / raw)
  To: david, notmuch

david@tethera.net writes:

> This obsoletes the series at:
>
>      id:"1344888831-4301-1-git-send-email-bremner@debian.org"
>
> Changes since v2:
>
> - clean up new test-binaries and objects
>
> - remove the "set -o pipefail" leftover from debugging.  Possibly this
>   makes sense as a global setting, but in a seperate patch.
>
> - add hex-escape to test/basic
>
> - rebase against updated master.

Hi! This looks pretty good to me and I am for improving the test
infrastructure.

Some minor problems:

- Patch 2 doesn't apply; neither do patches 4 or 5, presumably due to changes
  that weren't made due to patch 2.

- Commit message discipline: the subject line of patch 4 ends in a
  period. "Seperate" is spelled by most people as "separate", though I
  would encourage you to buck the trend if you are so inclined.

- In patch 4:

> +    if (private_status == NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND) {
> +       _notmuch_message_add_term (message, "type", "mail");
> +    } else {
> +       return NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID;
> +    }

Why not switch the branches? That is, check for private_status !=
NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND and return immediately?

- In patch 5:

> +    for (count = 0; count < num_messages; count++) {
> +	int j;
> +	int num_tags = random () % (max_tags + 1);
> +	int this_mid_len = random () % message_id_len + 1;

This looks odd. I'm pretty sure it's correct, but my brain keeps saying,
"Why are there no parentheses on (message_id_len + 1)?" Maybe just a
comment that message ids must be at least one character long, or the
ranges of values necessary for both of these variables.

Ethan

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2012-10-20  4:16 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-08-19 13:18 random corpus generator, v3 david
2012-08-19 13:18 ` [Patch v3 1/6] hex-escape: (en|de)code strings to/from restricted character set david
2012-08-19 13:18 ` [Patch v3 2/6] test/hex-xcode: new test binary david
2012-08-19 13:18 ` [Patch v3 3/6] test/hex-escaping: new test for hex escaping routines david
2012-08-19 13:18 ` [Patch v3 4/6] test: add database routines for testing david
2012-08-19 13:18 ` [Patch v3 5/6] test: add generator for random "stub" messages david
2012-09-08 13:38   ` Tomi Ollila
2012-08-19 13:18 ` [Patch v3 6/6] test: add broken roundtrip test david
2012-10-20  4:16 ` random corpus generator, v3 Ethan Glasser-Camp

Code repositories for project(s) associated with this public inbox

	https://yhetil.org/notmuch.git/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).