* v7 gzip dump restore patches @ 2014-04-05 15:43 David Bremner 2014-04-05 15:43 ` [Patch v7 1/6] dump: support gzipped and atomic output David Bremner ` (7 more replies) 0 siblings, 8 replies; 12+ messages in thread From: David Bremner @ 2014-04-05 15:43 UTC (permalink / raw) To: notmuch Supercedes id:1396554083-3892-2-git-send-email-david@tethera.net - adds new analogues of strerror - util_error_string - gz_error_string Interdiff: diff --git a/configure b/configure index 1d624f7..83b4af7 100755 --- a/configure +++ b/configure @@ -509,7 +509,7 @@ EOF echo " http://xapian.org/" fi if [ $have_zlib -eq 0 ]; then - echo " zlib library (including development files such as headers)" + echo " zlib library (>= version 1.2.5.2, including development files such as headers)" echo " http://zlib.net/" echo fi diff --git a/notmuch-dump.c b/notmuch-dump.c index 2a7252a..2849eab 100644 --- a/notmuch-dump.c +++ b/notmuch-dump.c @@ -127,7 +127,7 @@ notmuch_database_dump (notmuch_database_t *notmuch, dump_format_t output_format, notmuch_bool_t gzip_output) { - gzFile output; + gzFile output = NULL; const char *mode = gzip_output ? "w9" : "wT"; const char *name_for_error = output_file_name ? output_file_name : "stdout"; @@ -178,7 +178,10 @@ notmuch_database_dump (notmuch_database_t *notmuch, } if (gzclose_w (output) != Z_OK) { + fprintf (stderr, "Error closing %s: %s\n", name_for_error, + gzerror (output, NULL)); ret = EXIT_FAILURE; + output = NULL; goto DONE; } @@ -192,6 +195,9 @@ notmuch_database_dump (notmuch_database_t *notmuch, } DONE: + if (ret != EXIT_SUCCESS && output) + (void) gzclose_w (output); + if (ret != EXIT_SUCCESS && output_file_name) (void) unlink (tempname); diff --git a/notmuch-restore.c b/notmuch-restore.c index eb5b7b2..7abee0a 100644 --- a/notmuch-restore.c +++ b/notmuch-restore.c @@ -129,7 +129,8 @@ notmuch_restore_command (notmuch_config_t *config, int argc, char *argv[]) tag_op_list_t *tag_ops; char *input_file_name = NULL; - gzFile input; + const char *name_for_error = NULL; + gzFile input = NULL; char *line = NULL; void *line_ctx = NULL; ssize_t line_len; @@ -157,19 +158,26 @@ notmuch_restore_command (notmuch_config_t *config, int argc, char *argv[]) }; opt_index = parse_arguments (argc, argv, options, 1); - if (opt_index < 0) - return EXIT_FAILURE; + if (opt_index < 0) { + ret = EXIT_FAILURE; + goto DONE; + } + + name_for_error = input_file_name ? input_file_name : "stdin"; if (! accumulate) flags |= TAG_FLAG_REMOVE_ALL; + errno = 0; if (input_file_name) input = gzopen (input_file_name, "r"); else { int infd = dup (STDIN_FILENO); if (infd < 0) { - fprintf (stderr, "Error duping stdin\n"); - return EXIT_FAILURE; + fprintf (stderr, "Error duping stdin: %s\n", + strerror (errno)); + ret = EXIT_FAILURE; + goto DONE; } input = gzdopen (infd, "r"); if (! input) @@ -178,19 +186,22 @@ notmuch_restore_command (notmuch_config_t *config, int argc, char *argv[]) if (input == NULL) { fprintf (stderr, "Error opening %s for (gzip) reading: %s\n", - input_file_name ? input_file_name : "stdin", strerror (errno)); - return EXIT_FAILURE; + name_for_error, strerror (errno)); + ret = EXIT_FAILURE; + goto DONE; } if (opt_index < argc) { fprintf (stderr, "Unused positional parameter: %s\n", argv[opt_index]); - return EXIT_FAILURE; + ret = EXIT_FAILURE; + goto DONE; } tag_ops = tag_op_list_create (config); if (tag_ops == NULL) { fprintf (stderr, "Out of memory.\n"); - return EXIT_FAILURE; + ret = EXIT_FAILURE; + goto DONE; } do { @@ -199,12 +210,17 @@ notmuch_restore_command (notmuch_config_t *config, int argc, char *argv[]) status = gz_getline (line_ctx, &line, &line_len, input); /* empty input file not considered an error */ - if (status == UTIL_EOF) - return EXIT_SUCCESS; - - if (status) - return EXIT_FAILURE; + if (status == UTIL_EOF) { + ret = EXIT_SUCCESS; + goto DONE; + } + if (status) { + fprintf (stderr, "Error reading (gzipped) input: %s\n", + gz_error_string(status, input)); + ret = EXIT_FAILURE; + goto DONE; + } } while ((line_len == 0) || (line[0] == '#') || /* the cast is safe because we checked about for line_len < 0 */ @@ -269,17 +285,37 @@ notmuch_restore_command (notmuch_config_t *config, int argc, char *argv[]) if (ret) break; - } while (gz_getline (line_ctx, &line, &line_len, input) == UTIL_SUCCESS); + } while (! (ret = gz_getline (line_ctx, &line, &line_len, input))); + - if (line_ctx != NULL) - talloc_free (line_ctx); + /* EOF is normal loop termination condition, UTIL_SUCCESS is + * impossible here */ + if (ret == UTIL_EOF) { + ret = UTIL_SUCCESS; + } else { + fprintf (stderr, "Error reading (gzipped) input: %s\n", + gz_error_string (ret, input)); + } + + /* currently this should not be after DONE: since we don't + * know if the xregcomp was reached + */ if (input_format == DUMP_FORMAT_SUP) regfree (®ex); - notmuch_database_destroy (notmuch); + DONE: + if (line_ctx != NULL) + talloc_free (line_ctx); - gzclose_r (input); + if (notmuch) + notmuch_database_destroy (notmuch); + + if (input && gzclose_r (input)) { + fprintf (stderr, "Error closing %s: %s\n", + name_for_error, gzerror (input, NULL)); + ret = EXIT_FAILURE; + } return ret ? EXIT_FAILURE : EXIT_SUCCESS; } diff --git a/util/Makefile.local b/util/Makefile.local index e2a5b65..905f237 100644 --- a/util/Makefile.local +++ b/util/Makefile.local @@ -4,7 +4,8 @@ dir := util extra_cflags += -I$(srcdir)/$(dir) libutil_c_srcs := $(dir)/xutil.c $(dir)/error_util.c $(dir)/hex-escape.c \ - $(dir)/string-util.c $(dir)/talloc-extra.c $(dir)/zlib-extra.c + $(dir)/string-util.c $(dir)/talloc-extra.c $(dir)/zlib-extra.c \ + $(dir)/util.c libutil_modules := $(libutil_c_srcs:.c=.o) diff --git a/util/util.c b/util/util.c new file mode 100644 index 0000000..3bd305d --- /dev/null +++ b/util/util.c @@ -0,0 +1,24 @@ +#include "util.h" +#include "error_util.h" +#include <string.h> +#include <errno.h> + +const char * +util_error_string (util_status_t errnum) +{ + switch (errnum) { + case UTIL_SUCCESS: + return "none"; + case UTIL_OUT_OF_MEMORY: + return "out of memory"; + case UTIL_EOF: + return "end of file"; + case UTIL_ERRNO: + return strerror (errno); + case UTIL_GZERROR: + /* we lack context to be more informative here */ + return "zlib error"; + default: + INTERNAL_ERROR("unexpected error status %d", errnum); + } +} diff --git a/util/util.h b/util/util.h index 8663cfc..d12fadb 100644 --- a/util/util.h +++ b/util/util.h @@ -2,11 +2,28 @@ #define _UTIL_H typedef enum util_status { + /** + * No error occurred. + */ UTIL_SUCCESS = 0, - UTIL_ERROR = 1, + /** + * Out of memory. + */ UTIL_OUT_OF_MEMORY, + /** + * End of stream reached while attempting to read. + */ UTIL_EOF, - UTIL_FILE, + /** + * Low level error occured, consult errno. + */ + UTIL_ERRNO, + /** + * Zlib error occured, call gzerror for details. + */ + UTIL_GZERROR } util_status_t; +const char * +util_error_string (util_status_t status); #endif diff --git a/util/zlib-extra.c b/util/zlib-extra.c index 922ab52..cb34845 100644 --- a/util/zlib-extra.c +++ b/util/zlib-extra.c @@ -54,9 +54,9 @@ gz_getline (void *talloc_ctx, char **bufptr, ssize_t *bytes_read, gzFile stream) else goto SUCCESS; case Z_ERRNO: - return UTIL_FILE; + return UTIL_ERRNO; default: - return UTIL_ERROR; + return UTIL_GZERROR; } } @@ -75,3 +75,11 @@ gz_getline (void *talloc_ctx, char **bufptr, ssize_t *bytes_read, gzFile stream) *bytes_read = offset; return UTIL_SUCCESS; } + +const char *gz_error_string (util_status_t status, gzFile file) +{ + if (status == UTIL_GZERROR) + return gzerror (file, NULL); + else + return util_error_string (status); +} diff --git a/util/zlib-extra.h b/util/zlib-extra.h index ed67115..dbaa0b9 100644 --- a/util/zlib-extra.h +++ b/util/zlib-extra.h @@ -4,8 +4,22 @@ #include "util.h" #include <zlib.h> -/* Like getline, but read from a gzFile. Allocation is with talloc */ +/* Like getline, but read from a gzFile. Allocation is with talloc. + * Returns: + * + * UTIL_SUCCESS, UTIL_OUT_OF_MEMORY, UTIL_ERRNO, UTIL_GZERROR + * Consult util.h for description + * + * UTIL_EOF End of file encountered before + * any characters read + */ util_status_t gz_getline (void *ctx, char **lineptr, ssize_t *bytes_read, gzFile stream); +/* return a suitable error string based on the return status + * from gz_readline + */ + +const char * +gz_error_string (util_status_t status, gzFile stream); #endif ^ permalink raw reply related [flat|nested] 12+ messages in thread
* [Patch v7 1/6] dump: support gzipped and atomic output 2014-04-05 15:43 v7 gzip dump restore patches David Bremner @ 2014-04-05 15:43 ` David Bremner 2014-04-05 15:43 ` [Patch v7 2/6] util: add gz_readline David Bremner ` (6 subsequent siblings) 7 siblings, 0 replies; 12+ messages in thread From: David Bremner @ 2014-04-05 15:43 UTC (permalink / raw) To: notmuch The main goal is to support gzipped output for future internal calls (e.g. from notmuch-new) to notmuch_database_dump. The additional dependency is not very heavy since xapian already pulls in zlib. We want the dump to be "atomic", in the sense that after running the dump file is either present and complete, or not present. This avoids certain classes of mishaps involving overwriting a good backup with a bad or partial one. --- INSTALL | 20 +++++++-- Makefile.local | 2 +- configure | 28 +++++++++++-- doc/man1/notmuch-dump.rst | 3 ++ notmuch-client.h | 4 +- notmuch-dump.c | 101 +++++++++++++++++++++++++++++++++++++--------- test/T240-dump-restore.sh | 12 ++++++ 7 files changed, 142 insertions(+), 28 deletions(-) diff --git a/INSTALL b/INSTALL index 690b0ef..b543c50 100644 --- a/INSTALL +++ b/INSTALL @@ -20,8 +20,8 @@ configure stage. Dependencies ------------ -Notmuch depends on three libraries: Xapian, GMime 2.4 or 2.6, and -Talloc which are each described below: +Notmuch depends on four libraries: Xapian, GMime 2.4 or 2.6, +Talloc, and zlib which are each described below: Xapian ------ @@ -60,6 +60,18 @@ Talloc which are each described below: Talloc is available from http://talloc.samba.org/ + zlib + ---- + + zlib is an extremely popular compression library. It is used + by Xapian, so if you installed that you will already have + zlib. You may need to install the zlib headers separately. + + Notmuch needs the transparent write feature of zlib introduced + in version 1.2.5.2 (Dec. 2011). + + zlib is available from http://zlib.net + Building Documentation ---------------------- @@ -79,11 +91,11 @@ dependencies with a simple simple command line. For example: For Debian and similar: - sudo apt-get install libxapian-dev libgmime-2.6-dev libtalloc-dev python-sphinx + sudo apt-get install libxapian-dev libgmime-2.6-dev libtalloc-dev zlib1g-dev python-sphinx For Fedora and similar: - sudo yum install xapian-core-devel gmime-devel libtalloc-devel python-sphinx + sudo yum install xapian-core-devel gmime-devel libtalloc-devel zlib-devel python-sphinx On other systems, a similar command can be used, but the details of the package names may be different. diff --git a/Makefile.local b/Makefile.local index cb7b106..e5a20a7 100644 --- a/Makefile.local +++ b/Makefile.local @@ -41,7 +41,7 @@ PV_FILE=bindings/python/notmuch/version.py # Smash together user's values with our extra values FINAL_CFLAGS = -DNOTMUCH_VERSION=$(VERSION) $(CPPFLAGS) $(CFLAGS) $(WARN_CFLAGS) $(extra_cflags) $(CONFIGURE_CFLAGS) FINAL_CXXFLAGS = $(CPPFLAGS) $(CXXFLAGS) $(WARN_CXXFLAGS) $(extra_cflags) $(extra_cxxflags) $(CONFIGURE_CXXFLAGS) -FINAL_NOTMUCH_LDFLAGS = $(LDFLAGS) -Lutil -lutil -Llib -lnotmuch $(AS_NEEDED_LDFLAGS) $(GMIME_LDFLAGS) $(TALLOC_LDFLAGS) +FINAL_NOTMUCH_LDFLAGS = $(LDFLAGS) -Lutil -lutil -Llib -lnotmuch $(AS_NEEDED_LDFLAGS) $(GMIME_LDFLAGS) $(TALLOC_LDFLAGS) $(ZLIB_LDFLAGS) FINAL_NOTMUCH_LINKER = CC ifneq ($(LINKER_RESOLVES_LIBRARY_DEPENDENCIES),1) FINAL_NOTMUCH_LDFLAGS += $(CONFIGURE_LDFLAGS) diff --git a/configure b/configure index 1d430b9..83b4af7 100755 --- a/configure +++ b/configure @@ -340,6 +340,18 @@ else errors=$((errors + 1)) fi +printf "Checking for zlib (>= 1.2.5.2)... " +have_zlib=0 +if pkg-config --atleast-version=1.2.5.2 zlib; then + printf "Yes.\n" + have_zlib=1 + zlib_cflags=$(pkg-config --cflags zlib) + zlib_ldflags=$(pkg-config --libs zlib) +else + printf "No.\n" + errors=$((errors + 1)) +fi + printf "Checking for talloc development files... " if pkg-config --exists talloc; then printf "Yes.\n" @@ -496,6 +508,11 @@ EOF echo " Xapian library (including development files such as headers)" echo " http://xapian.org/" fi + if [ $have_zlib -eq 0 ]; then + echo " zlib library (>= version 1.2.5.2, including development files such as headers)" + echo " http://zlib.net/" + echo + fi if [ $have_gmime -eq 0 ]; then echo " Either GMime 2.4 library" $GMIME_24_VERSION_CTR "or GMime 2.6 library" $GMIME_26_VERSION_CTR echo " (including development files such as headers)" @@ -519,11 +536,11 @@ case a simple command will install everything you need. For example: On Debian and similar systems: - sudo apt-get install libxapian-dev libgmime-2.6-dev libtalloc-dev + sudo apt-get install libxapian-dev libgmime-2.6-dev libtalloc-dev zlib1g-dev Or on Fedora and similar systems: - sudo yum install xapian-core-devel gmime-devel libtalloc-devel + sudo yum install xapian-core-devel gmime-devel libtalloc-devel zlib-devel On other systems, similar commands can be used, but the details of the package names may be different. @@ -844,6 +861,10 @@ XAPIAN_LDFLAGS = ${xapian_ldflags} GMIME_CFLAGS = ${gmime_cflags} GMIME_LDFLAGS = ${gmime_ldflags} +# Flags needed to compile and link against zlib +ZLIB_CFLAGS = ${zlib_cflags} +ZLIB_LDFLAGS = ${zlib_ldflags} + # Flags needed to compile and link against talloc TALLOC_CFLAGS = ${talloc_cflags} TALLOC_LDFLAGS = ${talloc_ldflags} @@ -882,6 +903,7 @@ CONFIGURE_CFLAGS = -DHAVE_GETLINE=\$(HAVE_GETLINE) \$(GMIME_CFLAGS) \\ -DUTIL_BYTE_ORDER=\$(UTIL_BYTE_ORDER) CONFIGURE_CXXFLAGS = -DHAVE_GETLINE=\$(HAVE_GETLINE) \$(GMIME_CFLAGS) \\ + \$(ZLIB_CFLAGS) \\ \$(TALLOC_CFLAGS) -DHAVE_VALGRIND=\$(HAVE_VALGRIND) \\ \$(VALGRIND_CFLAGS) \$(XAPIAN_CXXFLAGS) \\ -DHAVE_STRCASESTR=\$(HAVE_STRCASESTR) \\ @@ -892,5 +914,5 @@ CONFIGURE_CXXFLAGS = -DHAVE_GETLINE=\$(HAVE_GETLINE) \$(GMIME_CFLAGS) \\ -DHAVE_XAPIAN_COMPACT=\$(HAVE_XAPIAN_COMPACT) \\ -DUTIL_BYTE_ORDER=\$(UTIL_BYTE_ORDER) -CONFIGURE_LDFLAGS = \$(GMIME_LDFLAGS) \$(TALLOC_LDFLAGS) \$(XAPIAN_LDFLAGS) +CONFIGURE_LDFLAGS = \$(GMIME_LDFLAGS) \$(TALLOC_LDFLAGS) \$(ZLIB_LDFLAGS) \$(XAPIAN_LDFLAGS) EOF diff --git a/doc/man1/notmuch-dump.rst b/doc/man1/notmuch-dump.rst index 17d1da5..d94cb4f 100644 --- a/doc/man1/notmuch-dump.rst +++ b/doc/man1/notmuch-dump.rst @@ -19,6 +19,9 @@ recreated from the messages themselves. The output of notmuch dump is therefore the only critical thing to backup (and much more friendly to incremental backup than the native database files.) +``--gzip`` + Compress the output in a format compatible with **gzip(1)**. + ``--format=(sup|batch-tag)`` Notmuch restore supports two plain text dump formats, both with one message-id per line, followed by a list of tags. diff --git a/notmuch-client.h b/notmuch-client.h index d110648..e1efbe0 100644 --- a/notmuch-client.h +++ b/notmuch-client.h @@ -450,7 +450,9 @@ typedef enum dump_formats { int notmuch_database_dump (notmuch_database_t *notmuch, const char *output_file_name, - const char *query_str, dump_format_t output_format); + const char *query_str, + dump_format_t output_format, + notmuch_bool_t gzip_output); #include "command-line-arguments.h" #endif diff --git a/notmuch-dump.c b/notmuch-dump.c index 21702d7..2849eab 100644 --- a/notmuch-dump.c +++ b/notmuch-dump.c @@ -21,9 +21,11 @@ #include "notmuch-client.h" #include "hex-escape.h" #include "string-util.h" +#include <zlib.h> + static int -database_dump_file (notmuch_database_t *notmuch, FILE *output, +database_dump_file (notmuch_database_t *notmuch, gzFile output, const char *query_str, int output_format) { notmuch_query_t *query; @@ -69,7 +71,7 @@ database_dump_file (notmuch_database_t *notmuch, FILE *output, } if (output_format == DUMP_FORMAT_SUP) { - fprintf (output, "%s (", message_id); + gzprintf (output, "%s (", message_id); } for (tags = notmuch_message_get_tags (message); @@ -78,12 +80,12 @@ database_dump_file (notmuch_database_t *notmuch, FILE *output, const char *tag_str = notmuch_tags_get (tags); if (! first) - fputs (" ", output); + gzputs (output, " "); first = 0; if (output_format == DUMP_FORMAT_SUP) { - fputs (tag_str, output); + gzputs (output, tag_str); } else { if (hex_encode (notmuch, tag_str, &buffer, &buffer_size) != HEX_SUCCESS) { @@ -91,12 +93,12 @@ database_dump_file (notmuch_database_t *notmuch, FILE *output, tag_str); return EXIT_FAILURE; } - fprintf (output, "+%s", buffer); + gzprintf (output, "+%s", buffer); } } if (output_format == DUMP_FORMAT_SUP) { - fputs (")\n", output); + gzputs (output, ")\n"); } else { if (make_boolean_term (notmuch, "id", message_id, &buffer, &buffer_size)) { @@ -104,7 +106,7 @@ database_dump_file (notmuch_database_t *notmuch, FILE *output, message_id, strerror (errno)); return EXIT_FAILURE; } - fprintf (output, " -- %s\n", buffer); + gzprintf (output, " -- %s\n", buffer); } notmuch_message_destroy (message); @@ -121,24 +123,83 @@ database_dump_file (notmuch_database_t *notmuch, FILE *output, int notmuch_database_dump (notmuch_database_t *notmuch, const char *output_file_name, - const char *query_str, dump_format_t output_format) + const char *query_str, + dump_format_t output_format, + notmuch_bool_t gzip_output) { - FILE *output = stdout; - int ret; + gzFile output = NULL; + const char *mode = gzip_output ? "w9" : "wT"; + const char *name_for_error = output_file_name ? output_file_name : "stdout"; + + char *tempname = NULL; + int outfd = -1; + + int ret = -1; if (output_file_name) { - output = fopen (output_file_name, "w"); - if (output == NULL) { - fprintf (stderr, "Error opening %s for writing: %s\n", - output_file_name, strerror (errno)); - return EXIT_FAILURE; - } + tempname = talloc_asprintf (notmuch, "%s.XXXXXX", output_file_name); + outfd = mkstemp (tempname); + } else { + outfd = dup (STDOUT_FILENO); + } + + if (outfd < 0) { + fprintf (stderr, "Bad output file %s\n", name_for_error); + goto DONE; + } + + output = gzdopen (outfd, mode); + + if (output == NULL) { + fprintf (stderr, "Error opening %s for (gzip) writing: %s\n", + name_for_error, strerror (errno)); + if (close (outfd)) + fprintf (stderr, "Error closing %s during shutdown: %s\n", + name_for_error, strerror (errno)); + goto DONE; } ret = database_dump_file (notmuch, output, query_str, output_format); + if (ret) goto DONE; + + ret = gzflush (output, Z_FINISH); + if (ret) { + fprintf (stderr, "Error flushing output: %s\n", gzerror (output, NULL)); + goto DONE; + } + + if (output_file_name) { + ret = fdatasync (outfd); + if (ret) { + fprintf (stderr, "Error syncing %s to disk: %s\n", + name_for_error, strerror (errno)); + goto DONE; + } + } + + if (gzclose_w (output) != Z_OK) { + fprintf (stderr, "Error closing %s: %s\n", name_for_error, + gzerror (output, NULL)); + ret = EXIT_FAILURE; + output = NULL; + goto DONE; + } + + if (output_file_name) { + ret = rename (tempname, output_file_name); + if (ret) { + fprintf (stderr, "Error renaming %s to %s: %s\n", + tempname, output_file_name, strerror (errno)); + goto DONE; + } + + } + DONE: + if (ret != EXIT_SUCCESS && output) + (void) gzclose_w (output); - if (output != stdout) - fclose (output); + if (ret != EXIT_SUCCESS && output_file_name) + (void) unlink (tempname); return ret; } @@ -158,6 +219,7 @@ notmuch_dump_command (notmuch_config_t *config, int argc, char *argv[]) int opt_index; int output_format = DUMP_FORMAT_BATCH_TAG; + notmuch_bool_t gzip_output = 0; notmuch_opt_desc_t options[] = { { NOTMUCH_OPT_KEYWORD, &output_format, "format", 'f', @@ -165,6 +227,7 @@ notmuch_dump_command (notmuch_config_t *config, int argc, char *argv[]) { "batch-tag", DUMP_FORMAT_BATCH_TAG }, { 0, 0 } } }, { NOTMUCH_OPT_STRING, &output_file_name, "output", 'o', 0 }, + { NOTMUCH_OPT_BOOLEAN, &gzip_output, "gzip", 'z', 0 }, { 0, 0, 0, 0, 0 } }; @@ -181,7 +244,7 @@ notmuch_dump_command (notmuch_config_t *config, int argc, char *argv[]) } ret = notmuch_database_dump (notmuch, output_file_name, query_str, - output_format); + output_format, gzip_output); notmuch_database_destroy (notmuch); diff --git a/test/T240-dump-restore.sh b/test/T240-dump-restore.sh index 0004438..d79aca8 100755 --- a/test/T240-dump-restore.sh +++ b/test/T240-dump-restore.sh @@ -68,6 +68,18 @@ test_begin_subtest "dump --output=outfile --" notmuch dump --output=dump-1-arg-dash.actual -- test_expect_equal_file dump.expected dump-1-arg-dash.actual +# gzipped output + +test_begin_subtest "dump --gzip" +notmuch dump --gzip > dump-gzip.gz +gunzip dump-gzip.gz +test_expect_equal_file dump.expected dump-gzip + +test_begin_subtest "dump --gzip --output=outfile" +notmuch dump --gzip --output=dump-gzip-outfile.gz +gunzip dump-gzip-outfile.gz +test_expect_equal_file dump.expected dump-gzip-outfile + # Note, we assume all messages from cworth have a message-id # containing cworth.org -- 1.9.0 ^ permalink raw reply related [flat|nested] 12+ messages in thread
* [Patch v7 2/6] util: add gz_readline 2014-04-05 15:43 v7 gzip dump restore patches David Bremner 2014-04-05 15:43 ` [Patch v7 1/6] dump: support gzipped and atomic output David Bremner @ 2014-04-05 15:43 ` David Bremner 2014-04-06 10:21 ` Tomi Ollila 2014-04-05 15:43 ` [Patch v7 3/6] test: restore with missing final newline David Bremner ` (5 subsequent siblings) 7 siblings, 1 reply; 12+ messages in thread From: David Bremner @ 2014-04-05 15:43 UTC (permalink / raw) To: notmuch The idea is to provide a more or less drop in replacement for readline to read from zlib/gzip streams. Take the opportunity to replace malloc with talloc. --- util/Makefile.local | 3 +- util/util.c | 24 +++++++++++++++ util/util.h | 29 ++++++++++++++++++ util/zlib-extra.c | 85 +++++++++++++++++++++++++++++++++++++++++++++++++++++ util/zlib-extra.h | 25 ++++++++++++++++ 5 files changed, 165 insertions(+), 1 deletion(-) create mode 100644 util/util.c create mode 100644 util/util.h create mode 100644 util/zlib-extra.c create mode 100644 util/zlib-extra.h diff --git a/util/Makefile.local b/util/Makefile.local index 29c0ce6..905f237 100644 --- a/util/Makefile.local +++ b/util/Makefile.local @@ -4,7 +4,8 @@ dir := util extra_cflags += -I$(srcdir)/$(dir) libutil_c_srcs := $(dir)/xutil.c $(dir)/error_util.c $(dir)/hex-escape.c \ - $(dir)/string-util.c $(dir)/talloc-extra.c + $(dir)/string-util.c $(dir)/talloc-extra.c $(dir)/zlib-extra.c \ + $(dir)/util.c libutil_modules := $(libutil_c_srcs:.c=.o) diff --git a/util/util.c b/util/util.c new file mode 100644 index 0000000..3bd305d --- /dev/null +++ b/util/util.c @@ -0,0 +1,24 @@ +#include "util.h" +#include "error_util.h" +#include <string.h> +#include <errno.h> + +const char * +util_error_string (util_status_t errnum) +{ + switch (errnum) { + case UTIL_SUCCESS: + return "none"; + case UTIL_OUT_OF_MEMORY: + return "out of memory"; + case UTIL_EOF: + return "end of file"; + case UTIL_ERRNO: + return strerror (errno); + case UTIL_GZERROR: + /* we lack context to be more informative here */ + return "zlib error"; + default: + INTERNAL_ERROR("unexpected error status %d", errnum); + } +} diff --git a/util/util.h b/util/util.h new file mode 100644 index 0000000..d12fadb --- /dev/null +++ b/util/util.h @@ -0,0 +1,29 @@ +#ifndef _UTIL_H +#define _UTIL_H + +typedef enum util_status { + /** + * No error occurred. + */ + UTIL_SUCCESS = 0, + /** + * Out of memory. + */ + UTIL_OUT_OF_MEMORY, + /** + * End of stream reached while attempting to read. + */ + UTIL_EOF, + /** + * Low level error occured, consult errno. + */ + UTIL_ERRNO, + /** + * Zlib error occured, call gzerror for details. + */ + UTIL_GZERROR +} util_status_t; + +const char * +util_error_string (util_status_t status); +#endif diff --git a/util/zlib-extra.c b/util/zlib-extra.c new file mode 100644 index 0000000..cb34845 --- /dev/null +++ b/util/zlib-extra.c @@ -0,0 +1,85 @@ +/* zlib-extra.c - Extra or enhanced routines for compressed I/O. + * + * Copyright (c) 2014 David Bremner + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see http://www.gnu.org/licenses/ . + * + * Author: David Bremner <david@tethera.net> + */ + +#include "zlib-extra.h" +#include <talloc.h> +#include <stdio.h> +#include <string.h> + +/* mimic POSIX/glibc getline, but on a zlib gzFile stream, and using talloc */ +util_status_t +gz_getline (void *talloc_ctx, char **bufptr, ssize_t *bytes_read, gzFile stream) +{ + char *buf = *bufptr; + unsigned int len; + size_t offset = 0; + + if (buf) { + len = talloc_array_length (buf); + } else { + /* same as getdelim from gnulib */ + len = 120; + buf = talloc_array (talloc_ctx, char, len); + if (buf == NULL) + return UTIL_OUT_OF_MEMORY; + } + + while (1) { + if (! gzgets (stream, buf + offset, len - offset)) { + /* Null indicates EOF or error */ + int zlib_status = 0; + (void) gzerror (stream, &zlib_status); + switch (zlib_status) { + case Z_OK: + /* no data read before EOF */ + if (offset == 0) + return UTIL_EOF; + else + goto SUCCESS; + case Z_ERRNO: + return UTIL_ERRNO; + default: + return UTIL_GZERROR; + } + } + + offset += strlen (buf + offset); + + if (buf[offset - 1] == '\n') + goto SUCCESS; + + len *= 2; + buf = talloc_realloc (talloc_ctx, buf, char, len); + if (buf == NULL) + return UTIL_OUT_OF_MEMORY; + } + SUCCESS: + *bufptr = buf; + *bytes_read = offset; + return UTIL_SUCCESS; +} + +const char *gz_error_string (util_status_t status, gzFile file) +{ + if (status == UTIL_GZERROR) + return gzerror (file, NULL); + else + return util_error_string (status); +} diff --git a/util/zlib-extra.h b/util/zlib-extra.h new file mode 100644 index 0000000..dbaa0b9 --- /dev/null +++ b/util/zlib-extra.h @@ -0,0 +1,25 @@ +#ifndef _ZLIB_EXTRA_H +#define _ZLIB_EXTRA_H + +#include "util.h" +#include <zlib.h> + +/* Like getline, but read from a gzFile. Allocation is with talloc. + * Returns: + * + * UTIL_SUCCESS, UTIL_OUT_OF_MEMORY, UTIL_ERRNO, UTIL_GZERROR + * Consult util.h for description + * + * UTIL_EOF End of file encountered before + * any characters read + */ +util_status_t +gz_getline (void *ctx, char **lineptr, ssize_t *bytes_read, gzFile stream); + +/* return a suitable error string based on the return status + * from gz_readline + */ + +const char * +gz_error_string (util_status_t status, gzFile stream); +#endif -- 1.9.0 ^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [Patch v7 2/6] util: add gz_readline 2014-04-05 15:43 ` [Patch v7 2/6] util: add gz_readline David Bremner @ 2014-04-06 10:21 ` Tomi Ollila 0 siblings, 0 replies; 12+ messages in thread From: Tomi Ollila @ 2014-04-06 10:21 UTC (permalink / raw) To: David Bremner, notmuch On Sat, Apr 05 2014, David Bremner <david@tethera.net> wrote: > The idea is to provide a more or less drop in replacement for readline > to read from zlib/gzip streams. Take the opportunity to replace > malloc with talloc. > --- > util/Makefile.local | 3 +- > util/util.c | 24 +++++++++++++++ > util/util.h | 29 ++++++++++++++++++ > util/zlib-extra.c | 85 +++++++++++++++++++++++++++++++++++++++++++++++++++++ > util/zlib-extra.h | 25 ++++++++++++++++ > 5 files changed, 165 insertions(+), 1 deletion(-) > create mode 100644 util/util.c > create mode 100644 util/util.h > create mode 100644 util/zlib-extra.c > create mode 100644 util/zlib-extra.h > > diff --git a/util/Makefile.local b/util/Makefile.local > index 29c0ce6..905f237 100644 > --- a/util/Makefile.local > +++ b/util/Makefile.local > @@ -4,7 +4,8 @@ dir := util > extra_cflags += -I$(srcdir)/$(dir) > > libutil_c_srcs := $(dir)/xutil.c $(dir)/error_util.c $(dir)/hex-escape.c \ > - $(dir)/string-util.c $(dir)/talloc-extra.c > + $(dir)/string-util.c $(dir)/talloc-extra.c $(dir)/zlib-extra.c \ > + $(dir)/util.c > > libutil_modules := $(libutil_c_srcs:.c=.o) > > diff --git a/util/util.c b/util/util.c > new file mode 100644 > index 0000000..3bd305d > --- /dev/null > +++ b/util/util.c > @@ -0,0 +1,24 @@ > +#include "util.h" > +#include "error_util.h" > +#include <string.h> > +#include <errno.h> > + > +const char * > +util_error_string (util_status_t errnum) > +{ > + switch (errnum) { > + case UTIL_SUCCESS: > + return "none"; The series gets OK from me. This "none" could be amended to "success" to be 'consistent' with libc strerror(0) -- which returns 'Success'. Tomi > + case UTIL_OUT_OF_MEMORY: > + return "out of memory"; > + case UTIL_EOF: > + return "end of file"; > + case UTIL_ERRNO: > + return strerror (errno); > + case UTIL_GZERROR: > + /* we lack context to be more informative here */ > + return "zlib error"; > + default: > + INTERNAL_ERROR("unexpected error status %d", errnum); > + } > +} > diff --git a/util/util.h b/util/util.h > new file mode 100644 > index 0000000..d12fadb > --- /dev/null > +++ b/util/util.h > @@ -0,0 +1,29 @@ > +#ifndef _UTIL_H > +#define _UTIL_H > + > +typedef enum util_status { > + /** > + * No error occurred. > + */ > + UTIL_SUCCESS = 0, > + /** > + * Out of memory. > + */ > + UTIL_OUT_OF_MEMORY, > + /** > + * End of stream reached while attempting to read. > + */ > + UTIL_EOF, > + /** > + * Low level error occured, consult errno. > + */ > + UTIL_ERRNO, > + /** > + * Zlib error occured, call gzerror for details. > + */ > + UTIL_GZERROR > +} util_status_t; > + > +const char * > +util_error_string (util_status_t status); > +#endif > diff --git a/util/zlib-extra.c b/util/zlib-extra.c > new file mode 100644 > index 0000000..cb34845 > --- /dev/null > +++ b/util/zlib-extra.c > @@ -0,0 +1,85 @@ > +/* zlib-extra.c - Extra or enhanced routines for compressed I/O. > + * > + * Copyright (c) 2014 David Bremner > + * > + * This program is free software: you can redistribute it and/or modify > + * it under the terms of the GNU General Public License as published by > + * the Free Software Foundation, either version 3 of the License, or > + * (at your option) any later version. > + * > + * This program is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + * GNU General Public License for more details. > + * > + * You should have received a copy of the GNU General Public License > + * along with this program. If not, see http://www.gnu.org/licenses/ . > + * > + * Author: David Bremner <david@tethera.net> > + */ > + > +#include "zlib-extra.h" > +#include <talloc.h> > +#include <stdio.h> > +#include <string.h> > + > +/* mimic POSIX/glibc getline, but on a zlib gzFile stream, and using talloc */ > +util_status_t > +gz_getline (void *talloc_ctx, char **bufptr, ssize_t *bytes_read, gzFile stream) > +{ > + char *buf = *bufptr; > + unsigned int len; > + size_t offset = 0; > + > + if (buf) { > + len = talloc_array_length (buf); > + } else { > + /* same as getdelim from gnulib */ > + len = 120; > + buf = talloc_array (talloc_ctx, char, len); > + if (buf == NULL) > + return UTIL_OUT_OF_MEMORY; > + } > + > + while (1) { > + if (! gzgets (stream, buf + offset, len - offset)) { > + /* Null indicates EOF or error */ > + int zlib_status = 0; > + (void) gzerror (stream, &zlib_status); > + switch (zlib_status) { > + case Z_OK: > + /* no data read before EOF */ > + if (offset == 0) > + return UTIL_EOF; > + else > + goto SUCCESS; > + case Z_ERRNO: > + return UTIL_ERRNO; > + default: > + return UTIL_GZERROR; > + } > + } > + > + offset += strlen (buf + offset); > + > + if (buf[offset - 1] == '\n') > + goto SUCCESS; > + > + len *= 2; > + buf = talloc_realloc (talloc_ctx, buf, char, len); > + if (buf == NULL) > + return UTIL_OUT_OF_MEMORY; > + } > + SUCCESS: > + *bufptr = buf; > + *bytes_read = offset; > + return UTIL_SUCCESS; > +} > + > +const char *gz_error_string (util_status_t status, gzFile file) > +{ > + if (status == UTIL_GZERROR) > + return gzerror (file, NULL); > + else > + return util_error_string (status); > +} > diff --git a/util/zlib-extra.h b/util/zlib-extra.h > new file mode 100644 > index 0000000..dbaa0b9 > --- /dev/null > +++ b/util/zlib-extra.h > @@ -0,0 +1,25 @@ > +#ifndef _ZLIB_EXTRA_H > +#define _ZLIB_EXTRA_H > + > +#include "util.h" > +#include <zlib.h> > + > +/* Like getline, but read from a gzFile. Allocation is with talloc. > + * Returns: > + * > + * UTIL_SUCCESS, UTIL_OUT_OF_MEMORY, UTIL_ERRNO, UTIL_GZERROR > + * Consult util.h for description > + * > + * UTIL_EOF End of file encountered before > + * any characters read > + */ > +util_status_t > +gz_getline (void *ctx, char **lineptr, ssize_t *bytes_read, gzFile stream); > + > +/* return a suitable error string based on the return status > + * from gz_readline > + */ > + > +const char * > +gz_error_string (util_status_t status, gzFile stream); > +#endif > -- > 1.9.0 > > _______________________________________________ > notmuch mailing list > notmuch@notmuchmail.org > http://notmuchmail.org/mailman/listinfo/notmuch ^ permalink raw reply [flat|nested] 12+ messages in thread
* [Patch v7 3/6] test: restore with missing final newline 2014-04-05 15:43 v7 gzip dump restore patches David Bremner 2014-04-05 15:43 ` [Patch v7 1/6] dump: support gzipped and atomic output David Bremner 2014-04-05 15:43 ` [Patch v7 2/6] util: add gz_readline David Bremner @ 2014-04-05 15:43 ` David Bremner 2014-04-05 15:43 ` [Patch v7 4/6] restore: transparently support gzipped input David Bremner ` (4 subsequent siblings) 7 siblings, 0 replies; 12+ messages in thread From: David Bremner @ 2014-04-05 15:43 UTC (permalink / raw) To: notmuch Recent proposed patches for gzipped input had a bug with handling missing newlines that was not caught by the current test suite --- test/T240-dump-restore.sh | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/test/T240-dump-restore.sh b/test/T240-dump-restore.sh index d79aca8..b6d8602 100755 --- a/test/T240-dump-restore.sh +++ b/test/T240-dump-restore.sh @@ -110,6 +110,15 @@ notmuch dump --format=batch-tag from:cworth | sed 's/^.*-- id://' | \ sort > OUTPUT.$test_count test_expect_equal_file EXPECTED.$test_count OUTPUT.$test_count +test_begin_subtest "format=batch-tag, missing newline" +printf "+a_tag_without_newline -- id:20091117232137.GA7669@griffis1.net" > IN +notmuch restore --accumulate < IN +notmuch dump id:20091117232137.GA7669@griffis1.net > OUT +cat <<EOF > EXPECTED ++a_tag_without_newline +inbox +unread -- id:20091117232137.GA7669@griffis1.net +EOF +test_expect_equal_file EXPECTED OUT + test_begin_subtest "format=batch-tag, # round-trip" notmuch dump --format=sup | sort > EXPECTED.$test_count notmuch dump --format=batch-tag | notmuch restore --format=batch-tag -- 1.9.0 ^ permalink raw reply related [flat|nested] 12+ messages in thread
* [Patch v7 4/6] restore: transparently support gzipped input 2014-04-05 15:43 v7 gzip dump restore patches David Bremner ` (2 preceding siblings ...) 2014-04-05 15:43 ` [Patch v7 3/6] test: restore with missing final newline David Bremner @ 2014-04-05 15:43 ` David Bremner 2014-04-10 13:24 ` Austin Clements 2014-04-05 15:43 ` [Patch v7 5/6] notmuch-new: backup tags before database upgrade David Bremner ` (3 subsequent siblings) 7 siblings, 1 reply; 12+ messages in thread From: David Bremner @ 2014-04-05 15:43 UTC (permalink / raw) To: notmuch We rely completely on zlib to do the right thing in detecting gzipped input. Since our dump format is chosen to be 7 bit ascii, this should be fine. --- doc/man1/notmuch-restore.rst | 8 ++++ notmuch-restore.c | 93 +++++++++++++++++++++++++++++++++----------- test/T240-dump-restore.sh | 14 +++++++ 3 files changed, 92 insertions(+), 23 deletions(-) diff --git a/doc/man1/notmuch-restore.rst b/doc/man1/notmuch-restore.rst index d6cf19a..936b138 100644 --- a/doc/man1/notmuch-restore.rst +++ b/doc/man1/notmuch-restore.rst @@ -50,6 +50,14 @@ Supported options for **restore** include format, this heuristic, based the fact that batch-tag format contains no parentheses, should be accurate. +GZIPPED INPUT +============= + +\ **notmuch restore** will detect if the input is compressed in +**gzip(1)** format and automatically decompress it while reading. This +detection does not depend on file naming and in particular works for +standard input. + SEE ALSO ======== diff --git a/notmuch-restore.c b/notmuch-restore.c index c54d513..7abee0a 100644 --- a/notmuch-restore.c +++ b/notmuch-restore.c @@ -22,6 +22,7 @@ #include "hex-escape.h" #include "tag-util.h" #include "string-util.h" +#include "zlib-extra.h" static regex_t regex; @@ -128,10 +129,10 @@ notmuch_restore_command (notmuch_config_t *config, int argc, char *argv[]) tag_op_list_t *tag_ops; char *input_file_name = NULL; - FILE *input = stdin; + const char *name_for_error = NULL; + gzFile input = NULL; char *line = NULL; void *line_ctx = NULL; - size_t line_size; ssize_t line_len; int ret = 0; @@ -157,39 +158,69 @@ notmuch_restore_command (notmuch_config_t *config, int argc, char *argv[]) }; opt_index = parse_arguments (argc, argv, options, 1); - if (opt_index < 0) - return EXIT_FAILURE; + if (opt_index < 0) { + ret = EXIT_FAILURE; + goto DONE; + } + + name_for_error = input_file_name ? input_file_name : "stdin"; if (! accumulate) flags |= TAG_FLAG_REMOVE_ALL; - if (input_file_name) { - input = fopen (input_file_name, "r"); - if (input == NULL) { - fprintf (stderr, "Error opening %s for reading: %s\n", - input_file_name, strerror (errno)); - return EXIT_FAILURE; + errno = 0; + if (input_file_name) + input = gzopen (input_file_name, "r"); + else { + int infd = dup (STDIN_FILENO); + if (infd < 0) { + fprintf (stderr, "Error duping stdin: %s\n", + strerror (errno)); + ret = EXIT_FAILURE; + goto DONE; } + input = gzdopen (infd, "r"); + if (! input) + close (infd); + } + + if (input == NULL) { + fprintf (stderr, "Error opening %s for (gzip) reading: %s\n", + name_for_error, strerror (errno)); + ret = EXIT_FAILURE; + goto DONE; } if (opt_index < argc) { fprintf (stderr, "Unused positional parameter: %s\n", argv[opt_index]); - return EXIT_FAILURE; + ret = EXIT_FAILURE; + goto DONE; } tag_ops = tag_op_list_create (config); if (tag_ops == NULL) { fprintf (stderr, "Out of memory.\n"); - return EXIT_FAILURE; + ret = EXIT_FAILURE; + goto DONE; } do { - line_len = getline (&line, &line_size, input); + util_status_t status; + + status = gz_getline (line_ctx, &line, &line_len, input); /* empty input file not considered an error */ - if (line_len < 0) - return EXIT_SUCCESS; + if (status == UTIL_EOF) { + ret = EXIT_SUCCESS; + goto DONE; + } + if (status) { + fprintf (stderr, "Error reading (gzipped) input: %s\n", + gz_error_string(status, input)); + ret = EXIT_FAILURE; + goto DONE; + } } while ((line_len == 0) || (line[0] == '#') || /* the cast is safe because we checked about for line_len < 0 */ @@ -254,21 +285,37 @@ notmuch_restore_command (notmuch_config_t *config, int argc, char *argv[]) if (ret) break; - } while ((line_len = getline (&line, &line_size, input)) != -1); + } while (! (ret = gz_getline (line_ctx, &line, &line_len, input))); + - if (line_ctx != NULL) - talloc_free (line_ctx); + /* EOF is normal loop termination condition, UTIL_SUCCESS is + * impossible here */ + if (ret == UTIL_EOF) { + ret = UTIL_SUCCESS; + } else { + fprintf (stderr, "Error reading (gzipped) input: %s\n", + gz_error_string (ret, input)); + } + + /* currently this should not be after DONE: since we don't + * know if the xregcomp was reached + */ if (input_format == DUMP_FORMAT_SUP) regfree (®ex); - if (line) - free (line); + DONE: + if (line_ctx != NULL) + talloc_free (line_ctx); - notmuch_database_destroy (notmuch); + if (notmuch) + notmuch_database_destroy (notmuch); - if (input != stdin) - fclose (input); + if (input && gzclose_r (input)) { + fprintf (stderr, "Error closing %s: %s\n", + name_for_error, gzerror (input, NULL)); + ret = EXIT_FAILURE; + } return ret ? EXIT_FAILURE : EXIT_SUCCESS; } diff --git a/test/T240-dump-restore.sh b/test/T240-dump-restore.sh index b6d8602..efe463e 100755 --- a/test/T240-dump-restore.sh +++ b/test/T240-dump-restore.sh @@ -80,6 +80,20 @@ notmuch dump --gzip --output=dump-gzip-outfile.gz gunzip dump-gzip-outfile.gz test_expect_equal_file dump.expected dump-gzip-outfile +test_begin_subtest "restoring gzipped stdin" +notmuch dump --gzip --output=backup.gz +notmuch tag +new_tag '*' +notmuch restore < backup.gz +notmuch dump --output=dump.actual +test_expect_equal_file dump.expected dump.actual + +test_begin_subtest "restoring gzipped file" +notmuch dump --gzip --output=backup.gz +notmuch tag +new_tag '*' +notmuch restore --input=backup.gz +notmuch dump --output=dump.actual +test_expect_equal_file dump.expected dump.actual + # Note, we assume all messages from cworth have a message-id # containing cworth.org -- 1.9.0 ^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [Patch v7 4/6] restore: transparently support gzipped input 2014-04-05 15:43 ` [Patch v7 4/6] restore: transparently support gzipped input David Bremner @ 2014-04-10 13:24 ` Austin Clements 0 siblings, 0 replies; 12+ messages in thread From: Austin Clements @ 2014-04-10 13:24 UTC (permalink / raw) To: David Bremner, notmuch On Sat, 05 Apr 2014, David Bremner <david@tethera.net> wrote: > We rely completely on zlib to do the right thing in detecting gzipped > input. Since our dump format is chosen to be 7 bit ascii, this should > be fine. > --- > doc/man1/notmuch-restore.rst | 8 ++++ > notmuch-restore.c | 93 +++++++++++++++++++++++++++++++++----------- > test/T240-dump-restore.sh | 14 +++++++ > 3 files changed, 92 insertions(+), 23 deletions(-) > > diff --git a/doc/man1/notmuch-restore.rst b/doc/man1/notmuch-restore.rst > index d6cf19a..936b138 100644 > --- a/doc/man1/notmuch-restore.rst > +++ b/doc/man1/notmuch-restore.rst > @@ -50,6 +50,14 @@ Supported options for **restore** include > format, this heuristic, based the fact that batch-tag format > contains no parentheses, should be accurate. > > +GZIPPED INPUT > +============= > + > +\ **notmuch restore** will detect if the input is compressed in > +**gzip(1)** format and automatically decompress it while reading. This > +detection does not depend on file naming and in particular works for > +standard input. > + > SEE ALSO > ======== > > diff --git a/notmuch-restore.c b/notmuch-restore.c > index c54d513..7abee0a 100644 > --- a/notmuch-restore.c > +++ b/notmuch-restore.c > @@ -22,6 +22,7 @@ > #include "hex-escape.h" > #include "tag-util.h" > #include "string-util.h" > +#include "zlib-extra.h" > > static regex_t regex; > > @@ -128,10 +129,10 @@ notmuch_restore_command (notmuch_config_t *config, int argc, char *argv[]) > tag_op_list_t *tag_ops; > > char *input_file_name = NULL; > - FILE *input = stdin; > + const char *name_for_error = NULL; > + gzFile input = NULL; > char *line = NULL; > void *line_ctx = NULL; > - size_t line_size; > ssize_t line_len; > > int ret = 0; > @@ -157,39 +158,69 @@ notmuch_restore_command (notmuch_config_t *config, int argc, char *argv[]) > }; > > opt_index = parse_arguments (argc, argv, options, 1); > - if (opt_index < 0) > - return EXIT_FAILURE; > + if (opt_index < 0) { > + ret = EXIT_FAILURE; > + goto DONE; > + } > + > + name_for_error = input_file_name ? input_file_name : "stdin"; > > if (! accumulate) > flags |= TAG_FLAG_REMOVE_ALL; > > - if (input_file_name) { > - input = fopen (input_file_name, "r"); > - if (input == NULL) { > - fprintf (stderr, "Error opening %s for reading: %s\n", > - input_file_name, strerror (errno)); > - return EXIT_FAILURE; > + errno = 0; > + if (input_file_name) > + input = gzopen (input_file_name, "r"); > + else { > + int infd = dup (STDIN_FILENO); > + if (infd < 0) { > + fprintf (stderr, "Error duping stdin: %s\n", > + strerror (errno)); > + ret = EXIT_FAILURE; > + goto DONE; > } > + input = gzdopen (infd, "r"); > + if (! input) > + close (infd); > + } > + > + if (input == NULL) { > + fprintf (stderr, "Error opening %s for (gzip) reading: %s\n", > + name_for_error, strerror (errno)); > + ret = EXIT_FAILURE; > + goto DONE; > } > > if (opt_index < argc) { > fprintf (stderr, "Unused positional parameter: %s\n", argv[opt_index]); > - return EXIT_FAILURE; > + ret = EXIT_FAILURE; > + goto DONE; > } > > tag_ops = tag_op_list_create (config); > if (tag_ops == NULL) { > fprintf (stderr, "Out of memory.\n"); > - return EXIT_FAILURE; > + ret = EXIT_FAILURE; > + goto DONE; > } > > do { > - line_len = getline (&line, &line_size, input); > + util_status_t status; > + > + status = gz_getline (line_ctx, &line, &line_len, input); > > /* empty input file not considered an error */ > - if (line_len < 0) > - return EXIT_SUCCESS; > + if (status == UTIL_EOF) { > + ret = EXIT_SUCCESS; > + goto DONE; > + } > > + if (status) { > + fprintf (stderr, "Error reading (gzipped) input: %s\n", > + gz_error_string(status, input)); > + ret = EXIT_FAILURE; > + goto DONE; > + } > } while ((line_len == 0) || > (line[0] == '#') || > /* the cast is safe because we checked about for line_len < 0 */ > @@ -254,21 +285,37 @@ notmuch_restore_command (notmuch_config_t *config, int argc, char *argv[]) > if (ret) > break; > > - } while ((line_len = getline (&line, &line_size, input)) != -1); > + } while (! (ret = gz_getline (line_ctx, &line, &line_len, input))); > + > > - if (line_ctx != NULL) > - talloc_free (line_ctx); > + /* EOF is normal loop termination condition, UTIL_SUCCESS is > + * impossible here */ > + if (ret == UTIL_EOF) { > + ret = UTIL_SUCCESS; > + } else { > + fprintf (stderr, "Error reading (gzipped) input: %s\n", > + gz_error_string (ret, input)); ret = EXIT_FAILURE; here? > + } > + > + /* currently this should not be after DONE: since we don't > + * know if the xregcomp was reached > + */ > > if (input_format == DUMP_FORMAT_SUP) > regfree (®ex); > > - if (line) > - free (line); > + DONE: > + if (line_ctx != NULL) > + talloc_free (line_ctx); > > - notmuch_database_destroy (notmuch); > + if (notmuch) > + notmuch_database_destroy (notmuch); > > - if (input != stdin) > - fclose (input); > + if (input && gzclose_r (input)) { > + fprintf (stderr, "Error closing %s: %s\n", > + name_for_error, gzerror (input, NULL)); > + ret = EXIT_FAILURE; > + } > > return ret ? EXIT_FAILURE : EXIT_SUCCESS; > } > diff --git a/test/T240-dump-restore.sh b/test/T240-dump-restore.sh > index b6d8602..efe463e 100755 > --- a/test/T240-dump-restore.sh > +++ b/test/T240-dump-restore.sh > @@ -80,6 +80,20 @@ notmuch dump --gzip --output=dump-gzip-outfile.gz > gunzip dump-gzip-outfile.gz > test_expect_equal_file dump.expected dump-gzip-outfile > > +test_begin_subtest "restoring gzipped stdin" > +notmuch dump --gzip --output=backup.gz > +notmuch tag +new_tag '*' > +notmuch restore < backup.gz > +notmuch dump --output=dump.actual > +test_expect_equal_file dump.expected dump.actual > + > +test_begin_subtest "restoring gzipped file" > +notmuch dump --gzip --output=backup.gz > +notmuch tag +new_tag '*' > +notmuch restore --input=backup.gz > +notmuch dump --output=dump.actual > +test_expect_equal_file dump.expected dump.actual > + > # Note, we assume all messages from cworth have a message-id > # containing cworth.org > > -- > 1.9.0 > > _______________________________________________ > notmuch mailing list > notmuch@notmuchmail.org > http://notmuchmail.org/mailman/listinfo/notmuch ^ permalink raw reply [flat|nested] 12+ messages in thread
* [Patch v7 5/6] notmuch-new: backup tags before database upgrade 2014-04-05 15:43 v7 gzip dump restore patches David Bremner ` (3 preceding siblings ...) 2014-04-05 15:43 ` [Patch v7 4/6] restore: transparently support gzipped input David Bremner @ 2014-04-05 15:43 ` David Bremner 2014-04-10 13:25 ` Austin Clements 2014-04-05 15:43 ` [Patch v7 6/6] test: verify tag backup generated by " David Bremner ` (2 subsequent siblings) 7 siblings, 1 reply; 12+ messages in thread From: David Bremner @ 2014-04-05 15:43 UTC (permalink / raw) To: notmuch All we do here is calculate the backup filename, and call the existing dump routine. Also take the opportity to add a message about being safe to interrupt. --- notmuch-new.c | 29 ++++++++++++++++++++++++++++- test/T530-upgrade.sh | 4 +++- 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/notmuch-new.c b/notmuch-new.c index 82acf69..d269c7c 100644 --- a/notmuch-new.c +++ b/notmuch-new.c @@ -989,8 +989,35 @@ notmuch_new_command (notmuch_config_t *config, int argc, char *argv[]) return EXIT_FAILURE; if (notmuch_database_needs_upgrade (notmuch)) { - if (add_files_state.verbosity >= VERBOSITY_NORMAL) + time_t now = time (NULL); + struct tm *gm_time = gmtime (&now); + + /* since dump files are written atomically, the amount of + * harm from overwriting one within a second seems + * relatively small. */ + + const char *backup_name = + talloc_asprintf (notmuch, "%s/dump-%04d%02d%02dT%02d%02d%02d.gz", + dot_notmuch_path, + gm_time->tm_year + 1900, + gm_time->tm_mon + 1, + gm_time->tm_mday, + gm_time->tm_hour, + gm_time->tm_min, + gm_time->tm_sec); + + if (add_files_state.verbosity >= VERBOSITY_NORMAL) { printf ("Welcome to a new version of notmuch! Your database will now be upgraded.\n"); + printf ("This process is safe to interrupt.\n"); + printf ("Backing up tags to %s...\n", backup_name); + } + + if (notmuch_database_dump (notmuch, backup_name, "", + DUMP_FORMAT_BATCH_TAG, TRUE)) { + fprintf (stderr, "Backup failed. Aborting upgrade."); + return EXIT_FAILURE; + } + gettimeofday (&add_files_state.tv_start, NULL); notmuch_database_upgrade (notmuch, add_files_state.verbosity >= VERBOSITY_NORMAL ? upgrade_print_progress : NULL, diff --git a/test/T530-upgrade.sh b/test/T530-upgrade.sh index 67bbf31..d46e3d1 100755 --- a/test/T530-upgrade.sh +++ b/test/T530-upgrade.sh @@ -26,9 +26,11 @@ output=$(notmuch search path:foo) test_expect_equal "$output" "" test_begin_subtest "database upgrade from format version 1" -output=$(notmuch new) +output=$(notmuch new | sed -e 's/^Backing up tags to .*$/Backing up tags to FILENAME/') test_expect_equal "$output" "\ Welcome to a new version of notmuch! Your database will now be upgraded. +This process is safe to interrupt. +Backing up tags to FILENAME Your notmuch database has now been upgraded to database format version 2. No new mail." -- 1.9.0 ^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [Patch v7 5/6] notmuch-new: backup tags before database upgrade 2014-04-05 15:43 ` [Patch v7 5/6] notmuch-new: backup tags before database upgrade David Bremner @ 2014-04-10 13:25 ` Austin Clements 0 siblings, 0 replies; 12+ messages in thread From: Austin Clements @ 2014-04-10 13:25 UTC (permalink / raw) To: David Bremner, notmuch On Sat, 05 Apr 2014, David Bremner <david@tethera.net> wrote: > All we do here is calculate the backup filename, and call the existing > dump routine. > > Also take the opportity to add a message about being safe to opportunity > interrupt. > --- > notmuch-new.c | 29 ++++++++++++++++++++++++++++- > test/T530-upgrade.sh | 4 +++- > 2 files changed, 31 insertions(+), 2 deletions(-) > > diff --git a/notmuch-new.c b/notmuch-new.c > index 82acf69..d269c7c 100644 > --- a/notmuch-new.c > +++ b/notmuch-new.c > @@ -989,8 +989,35 @@ notmuch_new_command (notmuch_config_t *config, int argc, char *argv[]) > return EXIT_FAILURE; > > if (notmuch_database_needs_upgrade (notmuch)) { > - if (add_files_state.verbosity >= VERBOSITY_NORMAL) > + time_t now = time (NULL); > + struct tm *gm_time = gmtime (&now); > + > + /* since dump files are written atomically, the amount of > + * harm from overwriting one within a second seems > + * relatively small. */ > + > + const char *backup_name = > + talloc_asprintf (notmuch, "%s/dump-%04d%02d%02dT%02d%02d%02d.gz", > + dot_notmuch_path, > + gm_time->tm_year + 1900, > + gm_time->tm_mon + 1, > + gm_time->tm_mday, > + gm_time->tm_hour, > + gm_time->tm_min, > + gm_time->tm_sec); > + > + if (add_files_state.verbosity >= VERBOSITY_NORMAL) { > printf ("Welcome to a new version of notmuch! Your database will now be upgraded.\n"); > + printf ("This process is safe to interrupt.\n"); > + printf ("Backing up tags to %s...\n", backup_name); > + } > + > + if (notmuch_database_dump (notmuch, backup_name, "", > + DUMP_FORMAT_BATCH_TAG, TRUE)) { > + fprintf (stderr, "Backup failed. Aborting upgrade."); > + return EXIT_FAILURE; > + } > + > gettimeofday (&add_files_state.tv_start, NULL); > notmuch_database_upgrade (notmuch, > add_files_state.verbosity >= VERBOSITY_NORMAL ? upgrade_print_progress : NULL, > diff --git a/test/T530-upgrade.sh b/test/T530-upgrade.sh > index 67bbf31..d46e3d1 100755 > --- a/test/T530-upgrade.sh > +++ b/test/T530-upgrade.sh > @@ -26,9 +26,11 @@ output=$(notmuch search path:foo) > test_expect_equal "$output" "" > > test_begin_subtest "database upgrade from format version 1" > -output=$(notmuch new) > +output=$(notmuch new | sed -e 's/^Backing up tags to .*$/Backing up tags to FILENAME/') > test_expect_equal "$output" "\ > Welcome to a new version of notmuch! Your database will now be upgraded. > +This process is safe to interrupt. > +Backing up tags to FILENAME > Your notmuch database has now been upgraded to database format version 2. > No new mail." > > -- > 1.9.0 > > _______________________________________________ > notmuch mailing list > notmuch@notmuchmail.org > http://notmuchmail.org/mailman/listinfo/notmuch ^ permalink raw reply [flat|nested] 12+ messages in thread
* [Patch v7 6/6] test: verify tag backup generated by database upgrade 2014-04-05 15:43 v7 gzip dump restore patches David Bremner ` (4 preceding siblings ...) 2014-04-05 15:43 ` [Patch v7 5/6] notmuch-new: backup tags before database upgrade David Bremner @ 2014-04-05 15:43 ` David Bremner 2014-04-10 13:27 ` v7 gzip dump restore patches Austin Clements 2014-04-12 11:32 ` David Bremner 7 siblings, 0 replies; 12+ messages in thread From: David Bremner @ 2014-04-05 15:43 UTC (permalink / raw) To: notmuch 'pre upgrade dump' is not much of a test, but at least this way we get somewhat sensible behaviour if it fails. --- test/T530-upgrade.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/test/T530-upgrade.sh b/test/T530-upgrade.sh index d46e3d1..7d5d5aa 100755 --- a/test/T530-upgrade.sh +++ b/test/T530-upgrade.sh @@ -25,6 +25,8 @@ test_begin_subtest "path: search does not work with old database version" output=$(notmuch search path:foo) test_expect_equal "$output" "" +test_expect_success 'pre upgrade dump' 'notmuch dump | sort > pre-upgrade-dump' + test_begin_subtest "database upgrade from format version 1" output=$(notmuch new | sed -e 's/^Backing up tags to .*$/Backing up tags to FILENAME/') test_expect_equal "$output" "\ @@ -34,6 +36,10 @@ Backing up tags to FILENAME Your notmuch database has now been upgraded to database format version 2. No new mail." +test_begin_subtest "tag backup matches pre-upgrade dump" +gunzip -c ${MAIL_DIR}/.notmuch/dump-*.gz | sort > backup-dump +test_expect_equal_file pre-upgrade-dump backup-dump + test_begin_subtest "folder: no longer matches in the middle of path" output=$(notmuch search folder:baz) test_expect_equal "$output" "" -- 1.9.0 ^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: v7 gzip dump restore patches 2014-04-05 15:43 v7 gzip dump restore patches David Bremner ` (5 preceding siblings ...) 2014-04-05 15:43 ` [Patch v7 6/6] test: verify tag backup generated by " David Bremner @ 2014-04-10 13:27 ` Austin Clements 2014-04-12 11:32 ` David Bremner 7 siblings, 0 replies; 12+ messages in thread From: Austin Clements @ 2014-04-10 13:27 UTC (permalink / raw) To: David Bremner, notmuch On Sat, 05 Apr 2014, David Bremner <david@tethera.net> wrote: > Supercedes > > id:1396554083-3892-2-git-send-email-david@tethera.net > > - adds new analogues of strerror > - util_error_string > - gz_error_string LGTM other than the two nits I pointed out. I'd be happy to have the fixes for those simply amended in when you apply. ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: v7 gzip dump restore patches 2014-04-05 15:43 v7 gzip dump restore patches David Bremner ` (6 preceding siblings ...) 2014-04-10 13:27 ` v7 gzip dump restore patches Austin Clements @ 2014-04-12 11:32 ` David Bremner 7 siblings, 0 replies; 12+ messages in thread From: David Bremner @ 2014-04-12 11:32 UTC (permalink / raw) To: notmuch David Bremner <david@tethera.net> writes: > Supercedes > > id:1396554083-3892-2-git-send-email-david@tethera.net > > - adds new analogues of strerror > - util_error_string > - gz_error_string > pushed, amended as suggested by Austin and Tomi. d ^ permalink raw reply [flat|nested] 12+ messages in thread
end of thread, other threads:[~2014-04-12 11:32 UTC | newest] Thread overview: 12+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2014-04-05 15:43 v7 gzip dump restore patches David Bremner 2014-04-05 15:43 ` [Patch v7 1/6] dump: support gzipped and atomic output David Bremner 2014-04-05 15:43 ` [Patch v7 2/6] util: add gz_readline David Bremner 2014-04-06 10:21 ` Tomi Ollila 2014-04-05 15:43 ` [Patch v7 3/6] test: restore with missing final newline David Bremner 2014-04-05 15:43 ` [Patch v7 4/6] restore: transparently support gzipped input David Bremner 2014-04-10 13:24 ` Austin Clements 2014-04-05 15:43 ` [Patch v7 5/6] notmuch-new: backup tags before database upgrade David Bremner 2014-04-10 13:25 ` Austin Clements 2014-04-05 15:43 ` [Patch v7 6/6] test: verify tag backup generated by " David Bremner 2014-04-10 13:27 ` v7 gzip dump restore patches Austin Clements 2014-04-12 11:32 ` David Bremner
Code repositories for project(s) associated with this public inbox https://yhetil.org/notmuch.git/ This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).