unofficial mirror of notmuch@notmuchmail.org
 help / color / mirror / code / Atom feed
* v6 gzipped dump/restore 
@ 2014-04-03 19:41 David Bremner
  2014-04-03 19:41 ` [Patch v6 1/6] dump: support gzipped and atomic output David Bremner
                   ` (5 more replies)
  0 siblings, 6 replies; 12+ messages in thread
From: David Bremner @ 2014-04-03 19:41 UTC (permalink / raw)
  To: notmuch

I hope I got all the comments from Tomi and Austin. Interdiff below.

I ended up collapsing two patches while I was revising, since it
seemed artificial to do some of the changes twice to keep the gzipped
and atomic features as seperate patches.


diff --git a/INSTALL b/INSTALL
index df318fa..b543c50 100644
--- a/INSTALL
+++ b/INSTALL
@@ -67,6 +67,9 @@ Talloc, and zlib which are each described below:
 	by Xapian, so if you installed that you will already have
 	zlib. You may need to install the zlib headers separately.
 
+	Notmuch needs the transparent write feature of zlib introduced
+	in version 1.2.5.2 (Dec. 2011).
+
 	zlib is available from http://zlib.net
 
 Building Documentation
diff --git a/configure b/configure
index d685ab3..1d624f7 100755
--- a/configure
+++ b/configure
@@ -340,9 +340,9 @@ else
     errors=$((errors + 1))
 fi
 
-printf "Checking for zlib development files... "
+printf "Checking for zlib (>= 1.2.5.2)... "
 have_zlib=0
-if pkg-config --exists zlib; then
+if pkg-config --atleast-version=1.2.5.2 zlib; then
     printf "Yes.\n"
     have_zlib=1
     zlib_cflags=$(pkg-config --cflags zlib)
diff --git a/notmuch-dump.c b/notmuch-dump.c
index 05ed6b4..2a7252a 100644
--- a/notmuch-dump.c
+++ b/notmuch-dump.c
@@ -140,7 +140,7 @@ notmuch_database_dump (notmuch_database_t *notmuch,
 	tempname = talloc_asprintf (notmuch, "%s.XXXXXX", output_file_name);
 	outfd = mkstemp (tempname);
     } else {
-	outfd = fileno (stdout);
+	outfd = dup (STDOUT_FILENO);
     }
 
     if (outfd < 0) {
@@ -153,6 +153,9 @@ notmuch_database_dump (notmuch_database_t *notmuch,
     if (output == NULL) {
 	fprintf (stderr, "Error opening %s for (gzip) writing: %s\n",
 		 name_for_error, strerror (errno));
+	if (close (outfd))
+	    fprintf (stderr, "Error closing %s during shutdown: %s\n",
+		 name_for_error, strerror (errno));
 	goto DONE;
     }
 
@@ -165,22 +168,25 @@ notmuch_database_dump (notmuch_database_t *notmuch,
 	goto DONE;
     }
 
-    ret = fdatasync (outfd);
-    if (ret) {
-	perror ("fdatasync");
-	goto DONE;
-    }
-
     if (output_file_name) {
-	ret = gzclose_w (output);
-	if (ret != Z_OK) {
-	    ret = EXIT_FAILURE;
+	ret = fdatasync (outfd);
+	if (ret) {
+	    fprintf (stderr, "Error syncing %s to disk: %s\n",
+		     name_for_error, strerror (errno));
 	    goto DONE;
 	}
+    }
 
+    if (gzclose_w (output) != Z_OK) {
+	ret = EXIT_FAILURE;
+	goto DONE;
+    }
+
+    if (output_file_name) {
 	ret = rename (tempname, output_file_name);
 	if (ret) {
-	    perror ("rename");
+	    fprintf (stderr, "Error renaming %s to %s: %s\n",
+		     tempname, output_file_name, strerror (errno));
 	    goto DONE;
 	}
 
diff --git a/notmuch-new.c b/notmuch-new.c
index e0431c6..d269c7c 100644
--- a/notmuch-new.c
+++ b/notmuch-new.c
@@ -997,7 +997,7 @@ notmuch_new_command (notmuch_config_t *config, int argc, char *argv[])
 	     * relatively small. */
 
 	    const char *backup_name =
-		talloc_asprintf (notmuch, "%s/backup-%04d-%02d-%02d-%02d:%02d:%02d.gz",
+		talloc_asprintf (notmuch, "%s/dump-%04d%02d%02dT%02d%02d%02d.gz",
 				 dot_notmuch_path,
 				 gm_time->tm_year + 1900,
 				 gm_time->tm_mon + 1,
@@ -1009,12 +1009,12 @@ notmuch_new_command (notmuch_config_t *config, int argc, char *argv[])
 	    if (add_files_state.verbosity >= VERBOSITY_NORMAL) {
 		printf ("Welcome to a new version of notmuch! Your database will now be upgraded.\n");
 		printf ("This process is safe to interrupt.\n");
-		printf ("Backing up tags to %s\n", backup_name);
+		printf ("Backing up tags to %s...\n", backup_name);
 	    }
 
 	    if (notmuch_database_dump (notmuch, backup_name, "",
 				       DUMP_FORMAT_BATCH_TAG, TRUE)) {
-		fprintf (stderr, "Backup failed. Aborting upgrade");
+		fprintf (stderr, "Backup failed. Aborting upgrade.");
 		return EXIT_FAILURE;
 	    }
 
diff --git a/notmuch-restore.c b/notmuch-restore.c
index 86bce20..eb5b7b2 100644
--- a/notmuch-restore.c
+++ b/notmuch-restore.c
@@ -132,7 +132,6 @@ notmuch_restore_command (notmuch_config_t *config, int argc, char *argv[])
     gzFile input;
     char *line = NULL;
     void *line_ctx = NULL;
-    size_t line_size;
     ssize_t line_len;
 
     int ret = 0;
@@ -166,8 +165,16 @@ notmuch_restore_command (notmuch_config_t *config, int argc, char *argv[])
 
     if (input_file_name)
 	input = gzopen (input_file_name, "r");
-    else
-	input = gzdopen (fileno (stdin), "r");
+    else {
+	int infd = dup (STDIN_FILENO);
+	if (infd < 0) {
+	    fprintf (stderr, "Error duping stdin\n");
+	    return EXIT_FAILURE;
+	}
+	input = gzdopen (infd, "r");
+	if (! input)
+	    close (infd);
+    }
 
     if (input == NULL) {
 	fprintf (stderr, "Error opening %s for (gzip) reading: %s\n",
@@ -189,7 +196,7 @@ notmuch_restore_command (notmuch_config_t *config, int argc, char *argv[])
     do {
 	util_status_t status;
 
-	status = gz_getline (line_ctx, &line, &line_size, &line_len, input);
+	status = gz_getline (line_ctx, &line, &line_len, input);
 
 	/* empty input file not considered an error */
 	if (status == UTIL_EOF)
@@ -262,7 +269,7 @@ notmuch_restore_command (notmuch_config_t *config, int argc, char *argv[])
 	if (ret)
 	    break;
 
-    }  while (gz_getline (line_ctx, &line, &line_size, &line_len, input) == UTIL_SUCCESS);
+    }  while (gz_getline (line_ctx, &line, &line_len, input) == UTIL_SUCCESS);
 
     if (line_ctx != NULL)
 	talloc_free (line_ctx);
@@ -272,8 +279,7 @@ notmuch_restore_command (notmuch_config_t *config, int argc, char *argv[])
 
     notmuch_database_destroy (notmuch);
 
-    if (input_file_name != NULL)
-	gzclose_r (input);
+    gzclose_r (input);
 
     return ret ? EXIT_FAILURE : EXIT_SUCCESS;
 }
diff --git a/test/T240-dump-restore.sh b/test/T240-dump-restore.sh
index 50d4d48..efe463e 100755
--- a/test/T240-dump-restore.sh
+++ b/test/T240-dump-restore.sh
@@ -124,6 +124,15 @@ notmuch dump --format=batch-tag from:cworth | sed 's/^.*-- id://' | \
     sort > OUTPUT.$test_count
 test_expect_equal_file EXPECTED.$test_count OUTPUT.$test_count
 
+test_begin_subtest "format=batch-tag, missing newline"
+printf "+a_tag_without_newline -- id:20091117232137.GA7669@griffis1.net" > IN
+notmuch restore --accumulate < IN
+notmuch dump id:20091117232137.GA7669@griffis1.net > OUT
+cat <<EOF > EXPECTED
++a_tag_without_newline +inbox +unread -- id:20091117232137.GA7669@griffis1.net
+EOF
+test_expect_equal_file EXPECTED OUT
+
 test_begin_subtest "format=batch-tag, # round-trip"
 notmuch dump --format=sup | sort > EXPECTED.$test_count
 notmuch dump --format=batch-tag | notmuch restore --format=batch-tag
diff --git a/test/T530-upgrade.sh b/test/T530-upgrade.sh
index 7a68ddf..7d5d5aa 100755
--- a/test/T530-upgrade.sh
+++ b/test/T530-upgrade.sh
@@ -37,7 +37,7 @@ Your notmuch database has now been upgraded to database format version 2.
 No new mail."
 
 test_begin_subtest "tag backup matches pre-upgrade dump"
-gunzip -c ${MAIL_DIR}/.notmuch/backup-*.gz | sort > backup-dump
+gunzip -c ${MAIL_DIR}/.notmuch/dump-*.gz | sort > backup-dump
 test_expect_equal_file pre-upgrade-dump backup-dump
 
 test_begin_subtest "folder: no longer matches in the middle of path"
diff --git a/util/zlib-extra.c b/util/zlib-extra.c
index cb1eba0..922ab52 100644
--- a/util/zlib-extra.c
+++ b/util/zlib-extra.c
@@ -25,34 +25,36 @@
 
 /* mimic POSIX/glibc getline, but on a zlib gzFile stream, and using talloc */
 util_status_t
-gz_getline (void *talloc_ctx, char **bufptr, size_t *bufsiz, ssize_t *bytes_read,
-	    gzFile stream)
+gz_getline (void *talloc_ctx, char **bufptr, ssize_t *bytes_read, gzFile stream)
 {
-    size_t len = *bufsiz;
     char *buf = *bufptr;
+    unsigned int len;
     size_t offset = 0;
 
-    if (len == 0 || buf == NULL) {
+    if (buf) {
+	len = talloc_array_length (buf);
+    } else {
 	/* same as getdelim from gnulib */
 	len = 120;
-	buf = talloc_size (talloc_ctx, len);
+	buf = talloc_array (talloc_ctx, char, len);
 	if (buf == NULL)
 	    return UTIL_OUT_OF_MEMORY;
     }
 
     while (1) {
 	if (! gzgets (stream, buf + offset, len - offset)) {
+	    /* Null indicates EOF or error */
 	    int zlib_status = 0;
 	    (void) gzerror (stream, &zlib_status);
 	    switch (zlib_status) {
 	    case Z_OK:
-		/* follow getline behaviour */
-		*bytes_read = -1;
-		return UTIL_EOF;
-		break;
+		/* no data read before EOF */
+		if (offset == 0)
+		    return UTIL_EOF;
+		else
+		    goto SUCCESS;
 	    case Z_ERRNO:
 		return UTIL_FILE;
-		break;
 	    default:
 		return UTIL_ERROR;
 	    }
@@ -60,17 +62,16 @@ gz_getline (void *talloc_ctx, char **bufptr, size_t *bufsiz, ssize_t *bytes_read
 
 	offset += strlen (buf + offset);
 
-	if ( buf[offset - 1] == '\n' )
-	    break;
+	if (buf[offset - 1] == '\n')
+	    goto SUCCESS;
 
 	len *= 2;
 	buf = talloc_realloc (talloc_ctx, buf, char, len);
 	if (buf == NULL)
 	    return UTIL_OUT_OF_MEMORY;
     }
-
+ SUCCESS:
     *bufptr = buf;
-    *bufsiz = len;
     *bytes_read = offset;
     return UTIL_SUCCESS;
 }
diff --git a/util/zlib-extra.h b/util/zlib-extra.h
index ed46ac1..ed67115 100644
--- a/util/zlib-extra.h
+++ b/util/zlib-extra.h
@@ -1,11 +1,11 @@
 #ifndef _ZLIB_EXTRA_H
 #define _ZLIB_EXTRA_H
 
-#include <zlib.h>
 #include "util.h"
+#include <zlib.h>
+
 /* Like getline, but read from a gzFile. Allocation is with talloc */
 util_status_t
-gz_getline (void *ctx, char **lineptr, size_t *line_size, ssize_t *bytes_read,
-	    gzFile stream);
+gz_getline (void *ctx, char **lineptr, ssize_t *bytes_read, gzFile stream);
 
 #endif

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [Patch v6 1/6] dump: support gzipped and atomic output
  2014-04-03 19:41 v6 gzipped dump/restore David Bremner
@ 2014-04-03 19:41 ` David Bremner
  2014-04-04  2:32   ` David Bremner
                     ` (2 more replies)
  2014-04-03 19:41 ` [Patch v6 2/6] util: add gz_readline David Bremner
                   ` (4 subsequent siblings)
  5 siblings, 3 replies; 12+ messages in thread
From: David Bremner @ 2014-04-03 19:41 UTC (permalink / raw)
  To: notmuch

The main goal is to support gzipped output for future internal
calls (e.g. from notmuch-new) to notmuch_database_dump.

The additional dependency is not very heavy since xapian already pulls
in zlib.

We want the dump to be "atomic", in the sense that after running the
dump file is either present and complete, or not present.  This avoids
certain classes of mishaps involving overwriting a good backup with a
bad or partial one.
---
 INSTALL                   | 20 ++++++++--
 Makefile.local            |  2 +-
 configure                 | 28 ++++++++++++--
 doc/man1/notmuch-dump.rst |  3 ++
 notmuch-client.h          |  4 +-
 notmuch-dump.c            | 95 +++++++++++++++++++++++++++++++++++++----------
 test/T240-dump-restore.sh | 12 ++++++
 7 files changed, 136 insertions(+), 28 deletions(-)

diff --git a/INSTALL b/INSTALL
index 690b0ef..b543c50 100644
--- a/INSTALL
+++ b/INSTALL
@@ -20,8 +20,8 @@ configure stage.
 
 Dependencies
 ------------
-Notmuch depends on three libraries: Xapian, GMime 2.4 or 2.6, and
-Talloc which are each described below:
+Notmuch depends on four libraries: Xapian, GMime 2.4 or 2.6,
+Talloc, and zlib which are each described below:
 
 	Xapian
 	------
@@ -60,6 +60,18 @@ Talloc which are each described below:
 
 	Talloc is available from http://talloc.samba.org/
 
+	zlib
+	----
+
+	zlib is an extremely popular compression library. It is used
+	by Xapian, so if you installed that you will already have
+	zlib. You may need to install the zlib headers separately.
+
+	Notmuch needs the transparent write feature of zlib introduced
+	in version 1.2.5.2 (Dec. 2011).
+
+	zlib is available from http://zlib.net
+
 Building Documentation
 ----------------------
 
@@ -79,11 +91,11 @@ dependencies with a simple simple command line. For example:
 
   For Debian and similar:
 
-        sudo apt-get install libxapian-dev libgmime-2.6-dev libtalloc-dev python-sphinx
+        sudo apt-get install libxapian-dev libgmime-2.6-dev libtalloc-dev zlib1g-dev python-sphinx
 
   For Fedora and similar:
 
-	sudo yum install xapian-core-devel gmime-devel libtalloc-devel python-sphinx
+	sudo yum install xapian-core-devel gmime-devel libtalloc-devel zlib-devel python-sphinx
 
 On other systems, a similar command can be used, but the details of
 the package names may be different.
diff --git a/Makefile.local b/Makefile.local
index cb7b106..e5a20a7 100644
--- a/Makefile.local
+++ b/Makefile.local
@@ -41,7 +41,7 @@ PV_FILE=bindings/python/notmuch/version.py
 # Smash together user's values with our extra values
 FINAL_CFLAGS = -DNOTMUCH_VERSION=$(VERSION) $(CPPFLAGS) $(CFLAGS) $(WARN_CFLAGS) $(extra_cflags) $(CONFIGURE_CFLAGS)
 FINAL_CXXFLAGS = $(CPPFLAGS) $(CXXFLAGS) $(WARN_CXXFLAGS) $(extra_cflags) $(extra_cxxflags) $(CONFIGURE_CXXFLAGS)
-FINAL_NOTMUCH_LDFLAGS = $(LDFLAGS) -Lutil -lutil -Llib -lnotmuch $(AS_NEEDED_LDFLAGS) $(GMIME_LDFLAGS) $(TALLOC_LDFLAGS)
+FINAL_NOTMUCH_LDFLAGS = $(LDFLAGS) -Lutil -lutil -Llib -lnotmuch $(AS_NEEDED_LDFLAGS) $(GMIME_LDFLAGS) $(TALLOC_LDFLAGS) $(ZLIB_LDFLAGS)
 FINAL_NOTMUCH_LINKER = CC
 ifneq ($(LINKER_RESOLVES_LIBRARY_DEPENDENCIES),1)
 FINAL_NOTMUCH_LDFLAGS += $(CONFIGURE_LDFLAGS)
diff --git a/configure b/configure
index 1d430b9..1d624f7 100755
--- a/configure
+++ b/configure
@@ -340,6 +340,18 @@ else
     errors=$((errors + 1))
 fi
 
+printf "Checking for zlib (>= 1.2.5.2)... "
+have_zlib=0
+if pkg-config --atleast-version=1.2.5.2 zlib; then
+    printf "Yes.\n"
+    have_zlib=1
+    zlib_cflags=$(pkg-config --cflags zlib)
+    zlib_ldflags=$(pkg-config --libs zlib)
+else
+    printf "No.\n"
+    errors=$((errors + 1))
+fi
+
 printf "Checking for talloc development files... "
 if pkg-config --exists talloc; then
     printf "Yes.\n"
@@ -496,6 +508,11 @@ EOF
 	echo "	Xapian library (including development files such as headers)"
 	echo "	http://xapian.org/"
     fi
+    if [ $have_zlib -eq 0 ]; then
+	echo "	zlib library (including development files such as headers)"
+	echo "	http://zlib.net/"
+	echo
+    fi
     if [ $have_gmime -eq 0 ]; then
 	echo "	Either GMime 2.4 library" $GMIME_24_VERSION_CTR "or GMime 2.6 library" $GMIME_26_VERSION_CTR
 	echo "	(including development files such as headers)"
@@ -519,11 +536,11 @@ case a simple command will install everything you need. For example:
 
 On Debian and similar systems:
 
-	sudo apt-get install libxapian-dev libgmime-2.6-dev libtalloc-dev
+	sudo apt-get install libxapian-dev libgmime-2.6-dev libtalloc-dev zlib1g-dev
 
 Or on Fedora and similar systems:
 
-	sudo yum install xapian-core-devel gmime-devel libtalloc-devel
+	sudo yum install xapian-core-devel gmime-devel libtalloc-devel zlib-devel
 
 On other systems, similar commands can be used, but the details of the
 package names may be different.
@@ -844,6 +861,10 @@ XAPIAN_LDFLAGS = ${xapian_ldflags}
 GMIME_CFLAGS = ${gmime_cflags}
 GMIME_LDFLAGS = ${gmime_ldflags}
 
+# Flags needed to compile and link against zlib
+ZLIB_CFLAGS = ${zlib_cflags}
+ZLIB_LDFLAGS = ${zlib_ldflags}
+
 # Flags needed to compile and link against talloc
 TALLOC_CFLAGS = ${talloc_cflags}
 TALLOC_LDFLAGS = ${talloc_ldflags}
@@ -882,6 +903,7 @@ CONFIGURE_CFLAGS = -DHAVE_GETLINE=\$(HAVE_GETLINE) \$(GMIME_CFLAGS)      \\
 		   -DUTIL_BYTE_ORDER=\$(UTIL_BYTE_ORDER)
 
 CONFIGURE_CXXFLAGS = -DHAVE_GETLINE=\$(HAVE_GETLINE) \$(GMIME_CFLAGS)    \\
+		     \$(ZLIB_CFLAGS)					 \\
 		     \$(TALLOC_CFLAGS) -DHAVE_VALGRIND=\$(HAVE_VALGRIND) \\
 		     \$(VALGRIND_CFLAGS) \$(XAPIAN_CXXFLAGS)             \\
 		     -DHAVE_STRCASESTR=\$(HAVE_STRCASESTR)               \\
@@ -892,5 +914,5 @@ CONFIGURE_CXXFLAGS = -DHAVE_GETLINE=\$(HAVE_GETLINE) \$(GMIME_CFLAGS)    \\
 		     -DHAVE_XAPIAN_COMPACT=\$(HAVE_XAPIAN_COMPACT)       \\
 		     -DUTIL_BYTE_ORDER=\$(UTIL_BYTE_ORDER)
 
-CONFIGURE_LDFLAGS =  \$(GMIME_LDFLAGS) \$(TALLOC_LDFLAGS) \$(XAPIAN_LDFLAGS)
+CONFIGURE_LDFLAGS =  \$(GMIME_LDFLAGS) \$(TALLOC_LDFLAGS) \$(ZLIB_LDFLAGS) \$(XAPIAN_LDFLAGS)
 EOF
diff --git a/doc/man1/notmuch-dump.rst b/doc/man1/notmuch-dump.rst
index 17d1da5..d94cb4f 100644
--- a/doc/man1/notmuch-dump.rst
+++ b/doc/man1/notmuch-dump.rst
@@ -19,6 +19,9 @@ recreated from the messages themselves. The output of notmuch dump is
 therefore the only critical thing to backup (and much more friendly to
 incremental backup than the native database files.)
 
+``--gzip``
+    Compress the output in a format compatible with **gzip(1)**.
+
 ``--format=(sup|batch-tag)``
     Notmuch restore supports two plain text dump formats, both with one
     message-id per line, followed by a list of tags.
diff --git a/notmuch-client.h b/notmuch-client.h
index d110648..e1efbe0 100644
--- a/notmuch-client.h
+++ b/notmuch-client.h
@@ -450,7 +450,9 @@ typedef enum dump_formats {
 int
 notmuch_database_dump (notmuch_database_t *notmuch,
 		       const char *output_file_name,
-		       const char *query_str, dump_format_t output_format);
+		       const char *query_str,
+		       dump_format_t output_format,
+		       notmuch_bool_t gzip_output);
 
 #include "command-line-arguments.h"
 #endif
diff --git a/notmuch-dump.c b/notmuch-dump.c
index 21702d7..2a7252a 100644
--- a/notmuch-dump.c
+++ b/notmuch-dump.c
@@ -21,9 +21,11 @@
 #include "notmuch-client.h"
 #include "hex-escape.h"
 #include "string-util.h"
+#include <zlib.h>
+
 
 static int
-database_dump_file (notmuch_database_t *notmuch, FILE *output,
+database_dump_file (notmuch_database_t *notmuch, gzFile output,
 		    const char *query_str, int output_format)
 {
     notmuch_query_t *query;
@@ -69,7 +71,7 @@ database_dump_file (notmuch_database_t *notmuch, FILE *output,
 	}
 
 	if (output_format == DUMP_FORMAT_SUP) {
-	    fprintf (output, "%s (", message_id);
+	    gzprintf (output, "%s (", message_id);
 	}
 
 	for (tags = notmuch_message_get_tags (message);
@@ -78,12 +80,12 @@ database_dump_file (notmuch_database_t *notmuch, FILE *output,
 	    const char *tag_str = notmuch_tags_get (tags);
 
 	    if (! first)
-		fputs (" ", output);
+		gzputs (output, " ");
 
 	    first = 0;
 
 	    if (output_format == DUMP_FORMAT_SUP) {
-		fputs (tag_str, output);
+		gzputs (output, tag_str);
 	    } else {
 		if (hex_encode (notmuch, tag_str,
 				&buffer, &buffer_size) != HEX_SUCCESS) {
@@ -91,12 +93,12 @@ database_dump_file (notmuch_database_t *notmuch, FILE *output,
 			     tag_str);
 		    return EXIT_FAILURE;
 		}
-		fprintf (output, "+%s", buffer);
+		gzprintf (output, "+%s", buffer);
 	    }
 	}
 
 	if (output_format == DUMP_FORMAT_SUP) {
-	    fputs (")\n", output);
+	    gzputs (output, ")\n");
 	} else {
 	    if (make_boolean_term (notmuch, "id", message_id,
 				   &buffer, &buffer_size)) {
@@ -104,7 +106,7 @@ database_dump_file (notmuch_database_t *notmuch, FILE *output,
 			     message_id, strerror (errno));
 		    return EXIT_FAILURE;
 	    }
-	    fprintf (output, " -- %s\n", buffer);
+	    gzprintf (output, " -- %s\n", buffer);
 	}
 
 	notmuch_message_destroy (message);
@@ -121,24 +123,77 @@ database_dump_file (notmuch_database_t *notmuch, FILE *output,
 int
 notmuch_database_dump (notmuch_database_t *notmuch,
 		       const char *output_file_name,
-		       const char *query_str, dump_format_t output_format)
+		       const char *query_str,
+		       dump_format_t output_format,
+		       notmuch_bool_t gzip_output)
 {
-    FILE *output = stdout;
-    int ret;
+    gzFile output;
+    const char *mode = gzip_output ? "w9" : "wT";
+    const char *name_for_error = output_file_name ? output_file_name : "stdout";
+
+    char *tempname = NULL;
+    int outfd = -1;
+
+    int ret = -1;
 
     if (output_file_name) {
-	output = fopen (output_file_name, "w");
-	if (output == NULL) {
-	    fprintf (stderr, "Error opening %s for writing: %s\n",
-		     output_file_name, strerror (errno));
-	    return EXIT_FAILURE;
-	}
+	tempname = talloc_asprintf (notmuch, "%s.XXXXXX", output_file_name);
+	outfd = mkstemp (tempname);
+    } else {
+	outfd = dup (STDOUT_FILENO);
+    }
+
+    if (outfd < 0) {
+	fprintf (stderr, "Bad output file %s\n", name_for_error);
+	goto DONE;
+    }
+
+    output = gzdopen (outfd, mode);
+
+    if (output == NULL) {
+	fprintf (stderr, "Error opening %s for (gzip) writing: %s\n",
+		 name_for_error, strerror (errno));
+	if (close (outfd))
+	    fprintf (stderr, "Error closing %s during shutdown: %s\n",
+		 name_for_error, strerror (errno));
+	goto DONE;
     }
 
     ret = database_dump_file (notmuch, output, query_str, output_format);
+    if (ret) goto DONE;
 
-    if (output != stdout)
-	fclose (output);
+    ret = gzflush (output, Z_FINISH);
+    if (ret) {
+	fprintf (stderr, "Error flushing output: %s\n", gzerror (output, NULL));
+	goto DONE;
+    }
+
+    if (output_file_name) {
+	ret = fdatasync (outfd);
+	if (ret) {
+	    fprintf (stderr, "Error syncing %s to disk: %s\n",
+		     name_for_error, strerror (errno));
+	    goto DONE;
+	}
+    }
+
+    if (gzclose_w (output) != Z_OK) {
+	ret = EXIT_FAILURE;
+	goto DONE;
+    }
+
+    if (output_file_name) {
+	ret = rename (tempname, output_file_name);
+	if (ret) {
+	    fprintf (stderr, "Error renaming %s to %s: %s\n",
+		     tempname, output_file_name, strerror (errno));
+	    goto DONE;
+	}
+
+    }
+ DONE:
+    if (ret != EXIT_SUCCESS && output_file_name)
+	(void) unlink (tempname);
 
     return ret;
 }
@@ -158,6 +213,7 @@ notmuch_dump_command (notmuch_config_t *config, int argc, char *argv[])
     int opt_index;
 
     int output_format = DUMP_FORMAT_BATCH_TAG;
+    notmuch_bool_t gzip_output = 0;
 
     notmuch_opt_desc_t options[] = {
 	{ NOTMUCH_OPT_KEYWORD, &output_format, "format", 'f',
@@ -165,6 +221,7 @@ notmuch_dump_command (notmuch_config_t *config, int argc, char *argv[])
 				  { "batch-tag", DUMP_FORMAT_BATCH_TAG },
 				  { 0, 0 } } },
 	{ NOTMUCH_OPT_STRING, &output_file_name, "output", 'o', 0  },
+	{ NOTMUCH_OPT_BOOLEAN, &gzip_output, "gzip", 'z', 0 },
 	{ 0, 0, 0, 0, 0 }
     };
 
@@ -181,7 +238,7 @@ notmuch_dump_command (notmuch_config_t *config, int argc, char *argv[])
     }
 
     ret = notmuch_database_dump (notmuch, output_file_name, query_str,
-				 output_format);
+				 output_format, gzip_output);
 
     notmuch_database_destroy (notmuch);
 
diff --git a/test/T240-dump-restore.sh b/test/T240-dump-restore.sh
index 0004438..d79aca8 100755
--- a/test/T240-dump-restore.sh
+++ b/test/T240-dump-restore.sh
@@ -68,6 +68,18 @@ test_begin_subtest "dump --output=outfile --"
 notmuch dump --output=dump-1-arg-dash.actual --
 test_expect_equal_file dump.expected dump-1-arg-dash.actual
 
+# gzipped output
+
+test_begin_subtest "dump --gzip"
+notmuch dump --gzip > dump-gzip.gz
+gunzip dump-gzip.gz
+test_expect_equal_file dump.expected dump-gzip
+
+test_begin_subtest "dump --gzip --output=outfile"
+notmuch dump --gzip --output=dump-gzip-outfile.gz
+gunzip dump-gzip-outfile.gz
+test_expect_equal_file dump.expected dump-gzip-outfile
+
 # Note, we assume all messages from cworth have a message-id
 # containing cworth.org
 
-- 
1.9.0

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [Patch v6 2/6] util: add gz_readline
  2014-04-03 19:41 v6 gzipped dump/restore David Bremner
  2014-04-03 19:41 ` [Patch v6 1/6] dump: support gzipped and atomic output David Bremner
@ 2014-04-03 19:41 ` David Bremner
  2014-04-03 19:41 ` [Patch v6 3/6] test: restore with missing final newline David Bremner
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 12+ messages in thread
From: David Bremner @ 2014-04-03 19:41 UTC (permalink / raw)
  To: notmuch

The idea is to provide a more or less drop in replacement for readline
to read from zlib/gzip streams.  Take the opportunity to replace
malloc with talloc.
---
 util/Makefile.local |  2 +-
 util/util.h         | 12 +++++++++
 util/zlib-extra.c   | 77 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 util/zlib-extra.h   | 11 ++++++++
 4 files changed, 101 insertions(+), 1 deletion(-)
 create mode 100644 util/util.h
 create mode 100644 util/zlib-extra.c
 create mode 100644 util/zlib-extra.h

diff --git a/util/Makefile.local b/util/Makefile.local
index 29c0ce6..e2a5b65 100644
--- a/util/Makefile.local
+++ b/util/Makefile.local
@@ -4,7 +4,7 @@ dir := util
 extra_cflags += -I$(srcdir)/$(dir)
 
 libutil_c_srcs := $(dir)/xutil.c $(dir)/error_util.c $(dir)/hex-escape.c \
-		  $(dir)/string-util.c $(dir)/talloc-extra.c
+		  $(dir)/string-util.c $(dir)/talloc-extra.c $(dir)/zlib-extra.c
 
 libutil_modules := $(libutil_c_srcs:.c=.o)
 
diff --git a/util/util.h b/util/util.h
new file mode 100644
index 0000000..8663cfc
--- /dev/null
+++ b/util/util.h
@@ -0,0 +1,12 @@
+#ifndef _UTIL_H
+#define _UTIL_H
+
+typedef enum util_status {
+    UTIL_SUCCESS = 0,
+    UTIL_ERROR = 1,
+    UTIL_OUT_OF_MEMORY,
+    UTIL_EOF,
+    UTIL_FILE,
+} util_status_t;
+
+#endif
diff --git a/util/zlib-extra.c b/util/zlib-extra.c
new file mode 100644
index 0000000..922ab52
--- /dev/null
+++ b/util/zlib-extra.c
@@ -0,0 +1,77 @@
+/* zlib-extra.c -  Extra or enhanced routines for compressed I/O.
+ *
+ * Copyright (c) 2014 David Bremner
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see http://www.gnu.org/licenses/ .
+ *
+ * Author: David Bremner <david@tethera.net>
+ */
+
+#include "zlib-extra.h"
+#include <talloc.h>
+#include <stdio.h>
+#include <string.h>
+
+/* mimic POSIX/glibc getline, but on a zlib gzFile stream, and using talloc */
+util_status_t
+gz_getline (void *talloc_ctx, char **bufptr, ssize_t *bytes_read, gzFile stream)
+{
+    char *buf = *bufptr;
+    unsigned int len;
+    size_t offset = 0;
+
+    if (buf) {
+	len = talloc_array_length (buf);
+    } else {
+	/* same as getdelim from gnulib */
+	len = 120;
+	buf = talloc_array (talloc_ctx, char, len);
+	if (buf == NULL)
+	    return UTIL_OUT_OF_MEMORY;
+    }
+
+    while (1) {
+	if (! gzgets (stream, buf + offset, len - offset)) {
+	    /* Null indicates EOF or error */
+	    int zlib_status = 0;
+	    (void) gzerror (stream, &zlib_status);
+	    switch (zlib_status) {
+	    case Z_OK:
+		/* no data read before EOF */
+		if (offset == 0)
+		    return UTIL_EOF;
+		else
+		    goto SUCCESS;
+	    case Z_ERRNO:
+		return UTIL_FILE;
+	    default:
+		return UTIL_ERROR;
+	    }
+	}
+
+	offset += strlen (buf + offset);
+
+	if (buf[offset - 1] == '\n')
+	    goto SUCCESS;
+
+	len *= 2;
+	buf = talloc_realloc (talloc_ctx, buf, char, len);
+	if (buf == NULL)
+	    return UTIL_OUT_OF_MEMORY;
+    }
+ SUCCESS:
+    *bufptr = buf;
+    *bytes_read = offset;
+    return UTIL_SUCCESS;
+}
diff --git a/util/zlib-extra.h b/util/zlib-extra.h
new file mode 100644
index 0000000..ed67115
--- /dev/null
+++ b/util/zlib-extra.h
@@ -0,0 +1,11 @@
+#ifndef _ZLIB_EXTRA_H
+#define _ZLIB_EXTRA_H
+
+#include "util.h"
+#include <zlib.h>
+
+/* Like getline, but read from a gzFile. Allocation is with talloc */
+util_status_t
+gz_getline (void *ctx, char **lineptr, ssize_t *bytes_read, gzFile stream);
+
+#endif
-- 
1.9.0

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [Patch v6 3/6] test: restore with missing final newline
  2014-04-03 19:41 v6 gzipped dump/restore David Bremner
  2014-04-03 19:41 ` [Patch v6 1/6] dump: support gzipped and atomic output David Bremner
  2014-04-03 19:41 ` [Patch v6 2/6] util: add gz_readline David Bremner
@ 2014-04-03 19:41 ` David Bremner
  2014-04-03 19:41 ` [Patch v6 4/6] restore: transparently support gzipped input David Bremner
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 12+ messages in thread
From: David Bremner @ 2014-04-03 19:41 UTC (permalink / raw)
  To: notmuch

Recent proposed patches for gzipped input had a bug with handling
missing newlines that was not caught by the current test suite
---
 test/T240-dump-restore.sh | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/test/T240-dump-restore.sh b/test/T240-dump-restore.sh
index d79aca8..b6d8602 100755
--- a/test/T240-dump-restore.sh
+++ b/test/T240-dump-restore.sh
@@ -110,6 +110,15 @@ notmuch dump --format=batch-tag from:cworth | sed 's/^.*-- id://' | \
     sort > OUTPUT.$test_count
 test_expect_equal_file EXPECTED.$test_count OUTPUT.$test_count
 
+test_begin_subtest "format=batch-tag, missing newline"
+printf "+a_tag_without_newline -- id:20091117232137.GA7669@griffis1.net" > IN
+notmuch restore --accumulate < IN
+notmuch dump id:20091117232137.GA7669@griffis1.net > OUT
+cat <<EOF > EXPECTED
++a_tag_without_newline +inbox +unread -- id:20091117232137.GA7669@griffis1.net
+EOF
+test_expect_equal_file EXPECTED OUT
+
 test_begin_subtest "format=batch-tag, # round-trip"
 notmuch dump --format=sup | sort > EXPECTED.$test_count
 notmuch dump --format=batch-tag | notmuch restore --format=batch-tag
-- 
1.9.0

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [Patch v6 4/6] restore: transparently support gzipped input
  2014-04-03 19:41 v6 gzipped dump/restore David Bremner
                   ` (2 preceding siblings ...)
  2014-04-03 19:41 ` [Patch v6 3/6] test: restore with missing final newline David Bremner
@ 2014-04-03 19:41 ` David Bremner
  2014-04-04 21:56   ` Austin Clements
  2014-04-04 22:10   ` Austin Clements
  2014-04-03 19:41 ` [Patch v6 5/6] notmuch-new: backup tags before database upgrade David Bremner
  2014-04-03 19:41 ` [Patch v6 6/6] test: verify tag backup generated by " David Bremner
  5 siblings, 2 replies; 12+ messages in thread
From: David Bremner @ 2014-04-03 19:41 UTC (permalink / raw)
  To: notmuch

We rely completely on zlib to do the right thing in detecting gzipped
input. Since our dump format is chosen to be 7 bit ascii, this should
be fine.
---
 doc/man1/notmuch-restore.rst |  8 ++++++++
 notmuch-restore.c            | 41 ++++++++++++++++++++++++++---------------
 test/T240-dump-restore.sh    | 14 ++++++++++++++
 3 files changed, 48 insertions(+), 15 deletions(-)

diff --git a/doc/man1/notmuch-restore.rst b/doc/man1/notmuch-restore.rst
index d6cf19a..936b138 100644
--- a/doc/man1/notmuch-restore.rst
+++ b/doc/man1/notmuch-restore.rst
@@ -50,6 +50,14 @@ Supported options for **restore** include
             format, this heuristic, based the fact that batch-tag format
             contains no parentheses, should be accurate.
 
+GZIPPED INPUT
+=============
+
+\ **notmuch restore** will detect if the input is compressed in
+**gzip(1)** format and automatically decompress it while reading. This
+detection does not depend on file naming and in particular works for
+standard input.
+
 SEE ALSO
 ========
 
diff --git a/notmuch-restore.c b/notmuch-restore.c
index c54d513..eb5b7b2 100644
--- a/notmuch-restore.c
+++ b/notmuch-restore.c
@@ -22,6 +22,7 @@
 #include "hex-escape.h"
 #include "tag-util.h"
 #include "string-util.h"
+#include "zlib-extra.h"
 
 static regex_t regex;
 
@@ -128,10 +129,9 @@ notmuch_restore_command (notmuch_config_t *config, int argc, char *argv[])
     tag_op_list_t *tag_ops;
 
     char *input_file_name = NULL;
-    FILE *input = stdin;
+    gzFile input;
     char *line = NULL;
     void *line_ctx = NULL;
-    size_t line_size;
     ssize_t line_len;
 
     int ret = 0;
@@ -163,13 +163,23 @@ notmuch_restore_command (notmuch_config_t *config, int argc, char *argv[])
     if (! accumulate)
 	flags |= TAG_FLAG_REMOVE_ALL;
 
-    if (input_file_name) {
-	input = fopen (input_file_name, "r");
-	if (input == NULL) {
-	    fprintf (stderr, "Error opening %s for reading: %s\n",
-		     input_file_name, strerror (errno));
+    if (input_file_name)
+	input = gzopen (input_file_name, "r");
+    else {
+	int infd = dup (STDIN_FILENO);
+	if (infd < 0) {
+	    fprintf (stderr, "Error duping stdin\n");
 	    return EXIT_FAILURE;
 	}
+	input = gzdopen (infd, "r");
+	if (! input)
+	    close (infd);
+    }
+
+    if (input == NULL) {
+	fprintf (stderr, "Error opening %s for (gzip) reading: %s\n",
+		 input_file_name ? input_file_name : "stdin", strerror (errno));
+	return EXIT_FAILURE;
     }
 
     if (opt_index < argc) {
@@ -184,12 +194,17 @@ notmuch_restore_command (notmuch_config_t *config, int argc, char *argv[])
     }
 
     do {
-	line_len = getline (&line, &line_size, input);
+	util_status_t status;
+
+	status = gz_getline (line_ctx, &line, &line_len, input);
 
 	/* empty input file not considered an error */
-	if (line_len < 0)
+	if (status == UTIL_EOF)
 	    return EXIT_SUCCESS;
 
+	if (status)
+	    return EXIT_FAILURE;
+
     } while ((line_len == 0) ||
 	     (line[0] == '#') ||
 	     /* the cast is safe because we checked about for line_len < 0 */
@@ -254,7 +269,7 @@ notmuch_restore_command (notmuch_config_t *config, int argc, char *argv[])
 	if (ret)
 	    break;
 
-    }  while ((line_len = getline (&line, &line_size, input)) != -1);
+    }  while (gz_getline (line_ctx, &line, &line_len, input) == UTIL_SUCCESS);
 
     if (line_ctx != NULL)
 	talloc_free (line_ctx);
@@ -262,13 +277,9 @@ notmuch_restore_command (notmuch_config_t *config, int argc, char *argv[])
     if (input_format == DUMP_FORMAT_SUP)
 	regfree (&regex);
 
-    if (line)
-	free (line);
-
     notmuch_database_destroy (notmuch);
 
-    if (input != stdin)
-	fclose (input);
+    gzclose_r (input);
 
     return ret ? EXIT_FAILURE : EXIT_SUCCESS;
 }
diff --git a/test/T240-dump-restore.sh b/test/T240-dump-restore.sh
index b6d8602..efe463e 100755
--- a/test/T240-dump-restore.sh
+++ b/test/T240-dump-restore.sh
@@ -80,6 +80,20 @@ notmuch dump --gzip --output=dump-gzip-outfile.gz
 gunzip dump-gzip-outfile.gz
 test_expect_equal_file dump.expected dump-gzip-outfile
 
+test_begin_subtest "restoring gzipped stdin"
+notmuch dump --gzip --output=backup.gz
+notmuch tag +new_tag '*'
+notmuch restore < backup.gz
+notmuch dump --output=dump.actual
+test_expect_equal_file dump.expected dump.actual
+
+test_begin_subtest "restoring gzipped file"
+notmuch dump --gzip --output=backup.gz
+notmuch tag +new_tag '*'
+notmuch restore --input=backup.gz
+notmuch dump --output=dump.actual
+test_expect_equal_file dump.expected dump.actual
+
 # Note, we assume all messages from cworth have a message-id
 # containing cworth.org
 
-- 
1.9.0

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [Patch v6 5/6] notmuch-new: backup tags before database upgrade
  2014-04-03 19:41 v6 gzipped dump/restore David Bremner
                   ` (3 preceding siblings ...)
  2014-04-03 19:41 ` [Patch v6 4/6] restore: transparently support gzipped input David Bremner
@ 2014-04-03 19:41 ` David Bremner
  2014-04-03 19:41 ` [Patch v6 6/6] test: verify tag backup generated by " David Bremner
  5 siblings, 0 replies; 12+ messages in thread
From: David Bremner @ 2014-04-03 19:41 UTC (permalink / raw)
  To: notmuch

All we do here is calculate the backup filename, and call the existing
dump routine.

Also take the opportity to add a message about being safe to
interrupt.
---
 notmuch-new.c        | 29 ++++++++++++++++++++++++++++-
 test/T530-upgrade.sh |  4 +++-
 2 files changed, 31 insertions(+), 2 deletions(-)

diff --git a/notmuch-new.c b/notmuch-new.c
index 82acf69..d269c7c 100644
--- a/notmuch-new.c
+++ b/notmuch-new.c
@@ -989,8 +989,35 @@ notmuch_new_command (notmuch_config_t *config, int argc, char *argv[])
 	    return EXIT_FAILURE;
 
 	if (notmuch_database_needs_upgrade (notmuch)) {
-	    if (add_files_state.verbosity >= VERBOSITY_NORMAL)
+	    time_t now = time (NULL);
+	    struct tm *gm_time = gmtime (&now);
+
+	    /* since dump files are written atomically, the amount of
+	     * harm from overwriting one within a second seems
+	     * relatively small. */
+
+	    const char *backup_name =
+		talloc_asprintf (notmuch, "%s/dump-%04d%02d%02dT%02d%02d%02d.gz",
+				 dot_notmuch_path,
+				 gm_time->tm_year + 1900,
+				 gm_time->tm_mon + 1,
+				 gm_time->tm_mday,
+				 gm_time->tm_hour,
+				 gm_time->tm_min,
+				 gm_time->tm_sec);
+
+	    if (add_files_state.verbosity >= VERBOSITY_NORMAL) {
 		printf ("Welcome to a new version of notmuch! Your database will now be upgraded.\n");
+		printf ("This process is safe to interrupt.\n");
+		printf ("Backing up tags to %s...\n", backup_name);
+	    }
+
+	    if (notmuch_database_dump (notmuch, backup_name, "",
+				       DUMP_FORMAT_BATCH_TAG, TRUE)) {
+		fprintf (stderr, "Backup failed. Aborting upgrade.");
+		return EXIT_FAILURE;
+	    }
+
 	    gettimeofday (&add_files_state.tv_start, NULL);
 	    notmuch_database_upgrade (notmuch,
 				      add_files_state.verbosity >= VERBOSITY_NORMAL ? upgrade_print_progress : NULL,
diff --git a/test/T530-upgrade.sh b/test/T530-upgrade.sh
index 67bbf31..d46e3d1 100755
--- a/test/T530-upgrade.sh
+++ b/test/T530-upgrade.sh
@@ -26,9 +26,11 @@ output=$(notmuch search path:foo)
 test_expect_equal "$output" ""
 
 test_begin_subtest "database upgrade from format version 1"
-output=$(notmuch new)
+output=$(notmuch new | sed -e 's/^Backing up tags to .*$/Backing up tags to FILENAME/')
 test_expect_equal "$output" "\
 Welcome to a new version of notmuch! Your database will now be upgraded.
+This process is safe to interrupt.
+Backing up tags to FILENAME
 Your notmuch database has now been upgraded to database format version 2.
 No new mail."
 
-- 
1.9.0

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [Patch v6 6/6] test: verify tag backup generated by database upgrade
  2014-04-03 19:41 v6 gzipped dump/restore David Bremner
                   ` (4 preceding siblings ...)
  2014-04-03 19:41 ` [Patch v6 5/6] notmuch-new: backup tags before database upgrade David Bremner
@ 2014-04-03 19:41 ` David Bremner
  5 siblings, 0 replies; 12+ messages in thread
From: David Bremner @ 2014-04-03 19:41 UTC (permalink / raw)
  To: notmuch

'pre upgrade dump' is not much of a test, but at least this way we get
somewhat sensible behaviour if it fails.
---
 test/T530-upgrade.sh | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/test/T530-upgrade.sh b/test/T530-upgrade.sh
index d46e3d1..7d5d5aa 100755
--- a/test/T530-upgrade.sh
+++ b/test/T530-upgrade.sh
@@ -25,6 +25,8 @@ test_begin_subtest "path: search does not work with old database version"
 output=$(notmuch search path:foo)
 test_expect_equal "$output" ""
 
+test_expect_success 'pre upgrade dump' 'notmuch dump | sort > pre-upgrade-dump'
+
 test_begin_subtest "database upgrade from format version 1"
 output=$(notmuch new | sed -e 's/^Backing up tags to .*$/Backing up tags to FILENAME/')
 test_expect_equal "$output" "\
@@ -34,6 +36,10 @@ Backing up tags to FILENAME
 Your notmuch database has now been upgraded to database format version 2.
 No new mail."
 
+test_begin_subtest "tag backup matches pre-upgrade dump"
+gunzip -c ${MAIL_DIR}/.notmuch/dump-*.gz | sort > backup-dump
+test_expect_equal_file pre-upgrade-dump backup-dump
+
 test_begin_subtest "folder: no longer matches in the middle of path"
 output=$(notmuch search folder:baz)
 test_expect_equal "$output" ""
-- 
1.9.0

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [Patch v6 1/6] dump: support gzipped and atomic output
  2014-04-03 19:41 ` [Patch v6 1/6] dump: support gzipped and atomic output David Bremner
@ 2014-04-04  2:32   ` David Bremner
  2014-04-04 10:51   ` Tomi Ollila
  2014-04-04 22:05   ` Austin Clements
  2 siblings, 0 replies; 12+ messages in thread
From: David Bremner @ 2014-04-04  2:32 UTC (permalink / raw)
  To: notmuch

David Bremner <david@tethera.net> writes:

> The main goal is to support gzipped output for future internal
> calls (e.g. from notmuch-new) to notmuch_database_dump.

Apologies if this later turns out to be duplicate. I'm not sure what
happened to the cover letter for this series.

This supercedes 

id:1396401381-18128-1-git-send-email-david@tethera.net

I tried to take care of Austin and Tomi's comments, but there were
several messages so hopefully I didn't miss anything.

I ended up merging the "gzipped output" and "atomic output" patches; I
fought it for a bit, but I realized I would have to do some of the
proposed changes twice and throw away the first one.

diff from v5 follows:

diff --git a/INSTALL b/INSTALL
index df318fa..b543c50 100644
--- a/INSTALL
+++ b/INSTALL
@@ -67,6 +67,9 @@ Talloc, and zlib which are each described below:
 	by Xapian, so if you installed that you will already have
 	zlib. You may need to install the zlib headers separately.
 
+	Notmuch needs the transparent write feature of zlib introduced
+	in version 1.2.5.2 (Dec. 2011).
+
 	zlib is available from http://zlib.net
 
 Building Documentation
diff --git a/configure b/configure
index d685ab3..1d624f7 100755
--- a/configure
+++ b/configure
@@ -340,9 +340,9 @@ else
     errors=$((errors + 1))
 fi
 
-printf "Checking for zlib development files... "
+printf "Checking for zlib (>= 1.2.5.2)... "
 have_zlib=0
-if pkg-config --exists zlib; then
+if pkg-config --atleast-version=1.2.5.2 zlib; then
     printf "Yes.\n"
     have_zlib=1
     zlib_cflags=$(pkg-config --cflags zlib)
diff --git a/notmuch-dump.c b/notmuch-dump.c
index 05ed6b4..2a7252a 100644
--- a/notmuch-dump.c
+++ b/notmuch-dump.c
@@ -140,7 +140,7 @@ notmuch_database_dump (notmuch_database_t *notmuch,
 	tempname = talloc_asprintf (notmuch, "%s.XXXXXX", output_file_name);
 	outfd = mkstemp (tempname);
     } else {
-	outfd = fileno (stdout);
+	outfd = dup (STDOUT_FILENO);
     }
 
     if (outfd < 0) {
@@ -153,6 +153,9 @@ notmuch_database_dump (notmuch_database_t *notmuch,
     if (output == NULL) {
 	fprintf (stderr, "Error opening %s for (gzip) writing: %s\n",
 		 name_for_error, strerror (errno));
+	if (close (outfd))
+	    fprintf (stderr, "Error closing %s during shutdown: %s\n",
+		 name_for_error, strerror (errno));
 	goto DONE;
     }
 
@@ -165,22 +168,25 @@ notmuch_database_dump (notmuch_database_t *notmuch,
 	goto DONE;
     }
 
-    ret = fdatasync (outfd);
-    if (ret) {
-	perror ("fdatasync");
-	goto DONE;
-    }
-
     if (output_file_name) {
-	ret = gzclose_w (output);
-	if (ret != Z_OK) {
-	    ret = EXIT_FAILURE;
+	ret = fdatasync (outfd);
+	if (ret) {
+	    fprintf (stderr, "Error syncing %s to disk: %s\n",
+		     name_for_error, strerror (errno));
 	    goto DONE;
 	}
+    }
 
+    if (gzclose_w (output) != Z_OK) {
+	ret = EXIT_FAILURE;
+	goto DONE;
+    }
+
+    if (output_file_name) {
 	ret = rename (tempname, output_file_name);
 	if (ret) {
-	    perror ("rename");
+	    fprintf (stderr, "Error renaming %s to %s: %s\n",
+		     tempname, output_file_name, strerror (errno));
 	    goto DONE;
 	}
 
diff --git a/notmuch-new.c b/notmuch-new.c
index e0431c6..d269c7c 100644
--- a/notmuch-new.c
+++ b/notmuch-new.c
@@ -997,7 +997,7 @@ notmuch_new_command (notmuch_config_t *config, int argc, char *argv[])
 	     * relatively small. */
 
 	    const char *backup_name =
-		talloc_asprintf (notmuch, "%s/backup-%04d-%02d-%02d-%02d:%02d:%02d.gz",
+		talloc_asprintf (notmuch, "%s/dump-%04d%02d%02dT%02d%02d%02d.gz",
 				 dot_notmuch_path,
 				 gm_time->tm_year + 1900,
 				 gm_time->tm_mon + 1,
@@ -1009,12 +1009,12 @@ notmuch_new_command (notmuch_config_t *config, int argc, char *argv[])
 	    if (add_files_state.verbosity >= VERBOSITY_NORMAL) {
 		printf ("Welcome to a new version of notmuch! Your database will now be upgraded.\n");
 		printf ("This process is safe to interrupt.\n");
-		printf ("Backing up tags to %s\n", backup_name);
+		printf ("Backing up tags to %s...\n", backup_name);
 	    }
 
 	    if (notmuch_database_dump (notmuch, backup_name, "",
 				       DUMP_FORMAT_BATCH_TAG, TRUE)) {
-		fprintf (stderr, "Backup failed. Aborting upgrade");
+		fprintf (stderr, "Backup failed. Aborting upgrade.");
 		return EXIT_FAILURE;
 	    }
 
diff --git a/notmuch-restore.c b/notmuch-restore.c
index 86bce20..eb5b7b2 100644
--- a/notmuch-restore.c
+++ b/notmuch-restore.c
@@ -132,7 +132,6 @@ notmuch_restore_command (notmuch_config_t *config, int argc, char *argv[])
     gzFile input;
     char *line = NULL;
     void *line_ctx = NULL;
-    size_t line_size;
     ssize_t line_len;
 
     int ret = 0;
@@ -166,8 +165,16 @@ notmuch_restore_command (notmuch_config_t *config, int argc, char *argv[])
 
     if (input_file_name)
 	input = gzopen (input_file_name, "r");
-    else
-	input = gzdopen (fileno (stdin), "r");
+    else {
+	int infd = dup (STDIN_FILENO);
+	if (infd < 0) {
+	    fprintf (stderr, "Error duping stdin\n");
+	    return EXIT_FAILURE;
+	}
+	input = gzdopen (infd, "r");
+	if (! input)
+	    close (infd);
+    }
 
     if (input == NULL) {
 	fprintf (stderr, "Error opening %s for (gzip) reading: %s\n",
@@ -189,7 +196,7 @@ notmuch_restore_command (notmuch_config_t *config, int argc, char *argv[])
     do {
 	util_status_t status;
 
-	status = gz_getline (line_ctx, &line, &line_size, &line_len, input);
+	status = gz_getline (line_ctx, &line, &line_len, input);
 
 	/* empty input file not considered an error */
 	if (status == UTIL_EOF)
@@ -262,7 +269,7 @@ notmuch_restore_command (notmuch_config_t *config, int argc, char *argv[])
 	if (ret)
 	    break;
 
-    }  while (gz_getline (line_ctx, &line, &line_size, &line_len, input) == UTIL_SUCCESS);
+    }  while (gz_getline (line_ctx, &line, &line_len, input) == UTIL_SUCCESS);
 
     if (line_ctx != NULL)
 	talloc_free (line_ctx);
@@ -272,8 +279,7 @@ notmuch_restore_command (notmuch_config_t *config, int argc, char *argv[])
 
     notmuch_database_destroy (notmuch);
 
-    if (input_file_name != NULL)
-	gzclose_r (input);
+    gzclose_r (input);
 
     return ret ? EXIT_FAILURE : EXIT_SUCCESS;
 }
diff --git a/test/T240-dump-restore.sh b/test/T240-dump-restore.sh
index 50d4d48..efe463e 100755
--- a/test/T240-dump-restore.sh
+++ b/test/T240-dump-restore.sh
@@ -124,6 +124,15 @@ notmuch dump --format=batch-tag from:cworth | sed 's/^.*-- id://' | \
     sort > OUTPUT.$test_count
 test_expect_equal_file EXPECTED.$test_count OUTPUT.$test_count
 
+test_begin_subtest "format=batch-tag, missing newline"
+printf "+a_tag_without_newline -- id:20091117232137.GA7669@griffis1.net" > IN
+notmuch restore --accumulate < IN
+notmuch dump id:20091117232137.GA7669@griffis1.net > OUT
+cat <<EOF > EXPECTED
++a_tag_without_newline +inbox +unread -- id:20091117232137.GA7669@griffis1.net
+EOF
+test_expect_equal_file EXPECTED OUT
+
 test_begin_subtest "format=batch-tag, # round-trip"
 notmuch dump --format=sup | sort > EXPECTED.$test_count
 notmuch dump --format=batch-tag | notmuch restore --format=batch-tag
diff --git a/test/T530-upgrade.sh b/test/T530-upgrade.sh
index 7a68ddf..7d5d5aa 100755
--- a/test/T530-upgrade.sh
+++ b/test/T530-upgrade.sh
@@ -37,7 +37,7 @@ Your notmuch database has now been upgraded to database format version 2.
 No new mail."
 
 test_begin_subtest "tag backup matches pre-upgrade dump"
-gunzip -c ${MAIL_DIR}/.notmuch/backup-*.gz | sort > backup-dump
+gunzip -c ${MAIL_DIR}/.notmuch/dump-*.gz | sort > backup-dump
 test_expect_equal_file pre-upgrade-dump backup-dump
 
 test_begin_subtest "folder: no longer matches in the middle of path"
diff --git a/util/zlib-extra.c b/util/zlib-extra.c
index cb1eba0..922ab52 100644
--- a/util/zlib-extra.c
+++ b/util/zlib-extra.c
@@ -25,34 +25,36 @@
 
 /* mimic POSIX/glibc getline, but on a zlib gzFile stream, and using talloc */
 util_status_t
-gz_getline (void *talloc_ctx, char **bufptr, size_t *bufsiz, ssize_t *bytes_read,
-	    gzFile stream)
+gz_getline (void *talloc_ctx, char **bufptr, ssize_t *bytes_read, gzFile stream)
 {
-    size_t len = *bufsiz;
     char *buf = *bufptr;
+    unsigned int len;
     size_t offset = 0;
 
-    if (len == 0 || buf == NULL) {
+    if (buf) {
+	len = talloc_array_length (buf);
+    } else {
 	/* same as getdelim from gnulib */
 	len = 120;
-	buf = talloc_size (talloc_ctx, len);
+	buf = talloc_array (talloc_ctx, char, len);
 	if (buf == NULL)
 	    return UTIL_OUT_OF_MEMORY;
     }
 
     while (1) {
 	if (! gzgets (stream, buf + offset, len - offset)) {
+	    /* Null indicates EOF or error */
 	    int zlib_status = 0;
 	    (void) gzerror (stream, &zlib_status);
 	    switch (zlib_status) {
 	    case Z_OK:
-		/* follow getline behaviour */
-		*bytes_read = -1;
-		return UTIL_EOF;
-		break;
+		/* no data read before EOF */
+		if (offset == 0)
+		    return UTIL_EOF;
+		else
+		    goto SUCCESS;
 	    case Z_ERRNO:
 		return UTIL_FILE;
-		break;
 	    default:
 		return UTIL_ERROR;
 	    }
@@ -60,17 +62,16 @@ gz_getline (void *talloc_ctx, char **bufptr, size_t *bufsiz, ssize_t *bytes_read
 
 	offset += strlen (buf + offset);
 
-	if ( buf[offset - 1] == '\n' )
-	    break;
+	if (buf[offset - 1] == '\n')
+	    goto SUCCESS;
 
 	len *= 2;
 	buf = talloc_realloc (talloc_ctx, buf, char, len);
 	if (buf == NULL)
 	    return UTIL_OUT_OF_MEMORY;
     }
-
+ SUCCESS:
     *bufptr = buf;
-    *bufsiz = len;
     *bytes_read = offset;
     return UTIL_SUCCESS;
 }
diff --git a/util/zlib-extra.h b/util/zlib-extra.h
index ed46ac1..ed67115 100644
--- a/util/zlib-extra.h
+++ b/util/zlib-extra.h
@@ -1,11 +1,11 @@
 #ifndef _ZLIB_EXTRA_H
 #define _ZLIB_EXTRA_H
 
-#include <zlib.h>
 #include "util.h"
+#include <zlib.h>
+
 /* Like getline, but read from a gzFile. Allocation is with talloc */
 util_status_t
-gz_getline (void *ctx, char **lineptr, size_t *line_size, ssize_t *bytes_read,
-	    gzFile stream);
+gz_getline (void *ctx, char **lineptr, ssize_t *bytes_read, gzFile stream);
 
 #endif

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [Patch v6 1/6] dump: support gzipped and atomic output
  2014-04-03 19:41 ` [Patch v6 1/6] dump: support gzipped and atomic output David Bremner
  2014-04-04  2:32   ` David Bremner
@ 2014-04-04 10:51   ` Tomi Ollila
  2014-04-04 22:05   ` Austin Clements
  2 siblings, 0 replies; 12+ messages in thread
From: Tomi Ollila @ 2014-04-04 10:51 UTC (permalink / raw)
  To: David Bremner, notmuch

On Thu, Apr 03 2014, David Bremner <david@tethera.net> wrote:

> The main goal is to support gzipped output for future internal
> calls (e.g. from notmuch-new) to notmuch_database_dump.
>
> The additional dependency is not very heavy since xapian already pulls
> in zlib.
>
> We want the dump to be "atomic", in the sense that after running the
> dump file is either present and complete, or not present.  This avoids
> certain classes of mishaps involving overwriting a good backup with a
> bad or partial one.

2 things in this patch (comments inline), otherwise LGTM.

Except now I remembered. In addition to those 2 the the error message in patch 6/6:
 +	if (infd < 0) {
 +	    fprintf (stderr, "Error duping stdin\n");
to
 +	if (infd < 0) {
 +	    fprintf (stderr, "Error duping stdin: %s\n", strerror (errno));


Tomi

> ---
>  INSTALL                   | 20 ++++++++--
>  Makefile.local            |  2 +-
>  configure                 | 28 ++++++++++++--
>  doc/man1/notmuch-dump.rst |  3 ++
>  notmuch-client.h          |  4 +-
>  notmuch-dump.c            | 95 +++++++++++++++++++++++++++++++++++++----------
>  test/T240-dump-restore.sh | 12 ++++++
>  7 files changed, 136 insertions(+), 28 deletions(-)
>
> diff --git a/INSTALL b/INSTALL
> index 690b0ef..b543c50 100644
> --- a/INSTALL
> +++ b/INSTALL
> @@ -20,8 +20,8 @@ configure stage.
>  
>  Dependencies
>  ------------
> -Notmuch depends on three libraries: Xapian, GMime 2.4 or 2.6, and
> -Talloc which are each described below:
> +Notmuch depends on four libraries: Xapian, GMime 2.4 or 2.6,
> +Talloc, and zlib which are each described below:
>  
>  	Xapian
>  	------
> @@ -60,6 +60,18 @@ Talloc which are each described below:
>  
>  	Talloc is available from http://talloc.samba.org/
>  
> +	zlib
> +	----
> +
> +	zlib is an extremely popular compression library. It is used
> +	by Xapian, so if you installed that you will already have
> +	zlib. You may need to install the zlib headers separately.
> +
> +	Notmuch needs the transparent write feature of zlib introduced
> +	in version 1.2.5.2 (Dec. 2011).
> +
> +	zlib is available from http://zlib.net
> +
>  Building Documentation
>  ----------------------
>  
> @@ -79,11 +91,11 @@ dependencies with a simple simple command line. For example:
>  
>    For Debian and similar:
>  
> -        sudo apt-get install libxapian-dev libgmime-2.6-dev libtalloc-dev python-sphinx
> +        sudo apt-get install libxapian-dev libgmime-2.6-dev libtalloc-dev zlib1g-dev python-sphinx
>  
>    For Fedora and similar:
>  
> -	sudo yum install xapian-core-devel gmime-devel libtalloc-devel python-sphinx
> +	sudo yum install xapian-core-devel gmime-devel libtalloc-devel zlib-devel python-sphinx
>  
>  On other systems, a similar command can be used, but the details of
>  the package names may be different.
> diff --git a/Makefile.local b/Makefile.local
> index cb7b106..e5a20a7 100644
> --- a/Makefile.local
> +++ b/Makefile.local
> @@ -41,7 +41,7 @@ PV_FILE=bindings/python/notmuch/version.py
>  # Smash together user's values with our extra values
>  FINAL_CFLAGS = -DNOTMUCH_VERSION=$(VERSION) $(CPPFLAGS) $(CFLAGS) $(WARN_CFLAGS) $(extra_cflags) $(CONFIGURE_CFLAGS)
>  FINAL_CXXFLAGS = $(CPPFLAGS) $(CXXFLAGS) $(WARN_CXXFLAGS) $(extra_cflags) $(extra_cxxflags) $(CONFIGURE_CXXFLAGS)
> -FINAL_NOTMUCH_LDFLAGS = $(LDFLAGS) -Lutil -lutil -Llib -lnotmuch $(AS_NEEDED_LDFLAGS) $(GMIME_LDFLAGS) $(TALLOC_LDFLAGS)
> +FINAL_NOTMUCH_LDFLAGS = $(LDFLAGS) -Lutil -lutil -Llib -lnotmuch $(AS_NEEDED_LDFLAGS) $(GMIME_LDFLAGS) $(TALLOC_LDFLAGS) $(ZLIB_LDFLAGS)
>  FINAL_NOTMUCH_LINKER = CC
>  ifneq ($(LINKER_RESOLVES_LIBRARY_DEPENDENCIES),1)
>  FINAL_NOTMUCH_LDFLAGS += $(CONFIGURE_LDFLAGS)
> diff --git a/configure b/configure
> index 1d430b9..1d624f7 100755
> --- a/configure
> +++ b/configure
> @@ -340,6 +340,18 @@ else
>      errors=$((errors + 1))
>  fi
>  
> +printf "Checking for zlib (>= 1.2.5.2)... "
> +have_zlib=0
> +if pkg-config --atleast-version=1.2.5.2 zlib; then
> +    printf "Yes.\n"
> +    have_zlib=1
> +    zlib_cflags=$(pkg-config --cflags zlib)
> +    zlib_ldflags=$(pkg-config --libs zlib)
> +else
> +    printf "No.\n"
> +    errors=$((errors + 1))
> +fi
> +
>  printf "Checking for talloc development files... "
>  if pkg-config --exists talloc; then
>      printf "Yes.\n"
> @@ -496,6 +508,11 @@ EOF
>  	echo "	Xapian library (including development files such as headers)"
>  	echo "	http://xapian.org/"
>      fi
> +    if [ $have_zlib -eq 0 ]; then
> +	echo "	zlib library (including development files such as headers)"
> +	echo "	http://zlib.net/"
> +	echo

This message above should inform the required zlib version; this is usually
the only string user see at the end of failed configure and she may wonder
that 'Hey, I do have zlib. it is ubiquitous!'.

> +    fi
>      if [ $have_gmime -eq 0 ]; then
>  	echo "	Either GMime 2.4 library" $GMIME_24_VERSION_CTR "or GMime 2.6 library" $GMIME_26_VERSION_CTR
>  	echo "	(including development files such as headers)"
> @@ -519,11 +536,11 @@ case a simple command will install everything you need. For example:
>  
>  On Debian and similar systems:
>  
> -	sudo apt-get install libxapian-dev libgmime-2.6-dev libtalloc-dev
> +	sudo apt-get install libxapian-dev libgmime-2.6-dev libtalloc-dev zlib1g-dev
>  
>  Or on Fedora and similar systems:
>  
> -	sudo yum install xapian-core-devel gmime-devel libtalloc-devel
> +	sudo yum install xapian-core-devel gmime-devel libtalloc-devel zlib-devel
>  
>  On other systems, similar commands can be used, but the details of the
>  package names may be different.
> @@ -844,6 +861,10 @@ XAPIAN_LDFLAGS = ${xapian_ldflags}
>  GMIME_CFLAGS = ${gmime_cflags}
>  GMIME_LDFLAGS = ${gmime_ldflags}
>  
> +# Flags needed to compile and link against zlib
> +ZLIB_CFLAGS = ${zlib_cflags}
> +ZLIB_LDFLAGS = ${zlib_ldflags}
> +
>  # Flags needed to compile and link against talloc
>  TALLOC_CFLAGS = ${talloc_cflags}
>  TALLOC_LDFLAGS = ${talloc_ldflags}
> @@ -882,6 +903,7 @@ CONFIGURE_CFLAGS = -DHAVE_GETLINE=\$(HAVE_GETLINE) \$(GMIME_CFLAGS)      \\
>  		   -DUTIL_BYTE_ORDER=\$(UTIL_BYTE_ORDER)
>  
>  CONFIGURE_CXXFLAGS = -DHAVE_GETLINE=\$(HAVE_GETLINE) \$(GMIME_CFLAGS)    \\
> +		     \$(ZLIB_CFLAGS)					 \\
>  		     \$(TALLOC_CFLAGS) -DHAVE_VALGRIND=\$(HAVE_VALGRIND) \\
>  		     \$(VALGRIND_CFLAGS) \$(XAPIAN_CXXFLAGS)             \\
>  		     -DHAVE_STRCASESTR=\$(HAVE_STRCASESTR)               \\
> @@ -892,5 +914,5 @@ CONFIGURE_CXXFLAGS = -DHAVE_GETLINE=\$(HAVE_GETLINE) \$(GMIME_CFLAGS)    \\
>  		     -DHAVE_XAPIAN_COMPACT=\$(HAVE_XAPIAN_COMPACT)       \\
>  		     -DUTIL_BYTE_ORDER=\$(UTIL_BYTE_ORDER)
>  
> -CONFIGURE_LDFLAGS =  \$(GMIME_LDFLAGS) \$(TALLOC_LDFLAGS) \$(XAPIAN_LDFLAGS)
> +CONFIGURE_LDFLAGS =  \$(GMIME_LDFLAGS) \$(TALLOC_LDFLAGS) \$(ZLIB_LDFLAGS) \$(XAPIAN_LDFLAGS)
>  EOF
> diff --git a/doc/man1/notmuch-dump.rst b/doc/man1/notmuch-dump.rst
> index 17d1da5..d94cb4f 100644
> --- a/doc/man1/notmuch-dump.rst
> +++ b/doc/man1/notmuch-dump.rst
> @@ -19,6 +19,9 @@ recreated from the messages themselves. The output of notmuch dump is
>  therefore the only critical thing to backup (and much more friendly to
>  incremental backup than the native database files.)
>  
> +``--gzip``
> +    Compress the output in a format compatible with **gzip(1)**.
> +
>  ``--format=(sup|batch-tag)``
>      Notmuch restore supports two plain text dump formats, both with one
>      message-id per line, followed by a list of tags.
> diff --git a/notmuch-client.h b/notmuch-client.h
> index d110648..e1efbe0 100644
> --- a/notmuch-client.h
> +++ b/notmuch-client.h
> @@ -450,7 +450,9 @@ typedef enum dump_formats {
>  int
>  notmuch_database_dump (notmuch_database_t *notmuch,
>  		       const char *output_file_name,
> -		       const char *query_str, dump_format_t output_format);
> +		       const char *query_str,
> +		       dump_format_t output_format,
> +		       notmuch_bool_t gzip_output);
>  
>  #include "command-line-arguments.h"
>  #endif
> diff --git a/notmuch-dump.c b/notmuch-dump.c
> index 21702d7..2a7252a 100644
> --- a/notmuch-dump.c
> +++ b/notmuch-dump.c
> @@ -21,9 +21,11 @@
>  #include "notmuch-client.h"
>  #include "hex-escape.h"
>  #include "string-util.h"
> +#include <zlib.h>
> +
>  
>  static int
> -database_dump_file (notmuch_database_t *notmuch, FILE *output,
> +database_dump_file (notmuch_database_t *notmuch, gzFile output,
>  		    const char *query_str, int output_format)
>  {
>      notmuch_query_t *query;
> @@ -69,7 +71,7 @@ database_dump_file (notmuch_database_t *notmuch, FILE *output,
>  	}
>  
>  	if (output_format == DUMP_FORMAT_SUP) {
> -	    fprintf (output, "%s (", message_id);
> +	    gzprintf (output, "%s (", message_id);
>  	}
>  
>  	for (tags = notmuch_message_get_tags (message);
> @@ -78,12 +80,12 @@ database_dump_file (notmuch_database_t *notmuch, FILE *output,
>  	    const char *tag_str = notmuch_tags_get (tags);
>  
>  	    if (! first)
> -		fputs (" ", output);
> +		gzputs (output, " ");
>  
>  	    first = 0;
>  
>  	    if (output_format == DUMP_FORMAT_SUP) {
> -		fputs (tag_str, output);
> +		gzputs (output, tag_str);
>  	    } else {
>  		if (hex_encode (notmuch, tag_str,
>  				&buffer, &buffer_size) != HEX_SUCCESS) {
> @@ -91,12 +93,12 @@ database_dump_file (notmuch_database_t *notmuch, FILE *output,
>  			     tag_str);
>  		    return EXIT_FAILURE;
>  		}
> -		fprintf (output, "+%s", buffer);
> +		gzprintf (output, "+%s", buffer);
>  	    }
>  	}
>  
>  	if (output_format == DUMP_FORMAT_SUP) {
> -	    fputs (")\n", output);
> +	    gzputs (output, ")\n");
>  	} else {
>  	    if (make_boolean_term (notmuch, "id", message_id,
>  				   &buffer, &buffer_size)) {
> @@ -104,7 +106,7 @@ database_dump_file (notmuch_database_t *notmuch, FILE *output,
>  			     message_id, strerror (errno));
>  		    return EXIT_FAILURE;
>  	    }
> -	    fprintf (output, " -- %s\n", buffer);
> +	    gzprintf (output, " -- %s\n", buffer);
>  	}
>  
>  	notmuch_message_destroy (message);
> @@ -121,24 +123,77 @@ database_dump_file (notmuch_database_t *notmuch, FILE *output,
>  int
>  notmuch_database_dump (notmuch_database_t *notmuch,
>  		       const char *output_file_name,
> -		       const char *query_str, dump_format_t output_format)
> +		       const char *query_str,
> +		       dump_format_t output_format,
> +		       notmuch_bool_t gzip_output)
>  {
> -    FILE *output = stdout;
> -    int ret;
> +    gzFile output;
> +    const char *mode = gzip_output ? "w9" : "wT";
> +    const char *name_for_error = output_file_name ? output_file_name : "stdout";
> +
> +    char *tempname = NULL;
> +    int outfd = -1;
> +
> +    int ret = -1;
>  
>      if (output_file_name) {
> -	output = fopen (output_file_name, "w");
> -	if (output == NULL) {
> -	    fprintf (stderr, "Error opening %s for writing: %s\n",
> -		     output_file_name, strerror (errno));
> -	    return EXIT_FAILURE;
> -	}
> +	tempname = talloc_asprintf (notmuch, "%s.XXXXXX", output_file_name);
> +	outfd = mkstemp (tempname);
> +    } else {
> +	outfd = dup (STDOUT_FILENO);
> +    }
> +
> +    if (outfd < 0) {
> +	fprintf (stderr, "Bad output file %s\n", name_for_error);
> +	goto DONE;
> +    }
> +
> +    output = gzdopen (outfd, mode);
> +
> +    if (output == NULL) {
> +	fprintf (stderr, "Error opening %s for (gzip) writing: %s\n",
> +		 name_for_error, strerror (errno));
> +	if (close (outfd))
> +	    fprintf (stderr, "Error closing %s during shutdown: %s\n",
> +		 name_for_error, strerror (errno));
> +	goto DONE;
>      }
>  
>      ret = database_dump_file (notmuch, output, query_str, output_format);
> +    if (ret) goto DONE;
>  
> -    if (output != stdout)
> -	fclose (output);
> +    ret = gzflush (output, Z_FINISH);
> +    if (ret) {
> +	fprintf (stderr, "Error flushing output: %s\n", gzerror (output, NULL));
> +	goto DONE;
> +    }
> +
> +    if (output_file_name) {
> +	ret = fdatasync (outfd);
> +	if (ret) {
> +	    fprintf (stderr, "Error syncing %s to disk: %s\n",
> +		     name_for_error, strerror (errno));
> +	    goto DONE;
> +	}
> +    }
> +
> +    if (gzclose_w (output) != Z_OK) {
> +	ret = EXIT_FAILURE;

error message here, like fprintf (stderr, "Error closing output: %s\n", gzerror (output, NULL));

> +	goto DONE;
> +    }
> +
> +    if (output_file_name) {
> +	ret = rename (tempname, output_file_name);
> +	if (ret) {
> +	    fprintf (stderr, "Error renaming %s to %s: %s\n",
> +		     tempname, output_file_name, strerror (errno));
> +	    goto DONE;
> +	}
> +
> +    }
> + DONE:
> +    if (ret != EXIT_SUCCESS && output_file_name)
> +	(void) unlink (tempname);
>  
>      return ret;
>  }
> @@ -158,6 +213,7 @@ notmuch_dump_command (notmuch_config_t *config, int argc, char *argv[])
>      int opt_index;
>  
>      int output_format = DUMP_FORMAT_BATCH_TAG;
> +    notmuch_bool_t gzip_output = 0;
>  
>      notmuch_opt_desc_t options[] = {
>  	{ NOTMUCH_OPT_KEYWORD, &output_format, "format", 'f',
> @@ -165,6 +221,7 @@ notmuch_dump_command (notmuch_config_t *config, int argc, char *argv[])
>  				  { "batch-tag", DUMP_FORMAT_BATCH_TAG },
>  				  { 0, 0 } } },
>  	{ NOTMUCH_OPT_STRING, &output_file_name, "output", 'o', 0  },
> +	{ NOTMUCH_OPT_BOOLEAN, &gzip_output, "gzip", 'z', 0 },
>  	{ 0, 0, 0, 0, 0 }
>      };
>  
> @@ -181,7 +238,7 @@ notmuch_dump_command (notmuch_config_t *config, int argc, char *argv[])
>      }
>  
>      ret = notmuch_database_dump (notmuch, output_file_name, query_str,
> -				 output_format);
> +				 output_format, gzip_output);
>  
>      notmuch_database_destroy (notmuch);
>  
> diff --git a/test/T240-dump-restore.sh b/test/T240-dump-restore.sh
> index 0004438..d79aca8 100755
> --- a/test/T240-dump-restore.sh
> +++ b/test/T240-dump-restore.sh
> @@ -68,6 +68,18 @@ test_begin_subtest "dump --output=outfile --"
>  notmuch dump --output=dump-1-arg-dash.actual --
>  test_expect_equal_file dump.expected dump-1-arg-dash.actual
>  
> +# gzipped output
> +
> +test_begin_subtest "dump --gzip"
> +notmuch dump --gzip > dump-gzip.gz
> +gunzip dump-gzip.gz
> +test_expect_equal_file dump.expected dump-gzip
> +
> +test_begin_subtest "dump --gzip --output=outfile"
> +notmuch dump --gzip --output=dump-gzip-outfile.gz
> +gunzip dump-gzip-outfile.gz
> +test_expect_equal_file dump.expected dump-gzip-outfile
> +
>  # Note, we assume all messages from cworth have a message-id
>  # containing cworth.org
>  
> -- 
> 1.9.0

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [Patch v6 4/6] restore: transparently support gzipped input
  2014-04-03 19:41 ` [Patch v6 4/6] restore: transparently support gzipped input David Bremner
@ 2014-04-04 21:56   ` Austin Clements
  2014-04-04 22:10   ` Austin Clements
  1 sibling, 0 replies; 12+ messages in thread
From: Austin Clements @ 2014-04-04 21:56 UTC (permalink / raw)
  To: David Bremner; +Cc: notmuch

Quoth David Bremner on Apr 03 at  4:41 pm:
> We rely completely on zlib to do the right thing in detecting gzipped
> input. Since our dump format is chosen to be 7 bit ascii, this should
> be fine.
> ---
>  doc/man1/notmuch-restore.rst |  8 ++++++++
>  notmuch-restore.c            | 41 ++++++++++++++++++++++++++---------------
>  test/T240-dump-restore.sh    | 14 ++++++++++++++
>  3 files changed, 48 insertions(+), 15 deletions(-)
> 
> diff --git a/doc/man1/notmuch-restore.rst b/doc/man1/notmuch-restore.rst
> index d6cf19a..936b138 100644
> --- a/doc/man1/notmuch-restore.rst
> +++ b/doc/man1/notmuch-restore.rst
> @@ -50,6 +50,14 @@ Supported options for **restore** include
>              format, this heuristic, based the fact that batch-tag format
>              contains no parentheses, should be accurate.
>  
> +GZIPPED INPUT
> +=============
> +
> +\ **notmuch restore** will detect if the input is compressed in
> +**gzip(1)** format and automatically decompress it while reading. This
> +detection does not depend on file naming and in particular works for
> +standard input.
> +
>  SEE ALSO
>  ========
>  
> diff --git a/notmuch-restore.c b/notmuch-restore.c
> index c54d513..eb5b7b2 100644
> --- a/notmuch-restore.c
> +++ b/notmuch-restore.c
> @@ -22,6 +22,7 @@
>  #include "hex-escape.h"
>  #include "tag-util.h"
>  #include "string-util.h"
> +#include "zlib-extra.h"
>  
>  static regex_t regex;
>  
> @@ -128,10 +129,9 @@ notmuch_restore_command (notmuch_config_t *config, int argc, char *argv[])
>      tag_op_list_t *tag_ops;
>  
>      char *input_file_name = NULL;
> -    FILE *input = stdin;
> +    gzFile input;
>      char *line = NULL;
>      void *line_ctx = NULL;
> -    size_t line_size;
>      ssize_t line_len;
>  
>      int ret = 0;
> @@ -163,13 +163,23 @@ notmuch_restore_command (notmuch_config_t *config, int argc, char *argv[])
>      if (! accumulate)
>  	flags |= TAG_FLAG_REMOVE_ALL;
>  
> -    if (input_file_name) {
> -	input = fopen (input_file_name, "r");
> -	if (input == NULL) {
> -	    fprintf (stderr, "Error opening %s for reading: %s\n",
> -		     input_file_name, strerror (errno));
> +    if (input_file_name)
> +	input = gzopen (input_file_name, "r");
> +    else {
> +	int infd = dup (STDIN_FILENO);
> +	if (infd < 0) {
> +	    fprintf (stderr, "Error duping stdin\n");
>  	    return EXIT_FAILURE;
>  	}
> +	input = gzdopen (infd, "r");
> +	if (! input)
> +	    close (infd);
> +    }
> +
> +    if (input == NULL) {
> +	fprintf (stderr, "Error opening %s for (gzip) reading: %s\n",
> +		 input_file_name ? input_file_name : "stdin", strerror (errno));

There's a sketchy line about errno in the gz(d)open docs: "On error,
gzopen() *may* set the global variable errno to indicate the error."
(emphasis mine).  This suggests we should set errno to 0 before the
calls to gz(d)open above.

> +	return EXIT_FAILURE;
>      }
>  
>      if (opt_index < argc) {
> @@ -184,12 +194,17 @@ notmuch_restore_command (notmuch_config_t *config, int argc, char *argv[])
>      }
>  
>      do {
> -	line_len = getline (&line, &line_size, input);
> +	util_status_t status;
> +
> +	status = gz_getline (line_ctx, &line, &line_len, input);
>  
>  	/* empty input file not considered an error */
> -	if (line_len < 0)
> +	if (status == UTIL_EOF)
>  	    return EXIT_SUCCESS;
>  
> +	if (status)

Will this lead to a silent exit failure if there's a problem with
decompression?  This suggests we should have a UTIL_GZERROR that tells
the caller to consult gzerror for the error message.  (Though this is
still an improvement over the original code, which would silently
succeed when getline failed!)

> +	    return EXIT_FAILURE;
> +
>      } while ((line_len == 0) ||
>  	     (line[0] == '#') ||
>  	     /* the cast is safe because we checked about for line_len < 0 */
> @@ -254,7 +269,7 @@ notmuch_restore_command (notmuch_config_t *config, int argc, char *argv[])
>  	if (ret)
>  	    break;
>  
> -    }  while ((line_len = getline (&line, &line_size, input)) != -1);
> +    }  while (gz_getline (line_ctx, &line, &line_len, input) == UTIL_SUCCESS);

It looks like a gz_getline error here will cause restore to stop and
claim that the restore was successful.  (The original code has this
problem, too.)

>  
>      if (line_ctx != NULL)
>  	talloc_free (line_ctx);
> @@ -262,13 +277,9 @@ notmuch_restore_command (notmuch_config_t *config, int argc, char *argv[])
>      if (input_format == DUMP_FORMAT_SUP)
>  	regfree (&regex);
>  
> -    if (line)
> -	free (line);
> -
>      notmuch_database_destroy (notmuch);
>  
> -    if (input != stdin)
> -	fclose (input);
> +    gzclose_r (input);
>  
>      return ret ? EXIT_FAILURE : EXIT_SUCCESS;
>  }
> diff --git a/test/T240-dump-restore.sh b/test/T240-dump-restore.sh
> index b6d8602..efe463e 100755
> --- a/test/T240-dump-restore.sh
> +++ b/test/T240-dump-restore.sh
> @@ -80,6 +80,20 @@ notmuch dump --gzip --output=dump-gzip-outfile.gz
>  gunzip dump-gzip-outfile.gz
>  test_expect_equal_file dump.expected dump-gzip-outfile
>  
> +test_begin_subtest "restoring gzipped stdin"
> +notmuch dump --gzip --output=backup.gz
> +notmuch tag +new_tag '*'
> +notmuch restore < backup.gz
> +notmuch dump --output=dump.actual
> +test_expect_equal_file dump.expected dump.actual
> +
> +test_begin_subtest "restoring gzipped file"
> +notmuch dump --gzip --output=backup.gz
> +notmuch tag +new_tag '*'
> +notmuch restore --input=backup.gz
> +notmuch dump --output=dump.actual
> +test_expect_equal_file dump.expected dump.actual
> +
>  # Note, we assume all messages from cworth have a message-id
>  # containing cworth.org
>  

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [Patch v6 1/6] dump: support gzipped and atomic output
  2014-04-03 19:41 ` [Patch v6 1/6] dump: support gzipped and atomic output David Bremner
  2014-04-04  2:32   ` David Bremner
  2014-04-04 10:51   ` Tomi Ollila
@ 2014-04-04 22:05   ` Austin Clements
  2 siblings, 0 replies; 12+ messages in thread
From: Austin Clements @ 2014-04-04 22:05 UTC (permalink / raw)
  To: David Bremner; +Cc: notmuch

Quoth David Bremner on Apr 03 at  4:41 pm:
> The main goal is to support gzipped output for future internal
> calls (e.g. from notmuch-new) to notmuch_database_dump.
> 
> The additional dependency is not very heavy since xapian already pulls
> in zlib.
> 
> We want the dump to be "atomic", in the sense that after running the
> dump file is either present and complete, or not present.  This avoids
> certain classes of mishaps involving overwriting a good backup with a
> bad or partial one.
> ---
>  INSTALL                   | 20 ++++++++--
>  Makefile.local            |  2 +-
>  configure                 | 28 ++++++++++++--
>  doc/man1/notmuch-dump.rst |  3 ++
>  notmuch-client.h          |  4 +-
>  notmuch-dump.c            | 95 +++++++++++++++++++++++++++++++++++++----------
>  test/T240-dump-restore.sh | 12 ++++++
>  7 files changed, 136 insertions(+), 28 deletions(-)
> 
> diff --git a/INSTALL b/INSTALL
> index 690b0ef..b543c50 100644
> --- a/INSTALL
> +++ b/INSTALL
> @@ -20,8 +20,8 @@ configure stage.
>  
>  Dependencies
>  ------------
> -Notmuch depends on three libraries: Xapian, GMime 2.4 or 2.6, and
> -Talloc which are each described below:
> +Notmuch depends on four libraries: Xapian, GMime 2.4 or 2.6,
> +Talloc, and zlib which are each described below:
>  
>  	Xapian
>  	------
> @@ -60,6 +60,18 @@ Talloc which are each described below:
>  
>  	Talloc is available from http://talloc.samba.org/
>  
> +	zlib
> +	----
> +
> +	zlib is an extremely popular compression library. It is used
> +	by Xapian, so if you installed that you will already have
> +	zlib. You may need to install the zlib headers separately.
> +
> +	Notmuch needs the transparent write feature of zlib introduced
> +	in version 1.2.5.2 (Dec. 2011).
> +
> +	zlib is available from http://zlib.net
> +
>  Building Documentation
>  ----------------------
>  
> @@ -79,11 +91,11 @@ dependencies with a simple simple command line. For example:
>  
>    For Debian and similar:
>  
> -        sudo apt-get install libxapian-dev libgmime-2.6-dev libtalloc-dev python-sphinx
> +        sudo apt-get install libxapian-dev libgmime-2.6-dev libtalloc-dev zlib1g-dev python-sphinx
>  
>    For Fedora and similar:
>  
> -	sudo yum install xapian-core-devel gmime-devel libtalloc-devel python-sphinx
> +	sudo yum install xapian-core-devel gmime-devel libtalloc-devel zlib-devel python-sphinx
>  
>  On other systems, a similar command can be used, but the details of
>  the package names may be different.
> diff --git a/Makefile.local b/Makefile.local
> index cb7b106..e5a20a7 100644
> --- a/Makefile.local
> +++ b/Makefile.local
> @@ -41,7 +41,7 @@ PV_FILE=bindings/python/notmuch/version.py
>  # Smash together user's values with our extra values
>  FINAL_CFLAGS = -DNOTMUCH_VERSION=$(VERSION) $(CPPFLAGS) $(CFLAGS) $(WARN_CFLAGS) $(extra_cflags) $(CONFIGURE_CFLAGS)
>  FINAL_CXXFLAGS = $(CPPFLAGS) $(CXXFLAGS) $(WARN_CXXFLAGS) $(extra_cflags) $(extra_cxxflags) $(CONFIGURE_CXXFLAGS)
> -FINAL_NOTMUCH_LDFLAGS = $(LDFLAGS) -Lutil -lutil -Llib -lnotmuch $(AS_NEEDED_LDFLAGS) $(GMIME_LDFLAGS) $(TALLOC_LDFLAGS)
> +FINAL_NOTMUCH_LDFLAGS = $(LDFLAGS) -Lutil -lutil -Llib -lnotmuch $(AS_NEEDED_LDFLAGS) $(GMIME_LDFLAGS) $(TALLOC_LDFLAGS) $(ZLIB_LDFLAGS)
>  FINAL_NOTMUCH_LINKER = CC
>  ifneq ($(LINKER_RESOLVES_LIBRARY_DEPENDENCIES),1)
>  FINAL_NOTMUCH_LDFLAGS += $(CONFIGURE_LDFLAGS)
> diff --git a/configure b/configure
> index 1d430b9..1d624f7 100755
> --- a/configure
> +++ b/configure
> @@ -340,6 +340,18 @@ else
>      errors=$((errors + 1))
>  fi
>  
> +printf "Checking for zlib (>= 1.2.5.2)... "
> +have_zlib=0
> +if pkg-config --atleast-version=1.2.5.2 zlib; then
> +    printf "Yes.\n"
> +    have_zlib=1
> +    zlib_cflags=$(pkg-config --cflags zlib)
> +    zlib_ldflags=$(pkg-config --libs zlib)
> +else
> +    printf "No.\n"
> +    errors=$((errors + 1))
> +fi
> +
>  printf "Checking for talloc development files... "
>  if pkg-config --exists talloc; then
>      printf "Yes.\n"
> @@ -496,6 +508,11 @@ EOF
>  	echo "	Xapian library (including development files such as headers)"
>  	echo "	http://xapian.org/"
>      fi
> +    if [ $have_zlib -eq 0 ]; then
> +	echo "	zlib library (including development files such as headers)"
> +	echo "	http://zlib.net/"
> +	echo
> +    fi
>      if [ $have_gmime -eq 0 ]; then
>  	echo "	Either GMime 2.4 library" $GMIME_24_VERSION_CTR "or GMime 2.6 library" $GMIME_26_VERSION_CTR
>  	echo "	(including development files such as headers)"
> @@ -519,11 +536,11 @@ case a simple command will install everything you need. For example:
>  
>  On Debian and similar systems:
>  
> -	sudo apt-get install libxapian-dev libgmime-2.6-dev libtalloc-dev
> +	sudo apt-get install libxapian-dev libgmime-2.6-dev libtalloc-dev zlib1g-dev
>  
>  Or on Fedora and similar systems:
>  
> -	sudo yum install xapian-core-devel gmime-devel libtalloc-devel
> +	sudo yum install xapian-core-devel gmime-devel libtalloc-devel zlib-devel
>  
>  On other systems, similar commands can be used, but the details of the
>  package names may be different.
> @@ -844,6 +861,10 @@ XAPIAN_LDFLAGS = ${xapian_ldflags}
>  GMIME_CFLAGS = ${gmime_cflags}
>  GMIME_LDFLAGS = ${gmime_ldflags}
>  
> +# Flags needed to compile and link against zlib
> +ZLIB_CFLAGS = ${zlib_cflags}
> +ZLIB_LDFLAGS = ${zlib_ldflags}
> +
>  # Flags needed to compile and link against talloc
>  TALLOC_CFLAGS = ${talloc_cflags}
>  TALLOC_LDFLAGS = ${talloc_ldflags}
> @@ -882,6 +903,7 @@ CONFIGURE_CFLAGS = -DHAVE_GETLINE=\$(HAVE_GETLINE) \$(GMIME_CFLAGS)      \\
>  		   -DUTIL_BYTE_ORDER=\$(UTIL_BYTE_ORDER)
>  
>  CONFIGURE_CXXFLAGS = -DHAVE_GETLINE=\$(HAVE_GETLINE) \$(GMIME_CFLAGS)    \\
> +		     \$(ZLIB_CFLAGS)					 \\
>  		     \$(TALLOC_CFLAGS) -DHAVE_VALGRIND=\$(HAVE_VALGRIND) \\
>  		     \$(VALGRIND_CFLAGS) \$(XAPIAN_CXXFLAGS)             \\
>  		     -DHAVE_STRCASESTR=\$(HAVE_STRCASESTR)               \\
> @@ -892,5 +914,5 @@ CONFIGURE_CXXFLAGS = -DHAVE_GETLINE=\$(HAVE_GETLINE) \$(GMIME_CFLAGS)    \\
>  		     -DHAVE_XAPIAN_COMPACT=\$(HAVE_XAPIAN_COMPACT)       \\
>  		     -DUTIL_BYTE_ORDER=\$(UTIL_BYTE_ORDER)
>  
> -CONFIGURE_LDFLAGS =  \$(GMIME_LDFLAGS) \$(TALLOC_LDFLAGS) \$(XAPIAN_LDFLAGS)
> +CONFIGURE_LDFLAGS =  \$(GMIME_LDFLAGS) \$(TALLOC_LDFLAGS) \$(ZLIB_LDFLAGS) \$(XAPIAN_LDFLAGS)
>  EOF
> diff --git a/doc/man1/notmuch-dump.rst b/doc/man1/notmuch-dump.rst
> index 17d1da5..d94cb4f 100644
> --- a/doc/man1/notmuch-dump.rst
> +++ b/doc/man1/notmuch-dump.rst
> @@ -19,6 +19,9 @@ recreated from the messages themselves. The output of notmuch dump is
>  therefore the only critical thing to backup (and much more friendly to
>  incremental backup than the native database files.)
>  
> +``--gzip``
> +    Compress the output in a format compatible with **gzip(1)**.
> +
>  ``--format=(sup|batch-tag)``
>      Notmuch restore supports two plain text dump formats, both with one
>      message-id per line, followed by a list of tags.
> diff --git a/notmuch-client.h b/notmuch-client.h
> index d110648..e1efbe0 100644
> --- a/notmuch-client.h
> +++ b/notmuch-client.h
> @@ -450,7 +450,9 @@ typedef enum dump_formats {
>  int
>  notmuch_database_dump (notmuch_database_t *notmuch,
>  		       const char *output_file_name,
> -		       const char *query_str, dump_format_t output_format);
> +		       const char *query_str,
> +		       dump_format_t output_format,
> +		       notmuch_bool_t gzip_output);
>  
>  #include "command-line-arguments.h"
>  #endif
> diff --git a/notmuch-dump.c b/notmuch-dump.c
> index 21702d7..2a7252a 100644
> --- a/notmuch-dump.c
> +++ b/notmuch-dump.c
> @@ -21,9 +21,11 @@
>  #include "notmuch-client.h"
>  #include "hex-escape.h"
>  #include "string-util.h"
> +#include <zlib.h>
> +
>  
>  static int
> -database_dump_file (notmuch_database_t *notmuch, FILE *output,
> +database_dump_file (notmuch_database_t *notmuch, gzFile output,
>  		    const char *query_str, int output_format)
>  {
>      notmuch_query_t *query;
> @@ -69,7 +71,7 @@ database_dump_file (notmuch_database_t *notmuch, FILE *output,
>  	}
>  
>  	if (output_format == DUMP_FORMAT_SUP) {
> -	    fprintf (output, "%s (", message_id);
> +	    gzprintf (output, "%s (", message_id);
>  	}
>  
>  	for (tags = notmuch_message_get_tags (message);
> @@ -78,12 +80,12 @@ database_dump_file (notmuch_database_t *notmuch, FILE *output,
>  	    const char *tag_str = notmuch_tags_get (tags);
>  
>  	    if (! first)
> -		fputs (" ", output);
> +		gzputs (output, " ");
>  
>  	    first = 0;
>  
>  	    if (output_format == DUMP_FORMAT_SUP) {
> -		fputs (tag_str, output);
> +		gzputs (output, tag_str);
>  	    } else {
>  		if (hex_encode (notmuch, tag_str,
>  				&buffer, &buffer_size) != HEX_SUCCESS) {
> @@ -91,12 +93,12 @@ database_dump_file (notmuch_database_t *notmuch, FILE *output,
>  			     tag_str);
>  		    return EXIT_FAILURE;
>  		}
> -		fprintf (output, "+%s", buffer);
> +		gzprintf (output, "+%s", buffer);
>  	    }
>  	}
>  
>  	if (output_format == DUMP_FORMAT_SUP) {
> -	    fputs (")\n", output);
> +	    gzputs (output, ")\n");
>  	} else {
>  	    if (make_boolean_term (notmuch, "id", message_id,
>  				   &buffer, &buffer_size)) {
> @@ -104,7 +106,7 @@ database_dump_file (notmuch_database_t *notmuch, FILE *output,
>  			     message_id, strerror (errno));
>  		    return EXIT_FAILURE;
>  	    }
> -	    fprintf (output, " -- %s\n", buffer);
> +	    gzprintf (output, " -- %s\n", buffer);
>  	}
>  
>  	notmuch_message_destroy (message);
> @@ -121,24 +123,77 @@ database_dump_file (notmuch_database_t *notmuch, FILE *output,
>  int
>  notmuch_database_dump (notmuch_database_t *notmuch,
>  		       const char *output_file_name,
> -		       const char *query_str, dump_format_t output_format)
> +		       const char *query_str,
> +		       dump_format_t output_format,
> +		       notmuch_bool_t gzip_output)
>  {
> -    FILE *output = stdout;
> -    int ret;
> +    gzFile output;

This should to gzclose_w output on all error paths.  The easiest way
to do that it probably to
  gzFile output = NULL;
here and ...

> +    const char *mode = gzip_output ? "w9" : "wT";
> +    const char *name_for_error = output_file_name ? output_file_name : "stdout";
> +
> +    char *tempname = NULL;
> +    int outfd = -1;
> +
> +    int ret = -1;
>  
>      if (output_file_name) {
> -	output = fopen (output_file_name, "w");
> -	if (output == NULL) {
> -	    fprintf (stderr, "Error opening %s for writing: %s\n",
> -		     output_file_name, strerror (errno));
> -	    return EXIT_FAILURE;
> -	}
> +	tempname = talloc_asprintf (notmuch, "%s.XXXXXX", output_file_name);
> +	outfd = mkstemp (tempname);
> +    } else {
> +	outfd = dup (STDOUT_FILENO);
> +    }
> +
> +    if (outfd < 0) {
> +	fprintf (stderr, "Bad output file %s\n", name_for_error);
> +	goto DONE;
> +    }
> +
> +    output = gzdopen (outfd, mode);
> +
> +    if (output == NULL) {
> +	fprintf (stderr, "Error opening %s for (gzip) writing: %s\n",
> +		 name_for_error, strerror (errno));
> +	if (close (outfd))
> +	    fprintf (stderr, "Error closing %s during shutdown: %s\n",
> +		 name_for_error, strerror (errno));
> +	goto DONE;
>      }
>  
>      ret = database_dump_file (notmuch, output, query_str, output_format);
> +    if (ret) goto DONE;
>  
> -    if (output != stdout)
> -	fclose (output);
> +    ret = gzflush (output, Z_FINISH);
> +    if (ret) {
> +	fprintf (stderr, "Error flushing output: %s\n", gzerror (output, NULL));
> +	goto DONE;
> +    }
> +
> +    if (output_file_name) {
> +	ret = fdatasync (outfd);
> +	if (ret) {
> +	    fprintf (stderr, "Error syncing %s to disk: %s\n",
> +		     name_for_error, strerror (errno));
> +	    goto DONE;
> +	}
> +    }
> +
> +    if (gzclose_w (output) != Z_OK) {
> +	ret = EXIT_FAILURE;
> +	goto DONE;
> +    }

... output = NULL; here, and ...

> +
> +    if (output_file_name) {
> +	ret = rename (tempname, output_file_name);
> +	if (ret) {
> +	    fprintf (stderr, "Error renaming %s to %s: %s\n",
> +		     tempname, output_file_name, strerror (errno));
> +	    goto DONE;
> +	}
> +
> +    }
> + DONE:

...
if (output)
  gzclose_w (output);
here.

> +    if (ret != EXIT_SUCCESS && output_file_name)
> +	(void) unlink (tempname);
>  
>      return ret;
>  }
> @@ -158,6 +213,7 @@ notmuch_dump_command (notmuch_config_t *config, int argc, char *argv[])
>      int opt_index;
>  
>      int output_format = DUMP_FORMAT_BATCH_TAG;
> +    notmuch_bool_t gzip_output = 0;
>  
>      notmuch_opt_desc_t options[] = {
>  	{ NOTMUCH_OPT_KEYWORD, &output_format, "format", 'f',
> @@ -165,6 +221,7 @@ notmuch_dump_command (notmuch_config_t *config, int argc, char *argv[])
>  				  { "batch-tag", DUMP_FORMAT_BATCH_TAG },
>  				  { 0, 0 } } },
>  	{ NOTMUCH_OPT_STRING, &output_file_name, "output", 'o', 0  },
> +	{ NOTMUCH_OPT_BOOLEAN, &gzip_output, "gzip", 'z', 0 },
>  	{ 0, 0, 0, 0, 0 }
>      };
>  
> @@ -181,7 +238,7 @@ notmuch_dump_command (notmuch_config_t *config, int argc, char *argv[])
>      }
>  
>      ret = notmuch_database_dump (notmuch, output_file_name, query_str,
> -				 output_format);
> +				 output_format, gzip_output);
>  
>      notmuch_database_destroy (notmuch);
>  
> diff --git a/test/T240-dump-restore.sh b/test/T240-dump-restore.sh
> index 0004438..d79aca8 100755
> --- a/test/T240-dump-restore.sh
> +++ b/test/T240-dump-restore.sh
> @@ -68,6 +68,18 @@ test_begin_subtest "dump --output=outfile --"
>  notmuch dump --output=dump-1-arg-dash.actual --
>  test_expect_equal_file dump.expected dump-1-arg-dash.actual
>  
> +# gzipped output
> +
> +test_begin_subtest "dump --gzip"
> +notmuch dump --gzip > dump-gzip.gz
> +gunzip dump-gzip.gz
> +test_expect_equal_file dump.expected dump-gzip
> +
> +test_begin_subtest "dump --gzip --output=outfile"
> +notmuch dump --gzip --output=dump-gzip-outfile.gz
> +gunzip dump-gzip-outfile.gz
> +test_expect_equal_file dump.expected dump-gzip-outfile
> +
>  # Note, we assume all messages from cworth have a message-id
>  # containing cworth.org
>  

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [Patch v6 4/6] restore: transparently support gzipped input
  2014-04-03 19:41 ` [Patch v6 4/6] restore: transparently support gzipped input David Bremner
  2014-04-04 21:56   ` Austin Clements
@ 2014-04-04 22:10   ` Austin Clements
  1 sibling, 0 replies; 12+ messages in thread
From: Austin Clements @ 2014-04-04 22:10 UTC (permalink / raw)
  To: David Bremner; +Cc: notmuch

Quoth David Bremner on Apr 03 at  4:41 pm:
> We rely completely on zlib to do the right thing in detecting gzipped
> input. Since our dump format is chosen to be 7 bit ascii, this should
> be fine.
> ---
>  doc/man1/notmuch-restore.rst |  8 ++++++++
>  notmuch-restore.c            | 41 ++++++++++++++++++++++++++---------------
>  test/T240-dump-restore.sh    | 14 ++++++++++++++
>  3 files changed, 48 insertions(+), 15 deletions(-)
> 
> diff --git a/doc/man1/notmuch-restore.rst b/doc/man1/notmuch-restore.rst
> index d6cf19a..936b138 100644
> --- a/doc/man1/notmuch-restore.rst
> +++ b/doc/man1/notmuch-restore.rst
> @@ -50,6 +50,14 @@ Supported options for **restore** include
>              format, this heuristic, based the fact that batch-tag format
>              contains no parentheses, should be accurate.
>  
> +GZIPPED INPUT
> +=============
> +
> +\ **notmuch restore** will detect if the input is compressed in
> +**gzip(1)** format and automatically decompress it while reading. This
> +detection does not depend on file naming and in particular works for
> +standard input.
> +
>  SEE ALSO
>  ========
>  
> diff --git a/notmuch-restore.c b/notmuch-restore.c
> index c54d513..eb5b7b2 100644
> --- a/notmuch-restore.c
> +++ b/notmuch-restore.c
> @@ -22,6 +22,7 @@
>  #include "hex-escape.h"
>  #include "tag-util.h"
>  #include "string-util.h"
> +#include "zlib-extra.h"
>  
>  static regex_t regex;
>  
> @@ -128,10 +129,9 @@ notmuch_restore_command (notmuch_config_t *config, int argc, char *argv[])
>      tag_op_list_t *tag_ops;
>  
>      char *input_file_name = NULL;
> -    FILE *input = stdin;
> +    gzFile input;

I missed it on my first pass, but this also still leaks input on error
paths the way that patch 1 leaks output.  Though the old code does,
too, so maybe we're okay with assuming the OS will clean up everything
right after this function returns anyway?

>      char *line = NULL;
>      void *line_ctx = NULL;
> -    size_t line_size;
>      ssize_t line_len;
>  
>      int ret = 0;
> @@ -163,13 +163,23 @@ notmuch_restore_command (notmuch_config_t *config, int argc, char *argv[])
>      if (! accumulate)
>  	flags |= TAG_FLAG_REMOVE_ALL;
>  
> -    if (input_file_name) {
> -	input = fopen (input_file_name, "r");
> -	if (input == NULL) {
> -	    fprintf (stderr, "Error opening %s for reading: %s\n",
> -		     input_file_name, strerror (errno));
> +    if (input_file_name)
> +	input = gzopen (input_file_name, "r");
> +    else {
> +	int infd = dup (STDIN_FILENO);
> +	if (infd < 0) {
> +	    fprintf (stderr, "Error duping stdin\n");
>  	    return EXIT_FAILURE;
>  	}
> +	input = gzdopen (infd, "r");
> +	if (! input)
> +	    close (infd);
> +    }
> +
> +    if (input == NULL) {
> +	fprintf (stderr, "Error opening %s for (gzip) reading: %s\n",
> +		 input_file_name ? input_file_name : "stdin", strerror (errno));
> +	return EXIT_FAILURE;
>      }
>  
>      if (opt_index < argc) {
> @@ -184,12 +194,17 @@ notmuch_restore_command (notmuch_config_t *config, int argc, char *argv[])
>      }
>  
>      do {
> -	line_len = getline (&line, &line_size, input);
> +	util_status_t status;
> +
> +	status = gz_getline (line_ctx, &line, &line_len, input);
>  
>  	/* empty input file not considered an error */
> -	if (line_len < 0)
> +	if (status == UTIL_EOF)
>  	    return EXIT_SUCCESS;
>  
> +	if (status)
> +	    return EXIT_FAILURE;
> +
>      } while ((line_len == 0) ||
>  	     (line[0] == '#') ||
>  	     /* the cast is safe because we checked about for line_len < 0 */
> @@ -254,7 +269,7 @@ notmuch_restore_command (notmuch_config_t *config, int argc, char *argv[])
>  	if (ret)
>  	    break;
>  
> -    }  while ((line_len = getline (&line, &line_size, input)) != -1);
> +    }  while (gz_getline (line_ctx, &line, &line_len, input) == UTIL_SUCCESS);
>  
>      if (line_ctx != NULL)
>  	talloc_free (line_ctx);
> @@ -262,13 +277,9 @@ notmuch_restore_command (notmuch_config_t *config, int argc, char *argv[])
>      if (input_format == DUMP_FORMAT_SUP)
>  	regfree (&regex);
>  
> -    if (line)
> -	free (line);
> -
>      notmuch_database_destroy (notmuch);
>  
> -    if (input != stdin)
> -	fclose (input);
> +    gzclose_r (input);
>  
>      return ret ? EXIT_FAILURE : EXIT_SUCCESS;
>  }
> diff --git a/test/T240-dump-restore.sh b/test/T240-dump-restore.sh
> index b6d8602..efe463e 100755
> --- a/test/T240-dump-restore.sh
> +++ b/test/T240-dump-restore.sh
> @@ -80,6 +80,20 @@ notmuch dump --gzip --output=dump-gzip-outfile.gz
>  gunzip dump-gzip-outfile.gz
>  test_expect_equal_file dump.expected dump-gzip-outfile
>  
> +test_begin_subtest "restoring gzipped stdin"
> +notmuch dump --gzip --output=backup.gz
> +notmuch tag +new_tag '*'
> +notmuch restore < backup.gz
> +notmuch dump --output=dump.actual
> +test_expect_equal_file dump.expected dump.actual
> +
> +test_begin_subtest "restoring gzipped file"
> +notmuch dump --gzip --output=backup.gz
> +notmuch tag +new_tag '*'
> +notmuch restore --input=backup.gz
> +notmuch dump --output=dump.actual
> +test_expect_equal_file dump.expected dump.actual
> +
>  # Note, we assume all messages from cworth have a message-id
>  # containing cworth.org
>  

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2014-04-04 22:11 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2014-04-03 19:41 v6 gzipped dump/restore David Bremner
2014-04-03 19:41 ` [Patch v6 1/6] dump: support gzipped and atomic output David Bremner
2014-04-04  2:32   ` David Bremner
2014-04-04 10:51   ` Tomi Ollila
2014-04-04 22:05   ` Austin Clements
2014-04-03 19:41 ` [Patch v6 2/6] util: add gz_readline David Bremner
2014-04-03 19:41 ` [Patch v6 3/6] test: restore with missing final newline David Bremner
2014-04-03 19:41 ` [Patch v6 4/6] restore: transparently support gzipped input David Bremner
2014-04-04 21:56   ` Austin Clements
2014-04-04 22:10   ` Austin Clements
2014-04-03 19:41 ` [Patch v6 5/6] notmuch-new: backup tags before database upgrade David Bremner
2014-04-03 19:41 ` [Patch v6 6/6] test: verify tag backup generated by " David Bremner

Code repositories for project(s) associated with this public inbox

	https://yhetil.org/notmuch.git/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).