unofficial mirror of notmuch@notmuchmail.org
 help / color / mirror / code / Atom feed
From: David Bremner <david@tethera.net>
To: notmuch@notmuchmail.org
Subject: [PATCH] dump: support gzipped output
Date: Sat, 29 Mar 2014 10:46:25 -0300	[thread overview]
Message-ID: <1396100785-8744-1-git-send-email-david@tethera.net> (raw)
In-Reply-To: <1396056046-2247-1-git-send-email-david@tethera.net>

The main goal is to support gzipped output for future internal
calls (e.g. from notmuch-new) to notmuch_database_dump.

The additional dependency is not very heavy since xapian already pulls
in zlib.
---

I had a quick look at supporting gzipped input for restore; I think it
just requires an implementation of getline that uses gzgetc or gzgets.
The decompression can then be completely transparent to the user,
based on the magic number.

 INSTALL                   | 17 ++++++++++----
 Makefile.local            |  2 +-
 configure                 | 23 ++++++++++++++++---
 doc/man1/notmuch-dump.rst |  3 +++
 notmuch-client.h          |  4 +++-
 notmuch-dump.c            | 57 ++++++++++++++++++++++++++++++-----------------
 test/T240-dump-restore.sh | 12 ++++++++++
 7 files changed, 89 insertions(+), 29 deletions(-)

diff --git a/INSTALL b/INSTALL
index 690b0ef..2754e52 100644
--- a/INSTALL
+++ b/INSTALL
@@ -20,8 +20,8 @@ configure stage.
 
 Dependencies
 ------------
-Notmuch depends on three libraries: Xapian, GMime 2.4 or 2.6, and
-Talloc which are each described below:
+Notmuch depends on four libraries: Xapian, GMime 2.4 or 2.6,
+Talloc, and zlib which are each described below:
 
 	Xapian
 	------
@@ -60,6 +60,15 @@ Talloc which are each described below:
 
 	Talloc is available from http://talloc.samba.org/
 
+	zlib
+	----
+
+	zlib is an extremely popular compression library. It is used
+	by Xapian, so if you installed that you will already have
+	zlib. You may need to install the zlib headers separately.
+
+	zlib is available from http://zlib.net
+
 Building Documentation
 ----------------------
 
@@ -79,11 +88,11 @@ dependencies with a simple simple command line. For example:
 
   For Debian and similar:
 
-        sudo apt-get install libxapian-dev libgmime-2.6-dev libtalloc-dev python-sphinx
+        sudo apt-get install libxapian-dev libgmime-2.6-dev libtalloc-dev zlib1g-dev python-sphinx
 
   For Fedora and similar:
 
-	sudo yum install xapian-core-devel gmime-devel libtalloc-devel python-sphinx
+	sudo yum install xapian-core-devel gmime-devel libtalloc-devel zlib-devel python-sphinx
 
 On other systems, a similar command can be used, but the details of
 the package names may be different.
diff --git a/Makefile.local b/Makefile.local
index cb7b106..e5a20a7 100644
--- a/Makefile.local
+++ b/Makefile.local
@@ -41,7 +41,7 @@ PV_FILE=bindings/python/notmuch/version.py
 # Smash together user's values with our extra values
 FINAL_CFLAGS = -DNOTMUCH_VERSION=$(VERSION) $(CPPFLAGS) $(CFLAGS) $(WARN_CFLAGS) $(extra_cflags) $(CONFIGURE_CFLAGS)
 FINAL_CXXFLAGS = $(CPPFLAGS) $(CXXFLAGS) $(WARN_CXXFLAGS) $(extra_cflags) $(extra_cxxflags) $(CONFIGURE_CXXFLAGS)
-FINAL_NOTMUCH_LDFLAGS = $(LDFLAGS) -Lutil -lutil -Llib -lnotmuch $(AS_NEEDED_LDFLAGS) $(GMIME_LDFLAGS) $(TALLOC_LDFLAGS)
+FINAL_NOTMUCH_LDFLAGS = $(LDFLAGS) -Lutil -lutil -Llib -lnotmuch $(AS_NEEDED_LDFLAGS) $(GMIME_LDFLAGS) $(TALLOC_LDFLAGS) $(ZLIB_LDFLAGS)
 FINAL_NOTMUCH_LINKER = CC
 ifneq ($(LINKER_RESOLVES_LIBRARY_DEPENDENCIES),1)
 FINAL_NOTMUCH_LDFLAGS += $(CONFIGURE_LDFLAGS)
diff --git a/configure b/configure
index 1d430b9..89bb3f3 100755
--- a/configure
+++ b/configure
@@ -340,6 +340,18 @@ else
     errors=$((errors + 1))
 fi
 
+printf "Checking for zlib development files... "
+have_zlib=0
+if pkg-config --exists zlib; then
+    printf "Yes.\n"
+    have_zlib=1
+    zlib_cflags=$(pkg-config --cflags zlib)
+    zlib_ldflags=$(pkg-config --libs zlib)
+else
+    printf "No.\n"
+    errors=$((errors + 1))
+fi
+
 printf "Checking for talloc development files... "
 if pkg-config --exists talloc; then
     printf "Yes.\n"
@@ -519,11 +531,11 @@ case a simple command will install everything you need. For example:
 
 On Debian and similar systems:
 
-	sudo apt-get install libxapian-dev libgmime-2.6-dev libtalloc-dev
+	sudo apt-get install libxapian-dev libgmime-2.6-dev libtalloc-dev zlib1g-dev
 
 Or on Fedora and similar systems:
 
-	sudo yum install xapian-core-devel gmime-devel libtalloc-devel
+	sudo yum install xapian-core-devel gmime-devel libtalloc-devel zlib-devel
 
 On other systems, similar commands can be used, but the details of the
 package names may be different.
@@ -844,6 +856,10 @@ XAPIAN_LDFLAGS = ${xapian_ldflags}
 GMIME_CFLAGS = ${gmime_cflags}
 GMIME_LDFLAGS = ${gmime_ldflags}
 
+# Flags needed to compile and link against zlib
+ZLIB_CFLAGS = ${zlib_cflags}
+ZLIB_LDFLAGS = ${zlib_ldflags}
+
 # Flags needed to compile and link against talloc
 TALLOC_CFLAGS = ${talloc_cflags}
 TALLOC_LDFLAGS = ${talloc_ldflags}
@@ -882,6 +898,7 @@ CONFIGURE_CFLAGS = -DHAVE_GETLINE=\$(HAVE_GETLINE) \$(GMIME_CFLAGS)      \\
 		   -DUTIL_BYTE_ORDER=\$(UTIL_BYTE_ORDER)
 
 CONFIGURE_CXXFLAGS = -DHAVE_GETLINE=\$(HAVE_GETLINE) \$(GMIME_CFLAGS)    \\
+		     \$(ZLIB_CFLAGS)					 \\
 		     \$(TALLOC_CFLAGS) -DHAVE_VALGRIND=\$(HAVE_VALGRIND) \\
 		     \$(VALGRIND_CFLAGS) \$(XAPIAN_CXXFLAGS)             \\
 		     -DHAVE_STRCASESTR=\$(HAVE_STRCASESTR)               \\
@@ -892,5 +909,5 @@ CONFIGURE_CXXFLAGS = -DHAVE_GETLINE=\$(HAVE_GETLINE) \$(GMIME_CFLAGS)    \\
 		     -DHAVE_XAPIAN_COMPACT=\$(HAVE_XAPIAN_COMPACT)       \\
 		     -DUTIL_BYTE_ORDER=\$(UTIL_BYTE_ORDER)
 
-CONFIGURE_LDFLAGS =  \$(GMIME_LDFLAGS) \$(TALLOC_LDFLAGS) \$(XAPIAN_LDFLAGS)
+CONFIGURE_LDFLAGS =  \$(GMIME_LDFLAGS) \$(TALLOC_LDFLAGS) \$(ZLIB_LDFLAGS) \$(XAPIAN_LDFLAGS)
 EOF
diff --git a/doc/man1/notmuch-dump.rst b/doc/man1/notmuch-dump.rst
index 17d1da5..d94cb4f 100644
--- a/doc/man1/notmuch-dump.rst
+++ b/doc/man1/notmuch-dump.rst
@@ -19,6 +19,9 @@ recreated from the messages themselves. The output of notmuch dump is
 therefore the only critical thing to backup (and much more friendly to
 incremental backup than the native database files.)
 
+``--gzip``
+    Compress the output in a format compatible with **gzip(1)**.
+
 ``--format=(sup|batch-tag)``
     Notmuch restore supports two plain text dump formats, both with one
     message-id per line, followed by a list of tags.
diff --git a/notmuch-client.h b/notmuch-client.h
index d110648..e1efbe0 100644
--- a/notmuch-client.h
+++ b/notmuch-client.h
@@ -450,7 +450,9 @@ typedef enum dump_formats {
 int
 notmuch_database_dump (notmuch_database_t *notmuch,
 		       const char *output_file_name,
-		       const char *query_str, dump_format_t output_format);
+		       const char *query_str,
+		       dump_format_t output_format,
+		       notmuch_bool_t gzip_output);
 
 #include "command-line-arguments.h"
 #endif
diff --git a/notmuch-dump.c b/notmuch-dump.c
index 21702d7..128a37d 100644
--- a/notmuch-dump.c
+++ b/notmuch-dump.c
@@ -21,10 +21,12 @@
 #include "notmuch-client.h"
 #include "hex-escape.h"
 #include "string-util.h"
+#include <zlib.h>
+
 
 static int
-database_dump_file (notmuch_database_t *notmuch, FILE *output,
-		    const char *query_str, int output_format)
+database_dump_file (notmuch_database_t *notmuch, gzFile output,
+			const char *query_str, int output_format)
 {
     notmuch_query_t *query;
     notmuch_messages_t *messages;
@@ -69,7 +71,7 @@ database_dump_file (notmuch_database_t *notmuch, FILE *output,
 	}
 
 	if (output_format == DUMP_FORMAT_SUP) {
-	    fprintf (output, "%s (", message_id);
+	    gzprintf (output, "%s (", message_id);
 	}
 
 	for (tags = notmuch_message_get_tags (message);
@@ -78,12 +80,12 @@ database_dump_file (notmuch_database_t *notmuch, FILE *output,
 	    const char *tag_str = notmuch_tags_get (tags);
 
 	    if (! first)
-		fputs (" ", output);
+		gzputs (output, " ");
 
 	    first = 0;
 
 	    if (output_format == DUMP_FORMAT_SUP) {
-		fputs (tag_str, output);
+		gzputs (output, tag_str);
 	    } else {
 		if (hex_encode (notmuch, tag_str,
 				&buffer, &buffer_size) != HEX_SUCCESS) {
@@ -91,12 +93,12 @@ database_dump_file (notmuch_database_t *notmuch, FILE *output,
 			     tag_str);
 		    return EXIT_FAILURE;
 		}
-		fprintf (output, "+%s", buffer);
+		gzprintf (output, "+%s", buffer);
 	    }
 	}
 
 	if (output_format == DUMP_FORMAT_SUP) {
-	    fputs (")\n", output);
+	    gzputs (output, ")\n");
 	} else {
 	    if (make_boolean_term (notmuch, "id", message_id,
 				   &buffer, &buffer_size)) {
@@ -104,7 +106,7 @@ database_dump_file (notmuch_database_t *notmuch, FILE *output,
 			     message_id, strerror (errno));
 		    return EXIT_FAILURE;
 	    }
-	    fprintf (output, " -- %s\n", buffer);
+	    gzprintf (output, " -- %s\n", buffer);
 	}
 
 	notmuch_message_destroy (message);
@@ -121,24 +123,37 @@ database_dump_file (notmuch_database_t *notmuch, FILE *output,
 int
 notmuch_database_dump (notmuch_database_t *notmuch,
 		       const char *output_file_name,
-		       const char *query_str, dump_format_t output_format)
+		       const char *query_str,
+		       dump_format_t output_format,
+		       notmuch_bool_t gzip_output)
 {
-    FILE *output = stdout;
+    gzFile output;
+    const char *mode = gzip_output ? "w9" : "wT";
+
     int ret;
 
-    if (output_file_name) {
-	output = fopen (output_file_name, "w");
-	if (output == NULL) {
-	    fprintf (stderr, "Error opening %s for writing: %s\n",
-		     output_file_name, strerror (errno));
-	    return EXIT_FAILURE;
-	}
+    if (output_file_name)
+	output = gzopen (output_file_name, mode);
+    else
+	output = gzdopen (fileno (stdout), mode);
+
+    if (output == NULL) {
+	fprintf (stderr, "Error opening %s for (gzip) writing: %s\n",
+		 output_file_name ? output_file_name : "stdout", strerror (errno));
+	return EXIT_FAILURE;
     }
 
     ret = database_dump_file (notmuch, output, query_str, output_format);
 
-    if (output != stdout)
-	fclose (output);
+    /* unlike stdio, zlib needs explicit flushing */
+    if (gzflush (output, Z_FINISH)) {
+	fprintf (stderr, "Error flushing output: %s\n",
+		 gzerror (output, NULL));
+	return EXIT_FAILURE;
+    }
+
+    if (output_file_name)
+	gzclose_w (output);
 
     return ret;
 }
@@ -158,6 +173,7 @@ notmuch_dump_command (notmuch_config_t *config, int argc, char *argv[])
     int opt_index;
 
     int output_format = DUMP_FORMAT_BATCH_TAG;
+    notmuch_bool_t gzip_output = 0;
 
     notmuch_opt_desc_t options[] = {
 	{ NOTMUCH_OPT_KEYWORD, &output_format, "format", 'f',
@@ -165,6 +181,7 @@ notmuch_dump_command (notmuch_config_t *config, int argc, char *argv[])
 				  { "batch-tag", DUMP_FORMAT_BATCH_TAG },
 				  { 0, 0 } } },
 	{ NOTMUCH_OPT_STRING, &output_file_name, "output", 'o', 0  },
+	{ NOTMUCH_OPT_BOOLEAN, &gzip_output, "gzip", 'z', 0 },
 	{ 0, 0, 0, 0, 0 }
     };
 
@@ -181,7 +198,7 @@ notmuch_dump_command (notmuch_config_t *config, int argc, char *argv[])
     }
 
     ret = notmuch_database_dump (notmuch, output_file_name, query_str,
-				 output_format);
+				 output_format, gzip_output);
 
     notmuch_database_destroy (notmuch);
 
diff --git a/test/T240-dump-restore.sh b/test/T240-dump-restore.sh
index 0004438..d79aca8 100755
--- a/test/T240-dump-restore.sh
+++ b/test/T240-dump-restore.sh
@@ -68,6 +68,18 @@ test_begin_subtest "dump --output=outfile --"
 notmuch dump --output=dump-1-arg-dash.actual --
 test_expect_equal_file dump.expected dump-1-arg-dash.actual
 
+# gzipped output
+
+test_begin_subtest "dump --gzip"
+notmuch dump --gzip > dump-gzip.gz
+gunzip dump-gzip.gz
+test_expect_equal_file dump.expected dump-gzip
+
+test_begin_subtest "dump --gzip --output=outfile"
+notmuch dump --gzip --output=dump-gzip-outfile.gz
+gunzip dump-gzip-outfile.gz
+test_expect_equal_file dump.expected dump-gzip-outfile
+
 # Note, we assume all messages from cworth have a message-id
 # containing cworth.org
 
-- 
1.9.0

  parent reply	other threads:[~2014-03-29 13:46 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-03-25 17:07 [PATCH 0/2] cli: notmuch dump abstractions Jani Nikula
2014-03-25 17:07 ` [PATCH 1/2] cli: abstract database dumping from the dump command Jani Nikula
2014-03-25 17:07 ` [PATCH 2/2] cli: abstract dump file open " Jani Nikula
2014-03-25 17:48   ` [PATCH v2] " Jani Nikula
2014-03-26 22:01 ` [PATCH 0/2] cli: notmuch dump abstractions Mark Walters
2014-03-29  1:20   ` [PATCH] RFC: impliment gzipped output for notmuch dump David Bremner
2014-03-29  7:16     ` Tomi Ollila
2014-03-29  9:25     ` Jani Nikula
2014-03-29 12:29       ` David Bremner
2014-03-29 13:02         ` Jani Nikula
2014-03-29 16:25           ` David Bremner
2014-03-29 13:46     ` David Bremner [this message]
2014-03-29 18:16       ` David Bremner
2014-03-29 18:16         ` [Patch v2 1/3] dump: support gzipped output David Bremner
2014-03-29 18:16         ` [Patch v2 2/3] util: add gzreadline David Bremner
2014-03-30  8:30           ` Tomi Ollila
2014-03-30 11:23             ` [Patch v3] " David Bremner
2014-03-30 12:45               ` Tomi Ollila
2014-03-30 14:37                 ` David Bremner
2014-03-30 16:13                   ` Tomi Ollila
2014-03-30 11:03           ` [Patch v2 2/3] " David Bremner
2014-03-29 18:16         ` [Patch v2 3/3] restore: transparently support gzipped input David Bremner
2014-03-30 22:33 ` [PATCH 0/2] cli: notmuch dump abstractions David Bremner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://notmuchmail.org/

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1396100785-8744-1-git-send-email-david@tethera.net \
    --to=david@tethera.net \
    --cc=notmuch@notmuchmail.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://yhetil.org/notmuch.git/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).