From: Austin Clements <amdragon@MIT.EDU>
To: David Bremner <david@tethera.net>
Cc: notmuch@notmuchmail.org
Subject: Re: [Patch v6 4/6] restore: transparently support gzipped input
Date: Fri, 4 Apr 2014 18:10:54 -0400 [thread overview]
Message-ID: <20140404221054.GC15472@mit.edu> (raw)
In-Reply-To: <1396554083-3892-5-git-send-email-david@tethera.net>
Quoth David Bremner on Apr 03 at 4:41 pm:
> We rely completely on zlib to do the right thing in detecting gzipped
> input. Since our dump format is chosen to be 7 bit ascii, this should
> be fine.
> ---
> doc/man1/notmuch-restore.rst | 8 ++++++++
> notmuch-restore.c | 41 ++++++++++++++++++++++++++---------------
> test/T240-dump-restore.sh | 14 ++++++++++++++
> 3 files changed, 48 insertions(+), 15 deletions(-)
>
> diff --git a/doc/man1/notmuch-restore.rst b/doc/man1/notmuch-restore.rst
> index d6cf19a..936b138 100644
> --- a/doc/man1/notmuch-restore.rst
> +++ b/doc/man1/notmuch-restore.rst
> @@ -50,6 +50,14 @@ Supported options for **restore** include
> format, this heuristic, based the fact that batch-tag format
> contains no parentheses, should be accurate.
>
> +GZIPPED INPUT
> +=============
> +
> +\ **notmuch restore** will detect if the input is compressed in
> +**gzip(1)** format and automatically decompress it while reading. This
> +detection does not depend on file naming and in particular works for
> +standard input.
> +
> SEE ALSO
> ========
>
> diff --git a/notmuch-restore.c b/notmuch-restore.c
> index c54d513..eb5b7b2 100644
> --- a/notmuch-restore.c
> +++ b/notmuch-restore.c
> @@ -22,6 +22,7 @@
> #include "hex-escape.h"
> #include "tag-util.h"
> #include "string-util.h"
> +#include "zlib-extra.h"
>
> static regex_t regex;
>
> @@ -128,10 +129,9 @@ notmuch_restore_command (notmuch_config_t *config, int argc, char *argv[])
> tag_op_list_t *tag_ops;
>
> char *input_file_name = NULL;
> - FILE *input = stdin;
> + gzFile input;
I missed it on my first pass, but this also still leaks input on error
paths the way that patch 1 leaks output. Though the old code does,
too, so maybe we're okay with assuming the OS will clean up everything
right after this function returns anyway?
> char *line = NULL;
> void *line_ctx = NULL;
> - size_t line_size;
> ssize_t line_len;
>
> int ret = 0;
> @@ -163,13 +163,23 @@ notmuch_restore_command (notmuch_config_t *config, int argc, char *argv[])
> if (! accumulate)
> flags |= TAG_FLAG_REMOVE_ALL;
>
> - if (input_file_name) {
> - input = fopen (input_file_name, "r");
> - if (input == NULL) {
> - fprintf (stderr, "Error opening %s for reading: %s\n",
> - input_file_name, strerror (errno));
> + if (input_file_name)
> + input = gzopen (input_file_name, "r");
> + else {
> + int infd = dup (STDIN_FILENO);
> + if (infd < 0) {
> + fprintf (stderr, "Error duping stdin\n");
> return EXIT_FAILURE;
> }
> + input = gzdopen (infd, "r");
> + if (! input)
> + close (infd);
> + }
> +
> + if (input == NULL) {
> + fprintf (stderr, "Error opening %s for (gzip) reading: %s\n",
> + input_file_name ? input_file_name : "stdin", strerror (errno));
> + return EXIT_FAILURE;
> }
>
> if (opt_index < argc) {
> @@ -184,12 +194,17 @@ notmuch_restore_command (notmuch_config_t *config, int argc, char *argv[])
> }
>
> do {
> - line_len = getline (&line, &line_size, input);
> + util_status_t status;
> +
> + status = gz_getline (line_ctx, &line, &line_len, input);
>
> /* empty input file not considered an error */
> - if (line_len < 0)
> + if (status == UTIL_EOF)
> return EXIT_SUCCESS;
>
> + if (status)
> + return EXIT_FAILURE;
> +
> } while ((line_len == 0) ||
> (line[0] == '#') ||
> /* the cast is safe because we checked about for line_len < 0 */
> @@ -254,7 +269,7 @@ notmuch_restore_command (notmuch_config_t *config, int argc, char *argv[])
> if (ret)
> break;
>
> - } while ((line_len = getline (&line, &line_size, input)) != -1);
> + } while (gz_getline (line_ctx, &line, &line_len, input) == UTIL_SUCCESS);
>
> if (line_ctx != NULL)
> talloc_free (line_ctx);
> @@ -262,13 +277,9 @@ notmuch_restore_command (notmuch_config_t *config, int argc, char *argv[])
> if (input_format == DUMP_FORMAT_SUP)
> regfree (&regex);
>
> - if (line)
> - free (line);
> -
> notmuch_database_destroy (notmuch);
>
> - if (input != stdin)
> - fclose (input);
> + gzclose_r (input);
>
> return ret ? EXIT_FAILURE : EXIT_SUCCESS;
> }
> diff --git a/test/T240-dump-restore.sh b/test/T240-dump-restore.sh
> index b6d8602..efe463e 100755
> --- a/test/T240-dump-restore.sh
> +++ b/test/T240-dump-restore.sh
> @@ -80,6 +80,20 @@ notmuch dump --gzip --output=dump-gzip-outfile.gz
> gunzip dump-gzip-outfile.gz
> test_expect_equal_file dump.expected dump-gzip-outfile
>
> +test_begin_subtest "restoring gzipped stdin"
> +notmuch dump --gzip --output=backup.gz
> +notmuch tag +new_tag '*'
> +notmuch restore < backup.gz
> +notmuch dump --output=dump.actual
> +test_expect_equal_file dump.expected dump.actual
> +
> +test_begin_subtest "restoring gzipped file"
> +notmuch dump --gzip --output=backup.gz
> +notmuch tag +new_tag '*'
> +notmuch restore --input=backup.gz
> +notmuch dump --output=dump.actual
> +test_expect_equal_file dump.expected dump.actual
> +
> # Note, we assume all messages from cworth have a message-id
> # containing cworth.org
>
next prev parent reply other threads:[~2014-04-04 22:11 UTC|newest]
Thread overview: 12+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-04-03 19:41 v6 gzipped dump/restore David Bremner
2014-04-03 19:41 ` [Patch v6 1/6] dump: support gzipped and atomic output David Bremner
2014-04-04 2:32 ` David Bremner
2014-04-04 10:51 ` Tomi Ollila
2014-04-04 22:05 ` Austin Clements
2014-04-03 19:41 ` [Patch v6 2/6] util: add gz_readline David Bremner
2014-04-03 19:41 ` [Patch v6 3/6] test: restore with missing final newline David Bremner
2014-04-03 19:41 ` [Patch v6 4/6] restore: transparently support gzipped input David Bremner
2014-04-04 21:56 ` Austin Clements
2014-04-04 22:10 ` Austin Clements [this message]
2014-04-03 19:41 ` [Patch v6 5/6] notmuch-new: backup tags before database upgrade David Bremner
2014-04-03 19:41 ` [Patch v6 6/6] test: verify tag backup generated by " David Bremner
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: https://notmuchmail.org/
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20140404221054.GC15472@mit.edu \
--to=amdragon@mit.edu \
--cc=david@tethera.net \
--cc=notmuch@notmuchmail.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://yhetil.org/notmuch.git/
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).