From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.2 required=3.0 tests=ALL_TRUSTED,BAYES_00, DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF shortcircuit=no autolearn=ham autolearn_force=no version=3.4.6 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id A3A2D1F566 for ; Wed, 27 Sep 2023 06:02:49 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=80x24.org; s=selector1; t=1695794569; bh=p12x0P07Gv03Qiv2GVMy+m+Ou20/TOYCFtbVVW7wnqw=; h=From:To:Subject:Date:In-Reply-To:References:From; b=EKui3JbDY8qSdo7G/sB5TnYKB5vbaipFUeMCCYv9nOvzpXVWOSDIR0O98KuQqQjiq rsVjQ29lP/yoF4duGIxJIKows5A239d6fw9BaZ/ZaqI2Q6ChoIX8IXwLiyc5kqsC+8 Oj2W8L3YFWmDVPQH7ahmI12Q7QXxvwF7r90q3Iyo= From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 3/3] lei: don't gzip --rsyncable by default for mbox* Date: Wed, 27 Sep 2023 06:02:49 +0000 Message-ID: <20230927060249.706224-4-e@80x24.org> In-Reply-To: <20230927060249.706224-1-e@80x24.org> References: <20230927060249.706224-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: Using and memoizing the usability of `--rsyncable' is unsafe since pigz (or GNU gzip) can be uninstalled and leave a user with a non-rsync-aware gzip implementation in the long-running daemon. So we stop passing --rsyncable by default to pigz/gzip and no longer attempt to check for it (since it was a TOCTTOU error, anyways). Specifying --rsyncable explicitly didn't work, either, and ended up passing `1' to the gzip/pigz argv :x Finally, we now test --rsyncable on the CLI by adding support for it in `lei convert' and testing it in t/lei-convert.t --- lib/PublicInbox/LEI.pm | 3 ++- lib/PublicInbox/LeiToMail.pm | 2 +- lib/PublicInbox/MboxReader.pm | 36 ++++++++--------------------------- t/lei-convert.t | 27 ++++++++++++++++++++++++-- 4 files changed, 36 insertions(+), 32 deletions(-) diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm index 2f8d7a96..beb0f897 100644 --- a/lib/PublicInbox/LEI.pm +++ b/lib/PublicInbox/LEI.pm @@ -275,7 +275,8 @@ our %CMD = ( # sorted in order of importance/use: qw(all:s mode=s), @net_opt, @c_opt ], 'convert' => [ 'LOCATION...|--stdin', 'one-time conversion from URL or filesystem to another format', - qw(stdin| in-format|F=s out-format|f=s output|mfolder|o=s lock=s@ kw!), + qw(stdin| in-format|F=s out-format|f=s output|mfolder|o=s lock=s@ kw! + rsyncable), @net_opt, @c_opt ], 'p2q' => [ 'LOCATION_OR_COMMIT...|--stdin', "use a patch to generate a query for `lei q --stdin'", diff --git a/lib/PublicInbox/LeiToMail.pm b/lib/PublicInbox/LeiToMail.pm index a2cd8650..2dddf00b 100644 --- a/lib/PublicInbox/LeiToMail.pm +++ b/lib/PublicInbox/LeiToMail.pm @@ -155,7 +155,7 @@ sub reap_compress { # awaitpid callback $lei->fail($?, "@$cmd failed") if $?; } -sub _post_augment_mbox { # open a compressor process from top-level process +sub _post_augment_mbox { # open a compressor process from top-level lei-daemon my ($self, $lei) = @_; my $zsfx = $self->{zsfx} or return; my $cmd = PublicInbox::MboxReader::zsfx2cmd($zsfx, undef, $lei); diff --git a/lib/PublicInbox/MboxReader.pm b/lib/PublicInbox/MboxReader.pm index beffabe8..e4209022 100644 --- a/lib/PublicInbox/MboxReader.pm +++ b/lib/PublicInbox/MboxReader.pm @@ -1,10 +1,10 @@ -# Copyright (C) 2020-2021 all contributors +# Copyright (C) all contributors # License: AGPL-3.0+ -# reader for mbox variants we support +# reader for mbox variants we support (and also sets up commands for writing) package PublicInbox::MboxReader; use strict; -use v5.10.1; +use v5.10.1; # check regexps before v5.12 use Data::Dumper; $Data::Dumper::Useqq = 1; # should've been the default, for bad data @@ -141,10 +141,9 @@ sub reads { # all of these support -c for stdout and -d for decompression, # mutt is commonly distributed with hooks for gz, bz2 and xz, at least -# { foo => '' } means "--foo" is passed to the command-line, -# otherwise { foo => '--bar' } passes "--bar" +# { foo => '' } means "--foo" is passed to the command-line my %zsfx2cmd = ( - gz => [ qw(GZIP pigz gzip) ], + gz => [ qw(GZIP pigz gzip), { rsyncable => '' } ], bz2 => [ 'bzip2', {} ], xz => [ 'xz', {} ], # don't add new entries here unless MUA support is widely available @@ -173,28 +172,9 @@ sub zsfx2cmd ($$$) { } $cmd[0] // die join(' or ', @info)." missing for .$zsfx"; - # not all gzip support --rsyncable, FreeBSD gzip doesn't even exit - # with an error code - if (!$decompress && $cmd[0] =~ m!/gzip\z! && !defined($cmd_opt)) { - pipe(my ($r, $w)) or die "pipe: $!"; - open my $null, '+>', '/dev/null' or die "open: $!"; - my $rdr = { 0 => $null, 1 => $null, 2 => $w }; - my $tst = [ $cmd[0], '--rsyncable' ]; - my $pid = PublicInbox::Spawn::spawn($tst, undef, $rdr); - close $w; - my $err = do { local $/; <$r> }; - waitpid($pid, 0) == $pid or die "BUG: waitpid: $!"; - $cmd_opt = $err ? {} : { rsyncable => '' }; - push(@$x, $cmd_opt); - } - for my $bool (keys %$cmd_opt) { - my $switch = $cmd_opt->{$bool} // next; - push @cmd, '--'.($switch || $bool); - } - for my $key (qw(rsyncable)) { # support compression level? - my $switch = $cmd_opt->{$key} // next; - my $val = $lei->{opt}->{$key} // next; - push @cmd, $switch, $val; + # only for --rsyncable. TODO: support compression level? + for my $key (keys %$cmd_opt) { + push @cmd, '--'.$key if $lei->{opt}->{$key}; } \@cmd; } diff --git a/t/lei-convert.t b/t/lei-convert.t index e1849ff7..115e7ed0 100644 --- a/t/lei-convert.t +++ b/t/lei-convert.t @@ -1,12 +1,13 @@ #!perl -w -# Copyright (C) 2021 all contributors +# Copyright (C) all contributors # License: AGPL-3.0+ -use strict; use v5.10.1; use PublicInbox::TestCommon; +use v5.12; use PublicInbox::TestCommon; use PublicInbox::MboxReader; use PublicInbox::MdirReader; use PublicInbox::NetReader; use PublicInbox::Eml; use IO::Uncompress::Gunzip; +use autodie qw(open); require_mods(qw(lei -imapd -nntpd Mail::IMAPClient Net::NNTP)); my ($tmpdir, $for_destroy) = tmpdir; my $sock = tcp_server; @@ -125,5 +126,27 @@ test_lei({ tmpdir => $tmpdir }, sub { like($md[0], qr/:2,S\z/, "`seen' flag set in Maildir"); lei_ok(qw(convert -o mboxrd:/dev/stdout), "$d/md2"); like($lei_out, qr/^Status: RO/sm, "`seen' flag preserved"); + + SKIP: { + my $ok; + for my $x (($ENV{GZIP}//''), qw(pigz gzip)) { + $x && `$x -h 2>&1` =~ /--rsyncable\b/s or next; + $ok = $x; + last; + } + skip 'pigz || gzip do not support --rsyncable' if !$ok; + lei_ok qw(convert --rsyncable), "mboxrd:$d/qp.gz", + '-o', "mboxcl2:$d/qp2.gz"; + undef $fh; # necessary to make IO::Uncompress::Gunzip happy + open $fh, '<', "$d/qp2.gz"; + $fh = IO::Uncompress::Gunzip->new($fh, MultiStream => 1); + my @tmp; + PublicInbox::MboxReader->mboxcl2($fh, sub { + my ($eml) = @_; + $eml->header_set($_) for qw(Content-Length Lines); + push @tmp, $eml; + }); + is_deeply(\@tmp, \@bar, 'read rsyncable-gzipped mboxcl2'); + } }); done_testing;