unofficial mirror of meta@public-inbox.org
 help / color / mirror / Atom feed
* [PATCH 0/3] more process management cleanups + bugfix
@ 2023-09-27  6:02 Eric Wong
  2023-09-27  6:02 ` [PATCH 1/3] convert: use ProcessPipe with popen_rd Eric Wong
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Eric Wong @ 2023-09-27  6:02 UTC (permalink / raw)
  To: meta

3/3 is a bugfix found while working on this series, but probably
nobody noticed it...  The code reductions in lib/ are nice.

Eric Wong (3):
  convert: use ProcessPipe with popen_rd
  spawn: add popen_wr support
  lei: don't gzip --rsyncable by default for mbox*

 lib/PublicInbox/LEI.pm         |  3 ++-
 lib/PublicInbox/LeiRediff.pm   | 25 ++++++++++-------------
 lib/PublicInbox/LeiToMail.pm   |  9 +++------
 lib/PublicInbox/MboxReader.pm  | 36 ++++++++--------------------------
 lib/PublicInbox/ProcessPipe.pm |  9 +++++++++
 lib/PublicInbox/Spawn.pm       | 20 +++++++++----------
 script/public-inbox-convert    |  6 ++----
 t/lei-convert.t                | 27 +++++++++++++++++++++++--
 8 files changed, 68 insertions(+), 67 deletions(-)

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH 1/3] convert: use ProcessPipe with popen_rd
  2023-09-27  6:02 [PATCH 0/3] more process management cleanups + bugfix Eric Wong
@ 2023-09-27  6:02 ` Eric Wong
  2023-09-27  6:02 ` [PATCH 2/3] spawn: add popen_wr support Eric Wong
  2023-09-27  6:02 ` [PATCH 3/3] lei: don't gzip --rsyncable by default for mbox* Eric Wong
  2 siblings, 0 replies; 4+ messages in thread
From: Eric Wong @ 2023-09-27  6:02 UTC (permalink / raw)
  To: meta

ProcessPipe->CLOSE will already run waitpid for us and
exit on errors, so we can do less, here.
---
 script/public-inbox-convert | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/script/public-inbox-convert b/script/public-inbox-convert
index 96931cbf..780f7194 100755
--- a/script/public-inbox-convert
+++ b/script/public-inbox-convert
@@ -117,7 +117,7 @@ $clone may not be valid after migrating to v2, not copying
 }
 my $state = '';
 my $head = $old->{ref_head} || 'HEAD';
-my ($rd, $pid) = $old->git->popen(qw(fast-export --use-done-feature), $head);
+my $rd = $old->git->popen(qw(fast-export --use-done-feature), $head);
 $v2w->idx_init($opt);
 my $im = $v2w->importer;
 my ($r, $w) = $im->gfi_start;
@@ -164,9 +164,7 @@ while (<$rd>) {
 	last if $_ eq "done\n";
 	print $w $_ or $im->wfail;
 }
-close $rd or die "close fast-export: $!\n";
-waitpid($pid, 0) or die "waitpid failed: $!\n";
-$? == 0 or die "fast-export failed: $?\n";
+close $rd or die "fast-export: \$?=$? \$!=$!\n";
 $r = $w = undef; # v2w->done does the actual close and error checking
 $v2w->done;
 if (my $old_mm = $old->mm) {

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH 2/3] spawn: add popen_wr support
  2023-09-27  6:02 [PATCH 0/3] more process management cleanups + bugfix Eric Wong
  2023-09-27  6:02 ` [PATCH 1/3] convert: use ProcessPipe with popen_rd Eric Wong
@ 2023-09-27  6:02 ` Eric Wong
  2023-09-27  6:02 ` [PATCH 3/3] lei: don't gzip --rsyncable by default for mbox* Eric Wong
  2 siblings, 0 replies; 4+ messages in thread
From: Eric Wong @ 2023-09-27  6:02 UTC (permalink / raw)
  To: meta

This makes interesting parts of our code easier to read IMHO.
We can take advantage of `local' while avoiding `fileno' calls
since it's called in spawn() anyways to reduce LoC even further.
---
 lib/PublicInbox/LeiRediff.pm   | 25 ++++++++++---------------
 lib/PublicInbox/LeiToMail.pm   |  7 ++-----
 lib/PublicInbox/ProcessPipe.pm |  9 +++++++++
 lib/PublicInbox/Spawn.pm       | 20 +++++++++-----------
 4 files changed, 30 insertions(+), 31 deletions(-)

diff --git a/lib/PublicInbox/LeiRediff.pm b/lib/PublicInbox/LeiRediff.pm
index 824289d6..efd24d17 100644
--- a/lib/PublicInbox/LeiRediff.pm
+++ b/lib/PublicInbox/LeiRediff.pm
@@ -1,4 +1,4 @@
-# Copyright (C) 2021 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
 # The "lei rediff" sub-command, regenerates diffs with new options
@@ -7,7 +7,7 @@ use strict;
 use v5.10.1;
 use parent qw(PublicInbox::IPC PublicInbox::LeiInput);
 use File::Temp 0.19 (); # 0.19 for ->newdir
-use PublicInbox::Spawn qw(run_wait spawn which);
+use PublicInbox::Spawn qw(run_wait popen_wr which);
 use PublicInbox::MsgIter qw(msg_part_text);
 use PublicInbox::ViewDiff;
 use PublicInbox::LeiBlob;
@@ -121,15 +121,11 @@ EOM
 		close $fh or die "close $f: $!";
 		$rw = PublicInbox::Git->new($d);
 	}
-	pipe(my ($r, $w)) or die "pipe: $!";
-	my $pid = spawn(['git', "--git-dir=$rw->{git_dir}",
+	my $w = popen_wr(['git', "--git-dir=$rw->{git_dir}",
 			qw(fast-import --quiet --done --date-format=raw)],
-			$lei->{env}, { 2 => $lei->{2}, 0 => $r });
-	close $r or die "close r fast-import: $!";
+			$lei->{env}, { 2 => $lei->{2} });
 	print $w $ta, "\n", $tb, "\ndone\n" or die "print fast-import: $!";
-	close $w or die "close w fast-import: $!";
-	waitpid($pid, 0);
-	die "fast-import failed: \$?=$?" if $?;
+	close $w or die "close w fast-import: \$?=$? \$!=$!";
 
 	my $cmd = [ 'diff' ];
 	_lei_diff_prepare($lei, $cmd);
@@ -141,7 +137,7 @@ EOM
 	undef;
 }
 
-sub wait_requote ($$$) { # OnDestroy callback
+sub wait_requote { # OnDestroy callback
 	my ($lei, $pid, $old_1) = @_;
 	$lei->{1} = $old_1; # closes stdin of `perl -pE 's/^/> /'`
 	waitpid($pid, 0) == $pid or die "BUG(?) waitpid: \$!=$! \$?=$?";
@@ -150,13 +146,12 @@ sub wait_requote ($$$) { # OnDestroy callback
 
 sub requote ($$) {
 	my ($lei, $pfx) = @_;
-	pipe(my($r, $w)) or die "pipe: $!";
-	my $rdr = { 0 => $r, 1 => $lei->{1}, 2 => $lei->{2} };
-	# $^X (perl) is overkill, but maybe there's a weird system w/o sed
-	my $pid = spawn([$^X, '-pE', "s/^/$pfx/"], $lei->{env}, $rdr);
 	my $old_1 = $lei->{1};
+	my $opt = { 1 => $old_1, 2 => $lei->{2} };
+	# $^X (perl) is overkill, but maybe there's a weird system w/o sed
+	my ($w, $pid) = popen_wr([$^X, '-pE', "s/^/$pfx/"], $lei->{env}, $opt);
 	$w->autoflush(1);
-	binmode $w, ':utf8';
+	binmode $w, ':utf8'; # incompatible with ProcessPipe due to syswrite
 	$lei->{1} = $w;
 	PublicInbox::OnDestroy->new(\&wait_requote, $lei, $pid, $old_1);
 }
diff --git a/lib/PublicInbox/LeiToMail.pm b/lib/PublicInbox/LeiToMail.pm
index 4adcc33e..a2cd8650 100644
--- a/lib/PublicInbox/LeiToMail.pm
+++ b/lib/PublicInbox/LeiToMail.pm
@@ -9,7 +9,6 @@ use parent qw(PublicInbox::IPC);
 use PublicInbox::Eml;
 use PublicInbox::ProcessPipe;
 use PublicInbox::Spawn qw(spawn);
-use Symbol qw(gensym);
 use IO::Handle; # ->autoflush
 use Fcntl qw(SEEK_SET SEEK_END O_CREAT O_EXCL O_WRONLY);
 use PublicInbox::Syscall qw(rename_noreplace);
@@ -163,10 +162,8 @@ sub _post_augment_mbox { # open a compressor process from top-level process
 	my ($r, $w) = @{delete $lei->{zpipe}};
 	my $rdr = { 0 => $r, 1 => $lei->{1}, 2 => $lei->{2}, pgid => 0 };
 	my $pid = spawn($cmd, undef, $rdr);
-	my $pp = gensym;
-	tie *$pp, 'PublicInbox::ProcessPipe', $pid, $w,
-			\&reap_compress, $lei, $cmd, $lei->{1};
-	$lei->{1} = $pp;
+	$lei->{1} = PublicInbox::ProcessPipe->maybe_new($pid, $w, {
+			cb_arg => [\&reap_compress, $lei, $cmd, $lei->{1} ] });
 }
 
 # --augment existing output destination, with deduplication
diff --git a/lib/PublicInbox/ProcessPipe.pm b/lib/PublicInbox/ProcessPipe.pm
index bbba75a2..16971801 100644
--- a/lib/PublicInbox/ProcessPipe.pm
+++ b/lib/PublicInbox/ProcessPipe.pm
@@ -8,6 +8,15 @@
 package PublicInbox::ProcessPipe;
 use v5.12;
 use PublicInbox::DS qw(awaitpid);
+use Symbol qw(gensym);
+
+sub maybe_new {
+	my ($cls, $pid, $fh, $opt) = @_;
+	return ($fh, $pid) if wantarray;
+	my $s = gensym;
+	tie *$s, $cls, $pid, $fh, @{$opt->{cb_arg} // []};
+	$s;
+}
 
 sub waitcb { # awaitpid callback
 	my ($pid, $err_ref, $cb, @args) = @_;
diff --git a/lib/PublicInbox/Spawn.pm b/lib/PublicInbox/Spawn.pm
index 38619fc2..75ef0137 100644
--- a/lib/PublicInbox/Spawn.pm
+++ b/lib/PublicInbox/Spawn.pm
@@ -17,12 +17,11 @@
 package PublicInbox::Spawn;
 use v5.12;
 use parent qw(Exporter);
-use Symbol qw(gensym);
 use Fcntl qw(LOCK_EX SEEK_SET);
 use IO::Handle ();
 use Carp qw(croak);
 use PublicInbox::ProcessPipe;
-our @EXPORT_OK = qw(which spawn popen_rd run_die run_wait);
+our @EXPORT_OK = qw(which spawn popen_rd popen_wr run_die run_wait);
 our @RLIMITS = qw(RLIMIT_CPU RLIMIT_CORE RLIMIT_DATA);
 
 BEGIN {
@@ -332,7 +331,6 @@ sub spawn ($;$$) {
 	my ($cmd, $env, $opts) = @_;
 	my $f = which($cmd->[0]) // die "$cmd->[0]: command not found\n";
 	my @env;
-	$opts ||= {};
 	my %env = (%ENV, $env ? %$env : ());
 	while (my ($k, $v) = each %env) {
 		push @env, "$k=$v" if defined($v);
@@ -366,14 +364,14 @@ sub spawn ($;$$) {
 
 sub popen_rd {
 	my ($cmd, $env, $opt) = @_;
-	pipe(my ($r, $w)) or die "pipe: $!\n";
-	$opt ||= {};
-	$opt->{1} = fileno($w);
-	my $pid = spawn($cmd, $env, $opt);
-	return ($r, $pid) if wantarray;
-	my $s = gensym;
-	tie *$s, 'PublicInbox::ProcessPipe', $pid, $r, @{$opt->{cb_arg} // []};
-	$s;
+	pipe(my $r, local $opt->{1}) or die "pipe: $!\n";
+	PublicInbox::ProcessPipe->maybe_new(spawn($cmd, $env, $opt), $r, $opt)
+}
+
+sub popen_wr {
+	my ($cmd, $env, $opt) = @_;
+	pipe(local $opt->{0}, my $w) or die "pipe: $!\n";
+	PublicInbox::ProcessPipe->maybe_new(spawn($cmd, $env, $opt), $w, $opt)
 }
 
 sub run_wait ($;$$) {

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH 3/3] lei: don't gzip --rsyncable by default for mbox*
  2023-09-27  6:02 [PATCH 0/3] more process management cleanups + bugfix Eric Wong
  2023-09-27  6:02 ` [PATCH 1/3] convert: use ProcessPipe with popen_rd Eric Wong
  2023-09-27  6:02 ` [PATCH 2/3] spawn: add popen_wr support Eric Wong
@ 2023-09-27  6:02 ` Eric Wong
  2 siblings, 0 replies; 4+ messages in thread
From: Eric Wong @ 2023-09-27  6:02 UTC (permalink / raw)
  To: meta

Using and memoizing the usability of `--rsyncable' is unsafe
since pigz (or GNU gzip) can be uninstalled and leave a user
with a non-rsync-aware gzip implementation in the long-running
daemon.  So we stop passing --rsyncable by default to pigz/gzip
and no longer attempt to check for it (since it was a TOCTTOU
error, anyways).

Specifying --rsyncable explicitly didn't work, either, and
ended up passing `1' to the gzip/pigz argv :x

Finally, we now test --rsyncable on the CLI by adding support
for it in `lei convert' and testing it in t/lei-convert.t
---
 lib/PublicInbox/LEI.pm        |  3 ++-
 lib/PublicInbox/LeiToMail.pm  |  2 +-
 lib/PublicInbox/MboxReader.pm | 36 ++++++++---------------------------
 t/lei-convert.t               | 27 ++++++++++++++++++++++++--
 4 files changed, 36 insertions(+), 32 deletions(-)

diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index 2f8d7a96..beb0f897 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -275,7 +275,8 @@ our %CMD = ( # sorted in order of importance/use:
 	qw(all:s mode=s), @net_opt, @c_opt ],
 'convert' => [ 'LOCATION...|--stdin',
 	'one-time conversion from URL or filesystem to another format',
-	qw(stdin| in-format|F=s out-format|f=s output|mfolder|o=s lock=s@ kw!),
+	qw(stdin| in-format|F=s out-format|f=s output|mfolder|o=s lock=s@ kw!
+		rsyncable),
 	@net_opt, @c_opt ],
 'p2q' => [ 'LOCATION_OR_COMMIT...|--stdin',
 	"use a patch to generate a query for `lei q --stdin'",
diff --git a/lib/PublicInbox/LeiToMail.pm b/lib/PublicInbox/LeiToMail.pm
index a2cd8650..2dddf00b 100644
--- a/lib/PublicInbox/LeiToMail.pm
+++ b/lib/PublicInbox/LeiToMail.pm
@@ -155,7 +155,7 @@ sub reap_compress { # awaitpid callback
 	$lei->fail($?, "@$cmd failed") if $?;
 }
 
-sub _post_augment_mbox { # open a compressor process from top-level process
+sub _post_augment_mbox { # open a compressor process from top-level lei-daemon
 	my ($self, $lei) = @_;
 	my $zsfx = $self->{zsfx} or return;
 	my $cmd = PublicInbox::MboxReader::zsfx2cmd($zsfx, undef, $lei);
diff --git a/lib/PublicInbox/MboxReader.pm b/lib/PublicInbox/MboxReader.pm
index beffabe8..e4209022 100644
--- a/lib/PublicInbox/MboxReader.pm
+++ b/lib/PublicInbox/MboxReader.pm
@@ -1,10 +1,10 @@
-# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
-# reader for mbox variants we support
+# reader for mbox variants we support (and also sets up commands for writing)
 package PublicInbox::MboxReader;
 use strict;
-use v5.10.1;
+use v5.10.1; # check regexps before v5.12
 use Data::Dumper;
 $Data::Dumper::Useqq = 1; # should've been the default, for bad data
 
@@ -141,10 +141,9 @@ sub reads {
 
 # all of these support -c for stdout and -d for decompression,
 # mutt is commonly distributed with hooks for gz, bz2 and xz, at least
-# { foo => '' } means "--foo" is passed to the command-line,
-# otherwise { foo => '--bar' } passes "--bar"
+# { foo => '' } means "--foo" is passed to the command-line
 my %zsfx2cmd = (
-	gz => [ qw(GZIP pigz gzip) ],
+	gz => [ qw(GZIP pigz gzip), { rsyncable => '' } ],
 	bz2 => [ 'bzip2', {} ],
 	xz => [ 'xz', {} ],
 	# don't add new entries here unless MUA support is widely available
@@ -173,28 +172,9 @@ sub zsfx2cmd ($$$) {
 	}
 	$cmd[0] // die join(' or ', @info)." missing for .$zsfx";
 
-	# not all gzip support --rsyncable, FreeBSD gzip doesn't even exit
-	# with an error code
-	if (!$decompress && $cmd[0] =~ m!/gzip\z! && !defined($cmd_opt)) {
-		pipe(my ($r, $w)) or die "pipe: $!";
-		open my $null, '+>', '/dev/null' or die "open: $!";
-		my $rdr = { 0 => $null, 1 => $null, 2 => $w };
-		my $tst = [ $cmd[0], '--rsyncable' ];
-		my $pid = PublicInbox::Spawn::spawn($tst, undef, $rdr);
-		close $w;
-		my $err = do { local $/; <$r> };
-		waitpid($pid, 0) == $pid or die "BUG: waitpid: $!";
-		$cmd_opt = $err ? {} : { rsyncable => '' };
-		push(@$x, $cmd_opt);
-	}
-	for my $bool (keys %$cmd_opt) {
-		my $switch = $cmd_opt->{$bool} // next;
-		push @cmd, '--'.($switch || $bool);
-	}
-	for my $key (qw(rsyncable)) { # support compression level?
-		my $switch = $cmd_opt->{$key} // next;
-		my $val = $lei->{opt}->{$key} // next;
-		push @cmd, $switch, $val;
+	# only for --rsyncable.  TODO: support compression level?
+	for my $key (keys %$cmd_opt) {
+		push @cmd, '--'.$key if $lei->{opt}->{$key};
 	}
 	\@cmd;
 }
diff --git a/t/lei-convert.t b/t/lei-convert.t
index e1849ff7..115e7ed0 100644
--- a/t/lei-convert.t
+++ b/t/lei-convert.t
@@ -1,12 +1,13 @@
 #!perl -w
-# Copyright (C) 2021 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
-use strict; use v5.10.1; use PublicInbox::TestCommon;
+use v5.12; use PublicInbox::TestCommon;
 use PublicInbox::MboxReader;
 use PublicInbox::MdirReader;
 use PublicInbox::NetReader;
 use PublicInbox::Eml;
 use IO::Uncompress::Gunzip;
+use autodie qw(open);
 require_mods(qw(lei -imapd -nntpd Mail::IMAPClient Net::NNTP));
 my ($tmpdir, $for_destroy) = tmpdir;
 my $sock = tcp_server;
@@ -125,5 +126,27 @@ test_lei({ tmpdir => $tmpdir }, sub {
 	like($md[0], qr/:2,S\z/, "`seen' flag set in Maildir");
 	lei_ok(qw(convert -o mboxrd:/dev/stdout), "$d/md2");
 	like($lei_out, qr/^Status: RO/sm, "`seen' flag preserved");
+
+	SKIP: {
+		my $ok;
+		for my $x (($ENV{GZIP}//''), qw(pigz gzip)) {
+			$x && `$x -h 2>&1` =~ /--rsyncable\b/s or next;
+			$ok = $x;
+			last;
+		}
+		skip 'pigz || gzip do not support --rsyncable' if !$ok;
+		lei_ok qw(convert --rsyncable), "mboxrd:$d/qp.gz",
+			'-o', "mboxcl2:$d/qp2.gz";
+		undef $fh; # necessary to make IO::Uncompress::Gunzip happy
+		open $fh, '<', "$d/qp2.gz";
+		$fh = IO::Uncompress::Gunzip->new($fh, MultiStream => 1);
+		my @tmp;
+		PublicInbox::MboxReader->mboxcl2($fh, sub {
+			my ($eml) = @_;
+			$eml->header_set($_) for qw(Content-Length Lines);
+			push @tmp, $eml;
+		});
+		is_deeply(\@tmp, \@bar, 'read rsyncable-gzipped mboxcl2');
+	}
 });
 done_testing;

^ permalink raw reply related	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2023-09-27  6:02 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-09-27  6:02 [PATCH 0/3] more process management cleanups + bugfix Eric Wong
2023-09-27  6:02 ` [PATCH 1/3] convert: use ProcessPipe with popen_rd Eric Wong
2023-09-27  6:02 ` [PATCH 2/3] spawn: add popen_wr support Eric Wong
2023-09-27  6:02 ` [PATCH 3/3] lei: don't gzip --rsyncable by default for mbox* Eric Wong

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).