unofficial mirror of meta@public-inbox.org
 help / color / mirror / Atom feed
* [PATCH 0/2] use `git cat-file --batch-command' if available
@ 2023-01-26  9:32 Eric Wong
  2023-01-26  9:32 ` [PATCH 1/2] git: use --batch-command in git 2.36+ to save processes Eric Wong
  2023-01-26  9:32 ` [PATCH 2/2] git: drop needless checks for old git Eric Wong
  0 siblings, 2 replies; 7+ messages in thread
From: Eric Wong @ 2023-01-26  9:32 UTC (permalink / raw)
  To: meta

This should reduce git processes needed to service solver
blob lookup/reconstruction requests.

Eric Wong (2):
  git: use --batch-command in git 2.36+ to save processes
  git: drop needless checks for old git

 lib/PublicInbox/Git.pm         | 153 +++++++++++++++++++++------------
 lib/PublicInbox/GitAsyncCat.pm |   1 +
 2 files changed, 98 insertions(+), 56 deletions(-)

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH 1/2] git: use --batch-command in git 2.36+ to save processes
  2023-01-26  9:32 [PATCH 0/2] use `git cat-file --batch-command' if available Eric Wong
@ 2023-01-26  9:32 ` Eric Wong
  2023-01-27  8:14   ` Eric Wong
  2023-02-09  1:29   ` Eric Wong
  2023-01-26  9:32 ` [PATCH 2/2] git: drop needless checks for old git Eric Wong
  1 sibling, 2 replies; 7+ messages in thread
From: Eric Wong @ 2023-01-26  9:32 UTC (permalink / raw)
  To: meta

`git cat-file --batch-command' combines the functionality of
`--batch' and `--batch-check' into a single process.  This
reduces the amount of running processes and is primarily
useful for coderepos (e.g. solver).
---
 lib/PublicInbox/Git.pm         | 140 ++++++++++++++++++++++-----------
 lib/PublicInbox/GitAsyncCat.pm |   1 +
 2 files changed, 94 insertions(+), 47 deletions(-)

diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm
index 12f997dc..63458ec6 100644
--- a/lib/PublicInbox/Git.pm
+++ b/lib/PublicInbox/Git.pm
@@ -31,7 +31,10 @@ our $async_warn; # true in read-only daemons
 # 512: POSIX PIPE_BUF minimum (see pipe(7))
 # 3: @$inflight is flattened [ $OID, $cb, $arg ]
 # 65: SHA-256 hex size + "\n" in preparation for git using non-SHA1
-use constant MAX_INFLIGHT => 512 * 3 / 65;
+use constant {
+	MAX_INFLIGHT => 512 * 3 / (65 + length('contents ')),
+	BATCH_CMD_VER => (2 << 24 | 36 << 16), # git 2.36+
+};
 
 my %GIT_ESC = (
 	a => "\a",
@@ -46,6 +49,24 @@ my %GIT_ESC = (
 );
 my %ESC_GIT = map { $GIT_ESC{$_} => $_ } keys %GIT_ESC;
 
+my $EXE_ST = ''; # pack('dd', st_ctime, st_size);
+my ($GIT_EXE, $GIT_VER);
+
+sub check_git_exe () {
+	$GIT_EXE = which('git') // die "git not found in $ENV{PATH}";
+	my @st = stat($GIT_EXE) or die "stat: $!";
+	my $st = pack('dd', $st[10], $st[7]);
+	if ($st ne $EXE_ST) {
+		my $rd = popen_rd([ $GIT_EXE, '--version' ]);
+		my $v = readline($rd);
+		$v =~ /\b([0-9]+(?:\.[0-9]+){2})/ or die
+			"$GIT_EXE --version output: $v # unparseable";
+		my @v = split(/\./, $1, 3);
+		$GIT_VER = ($v[0] << 24) | ($v[1] << 16) | $v[2];
+		$EXE_ST = $st;
+	}
+}
+
 # unquote pathnames used by git, see quote.c::unquote_c_style.c in git.git
 sub git_unquote ($) {
 	return $_[0] unless ($_[0] =~ /\A"(.*)"\z/);
@@ -122,25 +143,6 @@ sub _bidi_pipe {
 		return;
 	}
 
-	state $EXE_ST = ''; # pack('dd', st_ctime, st_size);
-	my $exe = which('git') // die "git not found in $ENV{PATH}";
-	my @st = stat($exe) or die "stat: $!";
-	my $st = pack('dd', $st[10], $st[7]);
-	state $VER;
-	if ($st ne $EXE_ST) {
-		my $rd = popen_rd([ $exe, '--version' ]);
-		my $v = readline($rd);
-		$v =~ /\b([0-9]+(?:\.[0-9]+){2})/ or die
-			"$exe --version output: $v # unparseable";
-		my @v = split(/\./, $1, 3);
-		$VER = ($v[0] << 24) | ($v[1] << 16) | $v[2];
-		$EXE_ST = $st;
-	}
-
-	# git 2.31.0+ supports -c core.abbrev=no, don't bother with
-	# core.abbrev=64 since not many releases had SHA-256 prior to 2.31
-	my $abbr = $VER < (2 << 24 | 31 << 16) ? 40 : 'no';
-
 	pipe(my ($out_r, $out_w)) or $self->fail("pipe failed: $!");
 	my $rdr = { 0 => $out_r, pgid => 0 };
 	my $gd = $self->{git_dir};
@@ -148,8 +150,12 @@ sub _bidi_pipe {
 		$rdr->{-C} = $gd;
 		$gd = $1;
 	}
-	my @cmd = ($exe, "--git-dir=$gd", '-c', "core.abbrev=$abbr",
-			'cat-file', $batch);
+
+	# git 2.31.0+ supports -c core.abbrev=no, don't bother with
+	# core.abbrev=64 since not many releases had SHA-256 prior to 2.31
+	my $abbr = $GIT_VER < (2 << 24 | 31 << 16) ? 40 : 'no';
+	my @cmd = ($GIT_EXE, "--git-dir=$gd", '-c', "core.abbrev=$abbr",
+			'cat-file', "--$batch");
 	if ($err) {
 		my $id = "git.$self->{git_dir}$batch.err";
 		my $fh = tmpfile($id) or $self->fail("tmpfile($id): $!");
@@ -163,7 +169,7 @@ sub _bidi_pipe {
 	$out_w->autoflush(1);
 	if ($^O eq 'linux') { # 1031: F_SETPIPE_SZ
 		fcntl($out_w, 1031, 4096);
-		fcntl($in_r, 1031, 4096) if $batch eq '--batch-check';
+		fcntl($in_r, 1031, 4096) if $batch eq 'batch-check';
 	}
 	$out_w->blocking(0);
 	$self->{$out} = $out_w;
@@ -240,12 +246,22 @@ sub cat_async_step ($$) {
 	my $rbuf = delete($self->{rbuf}) // \(my $new = '');
 	my ($bref, $oid, $type, $size);
 	my $head = my_readline($self->{in}, $rbuf);
+	my $cmd = ref($req) ? $$req : $req;
 	# ->fail may be called via Gcf2Client.pm
+	my $bc = $self->{-bc};
 	if ($head =~ /^([0-9a-f]{40,}) (\S+) ([0-9]+)$/) {
 		($oid, $type, $size) = ($1, $2, $3 + 0);
-		$bref = my_read($self->{in}, $rbuf, $size + 1) or
-			$self->fail(defined($bref) ? 'read EOF' : "read: $!");
-		chop($$bref) eq "\n" or $self->fail('LF missing after blob');
+		unless ($bc && $cmd =~ /\Ainfo /) { # --batch-command
+			$bref = my_read($self->{in}, $rbuf, $size + 1) or
+				$self->fail(defined($bref) ?
+						'read EOF' : "read: $!");
+			chop($$bref) eq "\n" or
+					$self->fail('LF missing after blob');
+		}
+	} elsif ($bc && $cmd =~ /\Ainfo / &&
+			$head =~ / (missing|ambiguous)\n/) {
+		$type = $1;
+		$oid = substr($cmd, 5);
 	} elsif ($head =~ s/ missing\n//s) {
 		$oid = $head;
 		# ref($req) indicates it's already been retried
@@ -254,15 +270,23 @@ sub cat_async_step ($$) {
 			return cat_async_retry($self, $inflight);
 		}
 		$type = 'missing';
-		$oid = ref($req) ? $$req : $req if $oid eq '';
+		if ($oid eq '') {
+			$oid = $cmd;
+			$oid =~ s/\A(?:contents|info) // if $bc;
+		}
 	} else {
 		my $err = $! ? " ($!)" : '';
 		$self->fail("bad result from async cat-file: $head$err");
 	}
 	$self->{rbuf} = $rbuf if $$rbuf ne '';
 	splice(@$inflight, 0, 3); # don't retry $cb on ->fail
-	eval { $cb->($bref, $oid, $type, $size, $arg) };
-	async_err($self, $req, $oid, $@, 'cat') if $@;
+	if ($bc && $cmd =~ /\Ainfo /) {
+		eval { $cb->($oid, $type, $size, $arg, $self) };
+		async_err($self, $req, $oid, $@, 'check') if $@;
+	} else {
+		eval { $cb->($bref, $oid, $type, $size, $arg) };
+		async_err($self, $req, $oid, $@, 'cat') if $@;
+	}
 }
 
 sub cat_async_wait ($) {
@@ -274,7 +298,15 @@ sub cat_async_wait ($) {
 }
 
 sub batch_prepare ($) {
-	_bidi_pipe($_[0], qw(--batch in out pid));
+	my ($self) = @_;
+	check_git_exe();
+	if ($GIT_VER >= BATCH_CMD_VER) {
+		_bidi_pipe($self, qw(batch-command in out pid err_c));
+		$self->{-bc} = 1;
+	} else {
+		_bidi_pipe($self, qw(batch in out pid));
+	}
+	$self->{inflight} = [];
 }
 
 sub _cat_file_cb {
@@ -314,18 +346,24 @@ sub check_async_step ($$) {
 
 sub check_async_wait ($) {
 	my ($self) = @_;
+	return cat_async_wait($self) if $self->{-bc};
 	my $inflight_c = $self->{inflight_c} or return;
-	while (scalar(@$inflight_c)) {
-		check_async_step($self, $inflight_c);
-	}
+	check_async_step($self, $inflight_c) while (scalar(@$inflight_c));
 }
 
 sub check_async_begin ($) {
 	my ($self) = @_;
-	cleanup($self) if alternates_changed($self);
-	_bidi_pipe($self, qw(--batch-check in_c out_c pid_c err_c));
 	die 'BUG: already in async check' if $self->{inflight_c};
-	$self->{inflight_c} = [];
+	cleanup($self) if alternates_changed($self);
+	check_git_exe();
+	if ($GIT_VER >= BATCH_CMD_VER) {
+		_bidi_pipe($self, qw(batch-command in out pid err_c));
+		$self->{-bc} = 1;
+		$self->{inflight} = [];
+	} else {
+		_bidi_pipe($self, qw(batch-check in_c out_c pid_c err_c));
+		$self->{inflight_c} = [];
+	}
 }
 
 sub write_all {
@@ -345,10 +383,18 @@ sub write_all {
 
 sub check_async ($$$$) {
 	my ($self, $oid, $cb, $arg) = @_;
-	my $inflight_c = $self->{inflight_c} // check_async_begin($self);
-	write_all($self, $self->{out_c}, $oid."\n",
-		\&check_async_step, $inflight_c);
-	push(@$inflight_c, $oid, $cb, $arg);
+	my $inflight = $self->{-bc} ?
+			($self->{inflight} // cat_async_begin($self)) :
+			($self->{inflight_c} // check_async_begin($self));
+	if ($self->{-bc}) {
+		substr($oid, 0, 0) = 'info ';
+		write_all($self, $self->{out}, "$oid\n",
+				\&cat_async_step, $inflight);
+	} else {
+		write_all($self, $self->{out_c}, "$oid\n",
+				\&check_async_step, $inflight);
+	}
+	push(@$inflight, $oid, $cb, $arg);
 }
 
 sub _check_cb { # check_async callback
@@ -389,6 +435,8 @@ sub async_abort ($) {
 			while (@$q) {
 				my ($req, $cb, $arg) = splice(@$q, 0, 3);
 				$req = $$req if ref($req);
+				$self->{-bc} and
+					$req =~ s/\A(?:contents|info) //;
 				$req =~ s/ .*//; # drop git_dir for Gcf2Client
 				eval { $cb->(undef, $req, undef, undef, $arg) };
 				warn "E: (in abort) $req: $@" if $@;
@@ -461,12 +509,10 @@ sub cleanup {
 	return 1 if $lazy && (scalar(@{$self->{inflight_c} // []}) ||
 				scalar(@{$self->{inflight} // []}));
 	local $in_cleanup = 1;
-	delete $self->{async_cat};
-	delete $self->{async_chk};
+	delete @$self{qw(async_cat async_chk)};
 	async_wait_all($self);
-	delete $self->{inflight};
-	delete $self->{inflight_c};
-	_destroy($self, qw(pid rbuf in out));
+	delete @$self{qw(inflight inflight_c -bc)};
+	_destroy($self, qw(pid rbuf in out err_c));
 	_destroy($self, qw(pid_c rbuf_c in_c out_c err_c));
 	undef;
 }
@@ -524,14 +570,14 @@ sub pub_urls {
 sub cat_async_begin {
 	my ($self) = @_;
 	cleanup($self) if $self->alternates_changed;
-	$self->batch_prepare;
 	die 'BUG: already in async' if $self->{inflight};
-	$self->{inflight} = [];
+	batch_prepare($self);
 }
 
 sub cat_async ($$$;$) {
 	my ($self, $oid, $cb, $arg) = @_;
 	my $inflight = $self->{inflight} // cat_async_begin($self);
+	substr($oid, 0, 0) = 'contents ' if $self->{-bc};
 	write_all($self, $self->{out}, $oid."\n", \&cat_async_step, $inflight);
 	push(@$inflight, $oid, $cb, $arg);
 }
diff --git a/lib/PublicInbox/GitAsyncCat.pm b/lib/PublicInbox/GitAsyncCat.pm
index 6dda7340..1f86aad0 100644
--- a/lib/PublicInbox/GitAsyncCat.pm
+++ b/lib/PublicInbox/GitAsyncCat.pm
@@ -78,6 +78,7 @@ sub async_check ($$$$) {
 	my ($ibx, $oidish, $cb, $arg) = @_; # $ibx may be $ctx
 	my $git = $ibx->{git} // $ibx->git;
 	$git->check_async($oidish, $cb, $arg);
+	return watch_cat($git) if $git->{-bc}; # --batch-command
 	$git->{async_chk} //= do {
 		my $self = bless { git => $git }, 'PublicInbox::GitAsyncCheck';
 		$git->{in_c}->blocking(0);

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 2/2] git: drop needless checks for old git
  2023-01-26  9:32 [PATCH 0/2] use `git cat-file --batch-command' if available Eric Wong
  2023-01-26  9:32 ` [PATCH 1/2] git: use --batch-command in git 2.36+ to save processes Eric Wong
@ 2023-01-26  9:32 ` Eric Wong
  1 sibling, 0 replies; 7+ messages in thread
From: Eric Wong @ 2023-01-26  9:32 UTC (permalink / raw)
  To: meta

`ambiguous' was added in git 2.21, and `dangling' was the only
other possible phrase which was inadvertantly slipped in prior
to 2.21.  Thus there's no need to check for `notdir' or `loop'
responses since we aren't using `git cat-file --follow-symlinks'
anywhere.
---
 lib/PublicInbox/Git.pm | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm
index 63458ec6..1c77b4ce 100644
--- a/lib/PublicInbox/Git.pm
+++ b/lib/PublicInbox/Git.pm
@@ -330,11 +330,9 @@ sub check_async_step ($$) {
 	chomp(my $line = my_readline($self->{in_c}, $rbuf));
 	my ($hex, $type, $size) = split(/ /, $line);
 
-	# Future versions of git.git may have type=ambiguous, but for now,
-	# we must handle 'dangling' below (and maybe some other oddball
-	# stuff):
+	# git <2.21 would show `dangling' (2.21+ shows `ambiguous')
 	# https://public-inbox.org/git/20190118033845.s2vlrb3wd3m2jfzu@dcvr/T/
-	if ($hex eq 'dangling' || $hex eq 'notdir' || $hex eq 'loop') {
+	if ($hex eq 'dangling') {
 		my $ret = my_read($self->{in_c}, $rbuf, $type + 1);
 		$self->fail(defined($ret) ? 'read EOF' : "read: $!") if !$ret;
 	}
@@ -409,12 +407,9 @@ sub check {
 	check_async_wait($self);
 	my ($hex, $type, $size) = @$result;
 
-	# Future versions of git.git may show 'ambiguous', but for now,
-	# we must handle 'dangling' below (and maybe some other oddball
-	# stuff):
+	# git <2.21 would show `dangling' (2.21+ shows `ambiguous')
 	# https://public-inbox.org/git/20190118033845.s2vlrb3wd3m2jfzu@dcvr/T/
-	return if $type eq 'missing' || $type eq 'ambiguous';
-	return if $hex eq 'dangling' || $hex eq 'notdir' || $hex eq 'loop';
+	return if $type =~ /\A(?:missing|ambiguous)\z/ || $hex eq 'dangling';
 	($hex, $type, $size);
 }
 

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH 1/2] git: use --batch-command in git 2.36+ to save processes
  2023-01-26  9:32 ` [PATCH 1/2] git: use --batch-command in git 2.36+ to save processes Eric Wong
@ 2023-01-27  8:14   ` Eric Wong
  2023-01-27  8:17     ` Eric Wong
  2023-02-09  1:29   ` Eric Wong
  1 sibling, 1 reply; 7+ messages in thread
From: Eric Wong @ 2023-01-27  8:14 UTC (permalink / raw)
  To: meta

t/imapd.t on my slower 32-bit machine fails without the
following fix.  I guess prefetch wasn't being tested on my
faster workstation?

Also, the prior use of `print { $git->{out} }' is a potential (but
unlikely) bug since commit d4ba8828ab23f278
(git: fix asynchronous batching for deep pipelines, 2023-01-04)

diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm
index 1c77b4ce..a9db8ad7 100644
--- a/lib/PublicInbox/Git.pm
+++ b/lib/PublicInbox/Git.pm
@@ -239,6 +239,16 @@ sub cat_async_retry ($$) {
 	cat_async_step($self, $inflight); # take one step
 }
 
+# returns true if prefetch is successful
+sub async_prefetch {
+	my ($self, $oid, $cb, $arg) = @_;
+	my $inflight = $self->{inflight} or return;
+	return if @$inflight;
+	substr($oid, 0, 0) = 'contents ' if $self->{-bc};
+	write_all($self, $self->{out}, "$oid\n", \&cat_async_step, $inflight);
+	push(@$inflight, $oid, $cb, $arg);
+}
+
 sub cat_async_step ($$) {
 	my ($self, $inflight) = @_;
 	die 'BUG: inflight empty or odd' if scalar(@$inflight) < 3;
diff --git a/lib/PublicInbox/GitAsyncCat.pm b/lib/PublicInbox/GitAsyncCat.pm
index 1f86aad0..49a3c602 100644
--- a/lib/PublicInbox/GitAsyncCat.pm
+++ b/lib/PublicInbox/GitAsyncCat.pm
@@ -98,12 +98,8 @@ sub ibx_async_prefetch {
 			$oid .= " $git->{git_dir}\n";
 			return $GCF2C->gcf2_async(\$oid, $cb, $arg); # true
 		}
-	} elsif ($git->{async_cat} && (my $inflight = $git->{inflight})) {
-		if (!@$inflight) {
-			print { $git->{out} } $oid, "\n" or
-						$git->fail("write error: $!");
-			return push(@$inflight, $oid, $cb, $arg);
-		}
+	} elsif ($git->{async_cat}) {
+		return $git->async_prefetch($oid, $cb, $arg);
 	}
 	undef;
 }

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH 1/2] git: use --batch-command in git 2.36+ to save processes
  2023-01-27  8:14   ` Eric Wong
@ 2023-01-27  8:17     ` Eric Wong
  0 siblings, 0 replies; 7+ messages in thread
From: Eric Wong @ 2023-01-27  8:17 UTC (permalink / raw)
  To: meta

t/nntpd.t and t/nntpd-v2.t also need a fix on my 32-bit machine
which lacks libgit2 (and in retrospect, same problem w/ the previous
fix for t/imapd.t)

The change to PublicInbox/Git.pm isn't necessary, but it looks a
bit nicer since I stopped passing `--' in to that sub and only
add `--' when invoking the command.

diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm
index a9db8ad7..3e2b435c 100644
--- a/lib/PublicInbox/Git.pm
+++ b/lib/PublicInbox/Git.pm
@@ -157,7 +157,7 @@ sub _bidi_pipe {
 	my @cmd = ($GIT_EXE, "--git-dir=$gd", '-c', "core.abbrev=$abbr",
 			'cat-file', "--$batch");
 	if ($err) {
-		my $id = "git.$self->{git_dir}$batch.err";
+		my $id = "git.$self->{git_dir}.$batch.err";
 		my $fh = tmpfile($id) or $self->fail("tmpfile($id): $!");
 		$self->{$err} = $fh;
 		$rdr->{2} = $fh;
diff --git a/t/nntpd.t b/t/nntpd.t
index 30233ce0..bebf4203 100644
--- a/t/nntpd.t
+++ b/t/nntpd.t
@@ -365,7 +365,7 @@ Date: Fri, 02 Oct 1993 00:00:00 +0000
 		($^O =~ /\A(?:linux)\z/) or
 			skip "lsof /(deleted)/ check untested on $^O", 1;
 		my @lsof = xqx([$lsof, '-p', $td->{pid}], undef, $noerr);
-		my $d = [ grep(/\(deleted\)/, @lsof) ];
+		my $d = [ grep(/\(deleted\)/, grep(!/batch-command\.err/, @lsof)) ];
 		is_deeply($d, [], 'no deleted files') or diag explain($d);
 	};
 	SKIP: { test_watch($tmpdir, $host_port, $group) };

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH 1/2] git: use --batch-command in git 2.36+ to save processes
  2023-01-26  9:32 ` [PATCH 1/2] git: use --batch-command in git 2.36+ to save processes Eric Wong
  2023-01-27  8:14   ` Eric Wong
@ 2023-02-09  1:29   ` Eric Wong
  2023-02-20  8:19     ` [PATCH] git_async_cat: don't mis-abort replaced process Eric Wong
  1 sibling, 1 reply; 7+ messages in thread
From: Eric Wong @ 2023-02-09  1:29 UTC (permalink / raw)
  To: meta

--batch-command seems to be causing problems with ViewVCS.
I've been noticing "Failed to retrieve generated blob" errors in
syslog.  It doesn't affect any mail retrievals AFAIK, since
those are (currently) separate processes and those are contents-only
(no info header-only retrievals)

Not sure what's going on, yet, but using this to gather
more data:

diff --git a/lib/PublicInbox/ViewVCS.pm b/lib/PublicInbox/ViewVCS.pm
index 5fd46610..0fb77c06 100644
--- a/lib/PublicInbox/ViewVCS.pm
+++ b/lib/PublicInbox/ViewVCS.pm
@@ -549,7 +549,8 @@ sub show_blob { # git->cat_async callback
 	my ($blob, $oid, $type, $size, $ctx) = @_;
 	if (!$blob) {
 		my $e = "Failed to retrieve generated blob ($oid)";
-		warn "$e ($ctx->{git}->{git_dir}) type=$type";
+		warn("$e ($ctx->{git}->{git_dir}) type=$type ",
+			"u=$ctx->{env}->{REQUEST_URI}".Carp::longmess()."\n");
 		return html_page($ctx, 500, "<pre><b>$e</b></pre>".dbg_log($ctx))
 	}
 

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH] git_async_cat: don't mis-abort replaced process
  2023-02-09  1:29   ` Eric Wong
@ 2023-02-20  8:19     ` Eric Wong
  0 siblings, 0 replies; 7+ messages in thread
From: Eric Wong @ 2023-02-20  8:19 UTC (permalink / raw)
  To: meta

When a git process gets replaced (e.g. due to new
epochs/alternates), we must be careful and not abort the wrong
one.

I suspect this fixes the problem exacerbated by --batch-command.
It was theoretically possible w/o --batch-command, but it seems
to have made it surface more readily.

This should fix "Failed to retrieve generated blob" errors from
PublicInbox/ViewVCS.pm appearing in syslog

Link: https://public-inbox.org/meta/20230209012932.M934961@dcvr/
---
 lib/PublicInbox/GitAsyncCat.pm | 26 +++++++++++++++++++-------
 1 file changed, 19 insertions(+), 7 deletions(-)

diff --git a/lib/PublicInbox/GitAsyncCat.pm b/lib/PublicInbox/GitAsyncCat.pm
index c428f6ef..671654b5 100644
--- a/lib/PublicInbox/GitAsyncCat.pm
+++ b/lib/PublicInbox/GitAsyncCat.pm
@@ -14,6 +14,12 @@ use PublicInbox::Git ();
 
 our $GCF2C; # singleton PublicInbox::Gcf2Client
 
+# close w/o aborting another git process
+sub vanish {
+	delete $_[0]->{git};
+	$_[0]->close;
+}
+
 sub close {
 	my ($self) = @_;
 	if (my $git = delete $self->{git}) {
@@ -22,19 +28,25 @@ sub close {
 	$self->SUPER::close; # PublicInbox::DS::close
 }
 
-sub aclose { $_[1]->close } # ignore PID ($_[0])
+sub aclose {
+	my (undef, $self, $f) = @_; # ignore PID ($_[0])
+	if (my $g = $self->{git}) {
+		return vanish($self) if ($g->{$f} // 0) != ($self->{sock} // 1);
+	}
+	$self->close;
+}
 
 sub event_step {
 	my ($self) = @_;
 	my $git = $self->{git} or return;
-	return $self->close if ($git->{in} // 0) != ($self->{sock} // 1);
+	return vanish($self) if ($git->{in} // 0) != ($self->{sock} // 1);
 	my $inflight = $git->{inflight};
 	if ($inflight && @$inflight) {
 		$git->cat_async_step($inflight);
 
 		# child death?
 		if (($git->{in} // 0) != ($self->{sock} // 1)) {
-			$self->close;
+			vanish($self);
 		} elsif (@$inflight || exists $git->{rbuf}) {
 			# ok, more to do, requeue for fairness
 			$self->requeue;
@@ -48,7 +60,7 @@ sub watch_cat {
 		my $self = bless { git => $git }, __PACKAGE__;
 		$git->{in}->blocking(0);
 		$self->SUPER::new($git->{in}, EPOLLIN|EPOLLET);
-		awaitpid($git->{pid}, \&aclose, $self);
+		awaitpid($git->{pid}, \&aclose, $self, 'in');
 		\undef; # this is a true ref()
 	};
 }
@@ -81,7 +93,7 @@ sub async_check ($$$$) {
 		my $self = bless { git => $git }, 'PublicInbox::GitAsyncCheck';
 		$git->{in_c}->blocking(0);
 		$self->SUPER::new($git->{in_c}, EPOLLIN|EPOLLET);
-		awaitpid($git->{pid_c}, \&aclose, $self);
+		awaitpid($git->{pid_c}, \&aclose, $self, 'in_c');
 		\undef; # this is a true ref()
 	};
 }
@@ -113,14 +125,14 @@ use PublicInbox::Syscall qw(EPOLLIN EPOLLET);
 sub event_step {
 	my ($self) = @_;
 	my $git = $self->{git} or return;
-	return $self->close if ($git->{in_c} // 0) != ($self->{sock} // 1);
+	return $self->vanish if ($git->{in_c} // 0) != ($self->{sock} // 1);
 	my $inflight = $git->{inflight_c};
 	if ($inflight && @$inflight) {
 		$git->check_async_step($inflight);
 
 		# child death?
 		if (($git->{in_c} // 0) != ($self->{sock} // 1)) {
-			$self->close;
+			$self->vanish;
 		} elsif (@$inflight || exists $git->{rbuf_c}) {
 			# ok, more to do, requeue for fairness
 			$self->requeue;

^ permalink raw reply related	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2023-02-20  8:19 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-01-26  9:32 [PATCH 0/2] use `git cat-file --batch-command' if available Eric Wong
2023-01-26  9:32 ` [PATCH 1/2] git: use --batch-command in git 2.36+ to save processes Eric Wong
2023-01-27  8:14   ` Eric Wong
2023-01-27  8:17     ` Eric Wong
2023-02-09  1:29   ` Eric Wong
2023-02-20  8:19     ` [PATCH] git_async_cat: don't mis-abort replaced process Eric Wong
2023-01-26  9:32 ` [PATCH 2/2] git: drop needless checks for old git Eric Wong

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).