From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id BC6D01FBD1 for ; Wed, 10 Jun 2020 07:06:25 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 35/82] git: async: automatic retry on alternates change Date: Wed, 10 Jun 2020 07:04:32 +0000 Message-Id: <20200610070519.18252-36-e@yhbt.net> In-Reply-To: <20200610070519.18252-1-e@yhbt.net> References: <20200610070519.18252-1-e@yhbt.net> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: This matches the behavior of the existing synchronous ->cat_file method. In fact, ->cat_file now becomes a small wrapper around the ->cat_async method. --- lib/PublicInbox/Git.pm | 64 +++++++++++++++++++++++++----------------- t/git.t | 37 ++++++++++++++++++++---- 2 files changed, 70 insertions(+), 31 deletions(-) diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm index 60236afe7bf..a55c48d5f52 100644 --- a/lib/PublicInbox/Git.pm +++ b/lib/PublicInbox/Git.pm @@ -18,6 +18,7 @@ use base qw(Exporter); our @EXPORT_OK = qw(git_unquote git_quote); use Errno qw(EINTR); our $PIPE_BUFSIZ = 65536; # Linux default +our $in_cleanup; use constant MAX_INFLIGHT => (($^O eq 'linux' ? 4096 : POSIX::_POSIX_PIPE_BUF()) * 3) @@ -155,6 +156,26 @@ sub my_readline ($$) { } } +sub cat_async_retry ($$$$$) { + my ($self, $inflight, $req, $cb, $arg) = @_; + + # {inflight} may be non-existent, but if it isn't we delete it + # here to prevent cleanup() from waiting: + delete $self->{inflight}; + cleanup($self); + + $self->{inflight} = $inflight; + batch_prepare($self); + my $buf = "$req\n"; + for (my $i = 0; $i < @$inflight; $i += 3) { + $buf .= "$inflight->[$i]\n"; + } + print { $self->{out} } $buf or fail($self, "write error: $!"); + unshift(@$inflight, \$req, $cb, $arg); # \$ref to indicate retried + + cat_async_step($self, $inflight); # take one step +} + sub cat_async_step ($$) { my ($self, $inflight) = @_; die 'BUG: inflight empty or odd' if scalar(@$inflight) < 3; @@ -168,8 +189,13 @@ sub cat_async_step ($$) { fail($self, defined($bref) ? 'read EOF' : "read: $!"); chop($$bref) eq "\n" or fail($self, 'LF missing after blob'); } elsif ($head =~ / missing$/) { + # ref($req) indicates it's already been retried + if (!ref($req) && !$in_cleanup && alternates_changed($self)) { + return cat_async_retry($self, $inflight, + $req, $cb, $arg); + } $type = 'missing'; - $oid = $req; + $oid = ref($req) ? $$req : $req; } else { fail($self, "Unexpected result from async git cat-file: $head"); } @@ -190,33 +216,18 @@ sub batch_prepare ($) { _bidi_pipe($_[0], qw(--batch in out pid)); } +sub _cat_file_cb { + my ($bref, undef, undef, $size, $result) = @_; + @$result = ($bref, $size); +} + sub cat_file { - my ($self, $obj, $sizeref) = @_; - my ($retried, $head, $rbuf); + my ($self, $oid, $sizeref) = @_; + my $result = []; + cat_async($self, $oid, \&_cat_file_cb, $result); cat_async_wait($self); -again: - batch_prepare($self); - $rbuf = delete($self->{cat_rbuf}) // \(my $new = ''); - print { $self->{out} } $obj, "\n" or fail($self, "write error: $!"); - $head = my_readline($self->{in}, $rbuf); - if ($head =~ / missing$/) { - if (!$retried && alternates_changed($self)) { - $retried = 1; - cleanup($self); - goto again; - } - return; - } - $head =~ /^[0-9a-f]{40} \S+ ([0-9]+)$/ or - fail($self, "Unexpected result from git cat-file: $head"); - - my $size = $1 + 0; - $$sizeref = $size if $sizeref; - my $ret = my_read($self->{in}, $rbuf, $size + 1); - $self->{cat_rbuf} = $rbuf if $$rbuf ne ''; - fail($self, defined($ret) ? 'read EOF' : "read: $!") if !$ret; - chop($$ret) eq "\n" or fail($self, 'newline missing after blob'); - $ret; + $$sizeref = $result->[1] if $sizeref; + $result->[0]; } sub check { @@ -283,6 +294,7 @@ sub qx { # returns true if there are pending "git cat-file" processes sub cleanup { my ($self) = @_; + local $in_cleanup = 1; if (my $ac = $self->{async_cat}) { $ac->close; # PublicInbox::GitAsyncCat::close -> EPOLL_CTL_DEL } diff --git a/t/git.t b/t/git.t index 98d16f289c3..228df90fa45 100644 --- a/t/git.t +++ b/t/git.t @@ -86,23 +86,50 @@ if (1) { if ('alternates reloaded') { my ($alt, $alt_obj) = tmpdir(); - my @cmd = ('git', "--git-dir=$alt", qw(hash-object -w --stdin)); + my $hash_obj = [ 'git', "--git-dir=$alt", qw(hash-object -w --stdin) ]; PublicInbox::Import::init_bare($alt); open my $fh, '<', "$alt/config" or die "open failed: $!\n"; - my $rd = popen_rd(\@cmd, {}, { 0 => $fh } ); - close $fh or die "close failed: $!"; - chomp(my $remote = <$rd>); + chomp(my $remote = xqx($hash_obj, undef, { 0 => $fh })); my $gcf = PublicInbox::Git->new($dir); is($gcf->cat_file($remote), undef, "remote file not found"); open $fh, '>>', "$dir/objects/info/alternates" or die "open failed: $!\n"; - print $fh "$alt/objects" or die "print failed: $!\n"; + print $fh "$alt/objects\n" or die "print failed: $!\n"; close $fh or die "close failed: $!"; my $found = $gcf->cat_file($remote); open $fh, '<', "$alt/config" or die "open failed: $!\n"; my $config = eval { local $/; <$fh> }; is($$found, $config, 'alternates reloaded'); + # with the async interface + my ($async_alt, $async_dir_obj) = tmpdir(); + PublicInbox::Import::init_bare($async_alt); + my @exist = map { chomp; [ split / / ] } (xqx(['git', "--git-dir=$dir", + qw(cat-file --batch-all-objects --batch-check)])); + my $results = []; + my $cb = sub { + my ($bref, $oid, $type, $size) = @_; + push @$results, [ $oid, $type, $size ]; + }; + for my $i (0..5) { + $gcf->cat_async($exist[$i]->[0], $cb, $results); + next if $i != 3; + + # stick a new alternate into a running async pipeline + $hash_obj->[1] = "--git-dir=$async_alt"; + $remote = xqx($hash_obj, undef, { 0 => \'async' }); + chomp $remote; + open $fh, '>>', "$dir/objects/info/alternates" or + die "open failed: $!\n"; + print $fh "$async_alt/objects\n" or die "print failed: $!\n"; + close $fh or die "close failed: $!"; + # trigger cat_async_retry: + $gcf->cat_async($remote, $cb, $results); + } + $gcf->cat_async_wait; + my $expect = [ @exist[0..3], [ $remote, 'blob', 5 ], @exist[4..5] ]; + is_deeply($results, $expect, 'got expected results'); + ok(!$gcf->cleanup, 'cleanup can expire'); ok(!$gcf->cleanup, 'cleanup idempotent');