From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 3DB1F1FA17 for ; Sat, 19 Sep 2020 09:37:15 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 6/7] gcf2: require git dir with OID Date: Sat, 19 Sep 2020 09:37:13 +0000 Message-Id: <20200919093714.21776-7-e@80x24.org> In-Reply-To: <20200919093714.21776-1-e@80x24.org> References: <20200919093714.21776-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: This amortizes the cost of recreating PublicInbox::Gcf2 objects when alternates change in v2 all.git. --- lib/PublicInbox/Gcf2Client.pm | 13 ------------- lib/PublicInbox/Git.pm | 5 +++-- lib/PublicInbox/GitAsyncCat.pm | 24 +++++++++++++++--------- script/public-inbox-gcf2 | 14 +++++--------- t/gcf2_client.t | 11 ++++------- 5 files changed, 27 insertions(+), 40 deletions(-) diff --git a/lib/PublicInbox/Gcf2Client.pm b/lib/PublicInbox/Gcf2Client.pm index a048cd1a..30f85c71 100644 --- a/lib/PublicInbox/Gcf2Client.pm +++ b/lib/PublicInbox/Gcf2Client.pm @@ -23,19 +23,6 @@ sub new { $self; } -sub add_git_dir { - my ($self, $git_dir) = @_; - - # ensure buffers are drained, length($git_dir) may exceed - # PIPE_BUF on platforms where PIPE_BUF is only 512 bytes - my $inflight = $self->{inflight}; - while (scalar(@$inflight)) { - $self->cat_async_step($inflight); - } - print { $self->{out} } $git_dir, "\n" or - $self->fail("write error: $!"); -} - # always false, since -gcf2 retries internally sub alternates_changed {} diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm index b49b5bd3..6bb82b6b 100644 --- a/lib/PublicInbox/Git.pm +++ b/lib/PublicInbox/Git.pm @@ -190,7 +190,8 @@ sub cat_async_step ($$) { $bref = my_read($self->{in}, $rbuf, $size + 1) or fail($self, defined($bref) ? 'read EOF' : "read: $!"); chop($$bref) eq "\n" or fail($self, 'LF missing after blob'); - } elsif ($head =~ / missing$/) { + } elsif ($head =~ s/ missing\n//s) { + $oid = $head; # ref($req) indicates it's already been retried # -gcf2 retries internally, so it never hits this path: if (!ref($req) && !$in_cleanup && $self->alternates_changed) { @@ -198,7 +199,7 @@ sub cat_async_step ($$) { $req, $cb, $arg); } $type = 'missing'; - $oid = ref($req) ? $$req : $req; + $oid = ref($req) ? $$req : $req if $oid eq ''; } else { fail($self, "Unexpected result from async git cat-file: $head"); } diff --git a/lib/PublicInbox/GitAsyncCat.pm b/lib/PublicInbox/GitAsyncCat.pm index db1a7f94..8a54c608 100644 --- a/lib/PublicInbox/GitAsyncCat.pm +++ b/lib/PublicInbox/GitAsyncCat.pm @@ -16,21 +16,27 @@ our @EXPORT = qw(git_async_cat); sub event_step { my ($self) = @_; - my $git = $self->{git}; - return $self->close if ($git->{in} // 0) != ($self->{sock} // 1); - my $inflight = $git->{inflight}; + my $gitish = $self->{gitish}; + return $self->close if ($gitish->{in} // 0) != ($self->{sock} // 1); + my $inflight = $gitish->{inflight}; if ($inflight && @$inflight) { - $git->cat_async_step($inflight); - $self->requeue if @$inflight || exists $git->{cat_rbuf}; + $gitish->cat_async_step($inflight); + $self->requeue if @$inflight || exists $gitish->{cat_rbuf}; } } sub git_async_cat ($$$$) { my ($git, $oid, $cb, $arg) = @_; - $git->cat_async($oid, $cb, $arg); - $git->{async_cat} //= do { - my $self = bless { git => $git }, __PACKAGE__; - $self->SUPER::new($git->{in}, EPOLLIN|EPOLLET); + my $gitish = $git->{gcf2c}; # PublicInbox::Gcf2Client + if ($gitish) { + $oid .= " $git->{git_dir}"; + } else { + $gitish = $git; + } + $gitish->cat_async($oid, $cb, $arg); + $gitish->{async_cat} //= do { + my $self = bless { gitish => $gitish }, __PACKAGE__; + $self->SUPER::new($gitish->{in}, EPOLLIN|EPOLLET); \undef; # this is a true ref() }; } diff --git a/script/public-inbox-gcf2 b/script/public-inbox-gcf2 index d2d2ac8b..4a44b654 100755 --- a/script/public-inbox-gcf2 +++ b/script/public-inbox-gcf2 @@ -3,7 +3,6 @@ # License: AGPL-3.0+ eval { require PublicInbox::Gcf2 }; die "libgit2 development package or Inline::C missing for $0: $@\n" if $@; -my @dirs; # may get big (30K-100K) my $gcf2 = PublicInbox::Gcf2::new(); use IO::Handle; # autoflush STDERR->autoflush(1); @@ -11,19 +10,16 @@ STDOUT->autoflush(1); while () { chomp; - if (m!\A/!) { # +/path/to/git-dir - push @dirs, $_; - $gcf2->add_alternate("$_/objects"); - } elsif (!$gcf2->cat_oid(1, $_)) { + my ($oid, $git_dir) = split(/ /, $_, 2); + $gcf2->add_alternate("$git_dir/objects"); + if (!$gcf2->cat_oid(1, $oid)) { # retry once if missing. We only get unabbreviated OIDs # from SQLite or Xapian DBs, here, so malicious clients # can't trigger excessive retries: - my $oid = $_; - warn "I: $$ $oid missing, retrying...\n"; + warn "I: $$ $oid missing, retrying in $git_dir...\n"; - # clients may need to wait a bit for this: $gcf2 = PublicInbox::Gcf2::new(); - $gcf2->add_alternate("$_/objects") for @dirs; + $gcf2->add_alternate("$git_dir/objects"); if ($gcf2->cat_oid(1, $oid)) { warn "I: $$ $oid found after retry\n"; diff --git a/t/gcf2_client.t b/t/gcf2_client.t index 0f7e7203..19462379 100644 --- a/t/gcf2_client.t +++ b/t/gcf2_client.t @@ -27,9 +27,7 @@ my $err_f = "$tmpdir/err"; local $ENV{PATH} = getcwd()."/blib/script:$ENV{PATH}"; open my $err, '>', $err_f or BAIL_OUT $!; my $gcf2c = PublicInbox::Gcf2Client::new({ 2 => $err }); - $gcf2c->add_git_dir($git_a); - - $gcf2c->cat_async($tree, sub { + $gcf2c->cat_async("$tree $git_a", sub { my ($bref, $oid, $type, $size, $arg) = @_; is($oid, $tree, 'got expected OID'); is($size, 30, 'got expected length'); @@ -45,7 +43,7 @@ my $err_f = "$tmpdir/err"; is($estr, '', 'nothing in stderr'); my $trunc = substr($tree, 0, 39); - $gcf2c->cat_async($trunc, sub { + $gcf2c->cat_async("$trunc $git_a", sub { my ($bref, $oid, $type, $size, $arg) = @_; is(undef, $bref, 'missing bref is undef'); is($oid, $trunc, 'truncated OID printed'); @@ -63,8 +61,7 @@ my $err_f = "$tmpdir/err"; # try failed alternates lookup open $err, '>', $err_f or BAIL_OUT $!; $gcf2c = PublicInbox::Gcf2Client::new({ 2 => $err }); - $gcf2c->add_git_dir($git_b); - $gcf2c->cat_async($tree, sub { + $gcf2c->cat_async("$tree $git_b", sub { my ($bref, $oid, $type, $size, $arg) = @_; is(undef, $bref, 'missing bref from alt is undef'); $called++; @@ -79,7 +76,7 @@ my $err_f = "$tmpdir/err"; print $alt "$git_a/objects\n" or BAIL_OUT $!; close $alt or BAIL_OUT; my $expect = xqx(['git', "--git-dir=$git_a", qw(cat-file tree), $tree]); - $gcf2c->cat_async($tree, sub { + $gcf2c->cat_async("$tree $git_a", sub { my ($bref, $oid, $type, $size, $arg) = @_; is($oid, $tree, 'oid match on alternates retry'); is($$bref, $expect, 'tree content matched');