From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 6/7] gcf2: require git dir with OID
Date: Sat, 19 Sep 2020 09:37:13 +0000 [thread overview]
Message-ID: <20200919093714.21776-7-e@80x24.org> (raw)
In-Reply-To: <20200919093714.21776-1-e@80x24.org>
This amortizes the cost of recreating PublicInbox::Gcf2 objects
when alternates change in v2 all.git.
---
lib/PublicInbox/Gcf2Client.pm | 13 -------------
lib/PublicInbox/Git.pm | 5 +++--
lib/PublicInbox/GitAsyncCat.pm | 24 +++++++++++++++---------
script/public-inbox-gcf2 | 14 +++++---------
t/gcf2_client.t | 11 ++++-------
5 files changed, 27 insertions(+), 40 deletions(-)
diff --git a/lib/PublicInbox/Gcf2Client.pm b/lib/PublicInbox/Gcf2Client.pm
index a048cd1a..30f85c71 100644
--- a/lib/PublicInbox/Gcf2Client.pm
+++ b/lib/PublicInbox/Gcf2Client.pm
@@ -23,19 +23,6 @@ sub new {
$self;
}
-sub add_git_dir {
- my ($self, $git_dir) = @_;
-
- # ensure buffers are drained, length($git_dir) may exceed
- # PIPE_BUF on platforms where PIPE_BUF is only 512 bytes
- my $inflight = $self->{inflight};
- while (scalar(@$inflight)) {
- $self->cat_async_step($inflight);
- }
- print { $self->{out} } $git_dir, "\n" or
- $self->fail("write error: $!");
-}
-
# always false, since -gcf2 retries internally
sub alternates_changed {}
diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm
index b49b5bd3..6bb82b6b 100644
--- a/lib/PublicInbox/Git.pm
+++ b/lib/PublicInbox/Git.pm
@@ -190,7 +190,8 @@ sub cat_async_step ($$) {
$bref = my_read($self->{in}, $rbuf, $size + 1) or
fail($self, defined($bref) ? 'read EOF' : "read: $!");
chop($$bref) eq "\n" or fail($self, 'LF missing after blob');
- } elsif ($head =~ / missing$/) {
+ } elsif ($head =~ s/ missing\n//s) {
+ $oid = $head;
# ref($req) indicates it's already been retried
# -gcf2 retries internally, so it never hits this path:
if (!ref($req) && !$in_cleanup && $self->alternates_changed) {
@@ -198,7 +199,7 @@ sub cat_async_step ($$) {
$req, $cb, $arg);
}
$type = 'missing';
- $oid = ref($req) ? $$req : $req;
+ $oid = ref($req) ? $$req : $req if $oid eq '';
} else {
fail($self, "Unexpected result from async git cat-file: $head");
}
diff --git a/lib/PublicInbox/GitAsyncCat.pm b/lib/PublicInbox/GitAsyncCat.pm
index db1a7f94..8a54c608 100644
--- a/lib/PublicInbox/GitAsyncCat.pm
+++ b/lib/PublicInbox/GitAsyncCat.pm
@@ -16,21 +16,27 @@ our @EXPORT = qw(git_async_cat);
sub event_step {
my ($self) = @_;
- my $git = $self->{git};
- return $self->close if ($git->{in} // 0) != ($self->{sock} // 1);
- my $inflight = $git->{inflight};
+ my $gitish = $self->{gitish};
+ return $self->close if ($gitish->{in} // 0) != ($self->{sock} // 1);
+ my $inflight = $gitish->{inflight};
if ($inflight && @$inflight) {
- $git->cat_async_step($inflight);
- $self->requeue if @$inflight || exists $git->{cat_rbuf};
+ $gitish->cat_async_step($inflight);
+ $self->requeue if @$inflight || exists $gitish->{cat_rbuf};
}
}
sub git_async_cat ($$$$) {
my ($git, $oid, $cb, $arg) = @_;
- $git->cat_async($oid, $cb, $arg);
- $git->{async_cat} //= do {
- my $self = bless { git => $git }, __PACKAGE__;
- $self->SUPER::new($git->{in}, EPOLLIN|EPOLLET);
+ my $gitish = $git->{gcf2c}; # PublicInbox::Gcf2Client
+ if ($gitish) {
+ $oid .= " $git->{git_dir}";
+ } else {
+ $gitish = $git;
+ }
+ $gitish->cat_async($oid, $cb, $arg);
+ $gitish->{async_cat} //= do {
+ my $self = bless { gitish => $gitish }, __PACKAGE__;
+ $self->SUPER::new($gitish->{in}, EPOLLIN|EPOLLET);
\undef; # this is a true ref()
};
}
diff --git a/script/public-inbox-gcf2 b/script/public-inbox-gcf2
index d2d2ac8b..4a44b654 100755
--- a/script/public-inbox-gcf2
+++ b/script/public-inbox-gcf2
@@ -3,7 +3,6 @@
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
eval { require PublicInbox::Gcf2 };
die "libgit2 development package or Inline::C missing for $0: $@\n" if $@;
-my @dirs; # may get big (30K-100K)
my $gcf2 = PublicInbox::Gcf2::new();
use IO::Handle; # autoflush
STDERR->autoflush(1);
@@ -11,19 +10,16 @@ STDOUT->autoflush(1);
while (<STDIN>) {
chomp;
- if (m!\A/!) { # +/path/to/git-dir
- push @dirs, $_;
- $gcf2->add_alternate("$_/objects");
- } elsif (!$gcf2->cat_oid(1, $_)) {
+ my ($oid, $git_dir) = split(/ /, $_, 2);
+ $gcf2->add_alternate("$git_dir/objects");
+ if (!$gcf2->cat_oid(1, $oid)) {
# retry once if missing. We only get unabbreviated OIDs
# from SQLite or Xapian DBs, here, so malicious clients
# can't trigger excessive retries:
- my $oid = $_;
- warn "I: $$ $oid missing, retrying...\n";
+ warn "I: $$ $oid missing, retrying in $git_dir...\n";
- # clients may need to wait a bit for this:
$gcf2 = PublicInbox::Gcf2::new();
- $gcf2->add_alternate("$_/objects") for @dirs;
+ $gcf2->add_alternate("$git_dir/objects");
if ($gcf2->cat_oid(1, $oid)) {
warn "I: $$ $oid found after retry\n";
diff --git a/t/gcf2_client.t b/t/gcf2_client.t
index 0f7e7203..19462379 100644
--- a/t/gcf2_client.t
+++ b/t/gcf2_client.t
@@ -27,9 +27,7 @@ my $err_f = "$tmpdir/err";
local $ENV{PATH} = getcwd()."/blib/script:$ENV{PATH}";
open my $err, '>', $err_f or BAIL_OUT $!;
my $gcf2c = PublicInbox::Gcf2Client::new({ 2 => $err });
- $gcf2c->add_git_dir($git_a);
-
- $gcf2c->cat_async($tree, sub {
+ $gcf2c->cat_async("$tree $git_a", sub {
my ($bref, $oid, $type, $size, $arg) = @_;
is($oid, $tree, 'got expected OID');
is($size, 30, 'got expected length');
@@ -45,7 +43,7 @@ my $err_f = "$tmpdir/err";
is($estr, '', 'nothing in stderr');
my $trunc = substr($tree, 0, 39);
- $gcf2c->cat_async($trunc, sub {
+ $gcf2c->cat_async("$trunc $git_a", sub {
my ($bref, $oid, $type, $size, $arg) = @_;
is(undef, $bref, 'missing bref is undef');
is($oid, $trunc, 'truncated OID printed');
@@ -63,8 +61,7 @@ my $err_f = "$tmpdir/err";
# try failed alternates lookup
open $err, '>', $err_f or BAIL_OUT $!;
$gcf2c = PublicInbox::Gcf2Client::new({ 2 => $err });
- $gcf2c->add_git_dir($git_b);
- $gcf2c->cat_async($tree, sub {
+ $gcf2c->cat_async("$tree $git_b", sub {
my ($bref, $oid, $type, $size, $arg) = @_;
is(undef, $bref, 'missing bref from alt is undef');
$called++;
@@ -79,7 +76,7 @@ my $err_f = "$tmpdir/err";
print $alt "$git_a/objects\n" or BAIL_OUT $!;
close $alt or BAIL_OUT;
my $expect = xqx(['git', "--git-dir=$git_a", qw(cat-file tree), $tree]);
- $gcf2c->cat_async($tree, sub {
+ $gcf2c->cat_async("$tree $git_a", sub {
my ($bref, $oid, $type, $size, $arg) = @_;
is($oid, $tree, 'oid match on alternates retry');
is($$bref, $expect, 'tree content matched');
next prev parent reply other threads:[~2020-09-19 9:37 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-09-19 9:37 [PATCH 0/7] gcf2: libgit2-based cat-file alternative Eric Wong
2020-09-19 9:37 ` [PATCH 1/7] gcf2: libgit2-based git " Eric Wong
2020-09-19 9:37 ` [PATCH 2/7] t/gcf2: test changes to alternates Eric Wong
2020-09-19 9:37 ` [PATCH 3/7] add gcf2 client and executable script Eric Wong
2020-09-19 9:37 ` [PATCH 4/7] gcf2: transparently retry on missing OID Eric Wong
2020-09-19 9:37 ` [PATCH 5/7] gcf2*: more descriptive package descriptions Eric Wong
2020-09-19 9:37 ` Eric Wong [this message]
2020-09-19 9:37 ` [PATCH 7/7] gcf2: wire up read-only daemons and rm -gcf2 script Eric Wong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: https://public-inbox.org/README
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20200919093714.21776-7-e@80x24.org \
--to=e@80x24.org \
--cc=meta@public-inbox.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).