* [PATCH] favor git(1) rather than libgit2 for ExtSearch
@ 2021-06-24 5:50 Eric Wong
0 siblings, 0 replies; only message in thread
From: Eric Wong @ 2021-06-24 5:50 UTC (permalink / raw)
To: meta
While both git and libgit2 take around 16 minutes to load 100K
alternates, there's already a proposed patch to make git faster:
<https://lore.kernel.org/git/20210624005806.12079-1-e@80x24.org/>
It's also easier to patch and install git locally since the
git.git build system defaults to prefix=$HOME, and dealing with
dynamic linking against libgit2 is more difficult for end users
relying on Inline::C.
libgit2 remains in use for the non-ALL.git case, but maybe it's
not necessary (libgit2 is significantly slower than git in
Debian 10 due to SHA-1 collision checking).
---
Documentation/technical/ds.txt | 2 +-
lib/PublicInbox/GitAsyncCat.pm | 21 +++++++++++++--------
lib/PublicInbox/GzipFilter.pm | 3 +--
lib/PublicInbox/HTTPD.pm | 2 +-
lib/PublicInbox/IMAP.pm | 10 +++++-----
lib/PublicInbox/NNTP.pm | 4 ++--
lib/PublicInbox/SolverGit.pm | 3 +--
7 files changed, 24 insertions(+), 21 deletions(-)
diff --git a/Documentation/technical/ds.txt b/Documentation/technical/ds.txt
index a0793ca2..7bc1ad79 100644
--- a/Documentation/technical/ds.txt
+++ b/Documentation/technical/ds.txt
@@ -64,7 +64,7 @@ Augmented features:
* ->requeue support. An optimization of the AddTimer(0, ...) idiom
for immediately dispatching code at the next event loop iteration.
public-inbox uses this for fairly generating large responses
- iteratively (see PublicInbox::NNTP::long_response or git_async_cat
+ iteratively (see PublicInbox::NNTP::long_response or ibx_async_cat
for blob retrievals).
New features
diff --git a/lib/PublicInbox/GitAsyncCat.pm b/lib/PublicInbox/GitAsyncCat.pm
index 7d1a13db..57c194d9 100644
--- a/lib/PublicInbox/GitAsyncCat.pm
+++ b/lib/PublicInbox/GitAsyncCat.pm
@@ -8,7 +8,7 @@ use strict;
use parent qw(PublicInbox::DS Exporter);
use POSIX qw(WNOHANG);
use PublicInbox::Syscall qw(EPOLLIN EPOLLET);
-our @EXPORT = qw(git_async_cat git_async_prefetch);
+our @EXPORT = qw(ibx_async_cat ibx_async_prefetch);
use PublicInbox::Git ();
our $GCF2C; # singleton PublicInbox::Gcf2Client
@@ -45,12 +45,16 @@ sub event_step {
}
}
-sub git_async_cat ($$$$) {
- my ($git, $oid, $cb, $arg) = @_;
- if ($GCF2C //= eval {
+sub ibx_async_cat ($$$$) {
+ my ($ibx, $oid, $cb, $arg) = @_;
+ my $git = $ibx->git;
+ # {topdir} means ExtSearch (likely [extindex "all"]) with potentially
+ # 100K alternates. git(1) has a proposed patch for 100K alternates:
+ # <https://lore.kernel.org/git/20210624005806.12079-1-e@80x24.org/>
+ if (!defined($ibx->{topdir}) && ($GCF2C //= eval {
require PublicInbox::Gcf2Client;
PublicInbox::Gcf2Client::new();
- } // 0) { # 0: do not retry if libgit2 or Inline::C are missing
+ } // 0)) { # 0: do not retry if libgit2 or Inline::C are missing
$GCF2C->gcf2_async(\"$oid $git->{git_dir}\n", $cb, $arg);
\undef;
} else { # read-only end of git-cat-file pipe
@@ -66,9 +70,10 @@ sub git_async_cat ($$$$) {
# this is safe to call inside $cb, but not guaranteed to enqueue
# returns true if successful, undef if not.
-sub git_async_prefetch {
- my ($git, $oid, $cb, $arg) = @_;
- if ($GCF2C) {
+sub ibx_async_prefetch {
+ my ($ibx, $oid, $cb, $arg) = @_;
+ my $git = $ibx->git;
+ if (!defined($ibx->{topdir}) && $GCF2C) {
if (!$GCF2C->{wbuf}) {
$oid .= " $git->{git_dir}\n";
return $GCF2C->gcf2_async(\$oid, $cb, $arg); # true
diff --git a/lib/PublicInbox/GzipFilter.pm b/lib/PublicInbox/GzipFilter.pm
index 48ed11a5..334d6581 100644
--- a/lib/PublicInbox/GzipFilter.pm
+++ b/lib/PublicInbox/GzipFilter.pm
@@ -180,8 +180,7 @@ sub async_blob_cb { # git->cat_async callback
sub smsg_blob {
my ($self, $smsg) = @_;
- git_async_cat($self->{ibx}->git, $smsg->{blob},
- \&async_blob_cb, $self);
+ ibx_async_cat($self->{ibx}, $smsg->{blob}, \&async_blob_cb, $self);
}
1;
diff --git a/lib/PublicInbox/HTTPD.pm b/lib/PublicInbox/HTTPD.pm
index b193c9ae..fb683f74 100644
--- a/lib/PublicInbox/HTTPD.pm
+++ b/lib/PublicInbox/HTTPD.pm
@@ -37,7 +37,7 @@ sub new {
# XXX unstable API!, only GitHTTPBackend needs
# this to limit git-http-backend(1) parallelism.
# We also check for the truthiness of this to
- # detect when to use git_async_cat for slow blobs
+ # detect when to use async paths for slow blobs
'pi-httpd.async' => \&pi_httpd_async
);
bless {
diff --git a/lib/PublicInbox/IMAP.pm b/lib/PublicInbox/IMAP.pm
index af8ce72b..9402aa41 100644
--- a/lib/PublicInbox/IMAP.pm
+++ b/lib/PublicInbox/IMAP.pm
@@ -612,7 +612,7 @@ sub fetch_run_ops {
$self->msg_more(")\r\n");
}
-sub fetch_blob_cb { # called by git->cat_async via git_async_cat
+sub fetch_blob_cb { # called by git->cat_async via ibx_async_cat
my ($bref, $oid, $type, $size, $fetch_arg) = @_;
my ($self, undef, $msgs, $range_info, $ops, $partial) = @$fetch_arg;
my $ibx = $self->{ibx} or return $self->close; # client disconnected
@@ -627,8 +627,8 @@ sub fetch_blob_cb { # called by git->cat_async via git_async_cat
}
my $pre;
if (!$self->{wbuf} && (my $nxt = $msgs->[0])) {
- $pre = git_async_prefetch($ibx->git, $nxt->{blob},
- \&fetch_blob_cb, $fetch_arg);
+ $pre = ibx_async_prefetch($ibx, $nxt->{blob},
+ \&fetch_blob_cb, $fetch_arg);
}
fetch_run_ops($self, $smsg, $bref, $ops, $partial);
$pre ? $self->zflush : requeue_once($self);
@@ -760,7 +760,7 @@ sub fetch_blob { # long_response
}
}
uo2m_extend($self, $msgs->[-1]->{num});
- git_async_cat($self->{ibx}->git, $msgs->[0]->{blob},
+ ibx_async_cat($self->{ibx}, $msgs->[0]->{blob},
\&fetch_blob_cb, \@_);
}
@@ -1228,7 +1228,7 @@ sub long_step {
} elsif ($more) { # $self->{wbuf}:
$self->update_idle_time;
- # control passed to git_async_cat if $more == \undef
+ # control passed to ibx_async_cat if $more == \undef
requeue_once($self) if !ref($more);
} else { # all done!
delete $self->{long_cb};
diff --git a/lib/PublicInbox/NNTP.pm b/lib/PublicInbox/NNTP.pm
index f7d99913..9df47133 100644
--- a/lib/PublicInbox/NNTP.pm
+++ b/lib/PublicInbox/NNTP.pm
@@ -515,7 +515,7 @@ found:
$smsg->{nntp_code} = $code;
set_art($self, $art);
# this dereferences to `undef'
- ${git_async_cat($ibx->git, $smsg->{blob}, \&blob_cb, $smsg)};
+ ${ibx_async_cat($ibx, $smsg->{blob}, \&blob_cb, $smsg)};
}
}
@@ -549,7 +549,7 @@ sub msg_hdr_write ($$) {
$smsg->{nntp}->msg_more($$hdr);
}
-sub blob_cb { # called by git->cat_async via git_async_cat
+sub blob_cb { # called by git->cat_async via ibx_async_cat
my ($bref, $oid, $type, $size, $smsg) = @_;
my $self = $smsg->{nntp};
my $code = $smsg->{nntp_code};
diff --git a/lib/PublicInbox/SolverGit.pm b/lib/PublicInbox/SolverGit.pm
index 92106e75..b0cd0f2c 100644
--- a/lib/PublicInbox/SolverGit.pm
+++ b/lib/PublicInbox/SolverGit.pm
@@ -593,8 +593,7 @@ sub resolve_patch ($$) {
if (my $msgs = $want->{try_smsgs}) {
my $smsg = shift @$msgs;
if ($self->{psgi_env}->{'pi-httpd.async'}) {
- return git_async_cat($want->{cur_ibx}->git,
- $smsg->{blob},
+ return ibx_async_cat($want->{cur_ibx}, $smsg->{blob},
\&extract_diff_async,
[$self, $want, $smsg]);
} else {
^ permalink raw reply related [flat|nested] only message in thread
only message in thread, other threads:[~2021-06-24 5:50 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-06-24 5:50 [PATCH] favor git(1) rather than libgit2 for ExtSearch Eric Wong
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).