From: Eric Wong <e@yhbt.net>
To: meta@public-inbox.org
Subject: [PATCH] git: various minor speedups
Date: Tue, 28 Apr 2020 08:48:58 +0000 [thread overview]
Message-ID: <20200428084858.16048-1-e@yhbt.net> (raw)
While testing performance improvements elsewhere, I noticed some
micro-optimizations could give a small ~2-3% speedup in my test
using the git async API to parse a large inbox.
The `read' perlfunc already has read-in-full behavior (unless
git is killed unexpectedly), so there's no point in using a
loop. SearchIdxShard in the parallel v2 indexing code path
never looped on `read', either.
Furthermore, we can avoid method dispatch overhead on ->getline
and ->print by using `readline' and `print' as ops which can be
resolved during the Perl compilation phase.
Finally, avoid passing the IO handle around as a parameter,
since avoiding hash lookups with a local variable has its own
costs in stack and refcount bumping.
Best of all, there's less code :>
---
lib/PublicInbox/Git.pm | 48 +++++++++++++++++-------------------------
1 file changed, 19 insertions(+), 29 deletions(-)
diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm
index 8410b2fc..f1911534 100644
--- a/lib/PublicInbox/Git.pm
+++ b/lib/PublicInbox/Git.pm
@@ -125,59 +125,49 @@ sub _bidi_pipe {
$self->{$in} = $in_r;
}
-sub read_cat_in_full ($$$) {
- my ($self, $in, $left) = @_;
- my $offset = 0;
- my $buf = '';
- while ($left > 0) {
- my $r = read($in, $buf, $left, $offset);
- defined($r) or fail($self, "read failed: $!");
- $r == 0 and fail($self, 'exited unexpectedly');
- $left -= $r;
- $offset += $r;
- }
- my $r = read($in, my $lf, 1);
- defined($r) or fail($self, "read failed: $!");
- fail($self, 'newline missing after blob') if ($r != 1 || $lf ne "\n");
+sub read_cat_in_full ($$) {
+ my ($self, $left) = @_;
+ ++$left; # for final "\n" added by git
+ my $r = read($self->{in}, my $buf, $left) == $left or
+ fail($self, 'short read');
+ chop($buf) eq "\n" or fail($self, 'newline missing after blob');
\$buf;
}
-sub _cat_async_step ($$$) {
- my ($self, $inflight, $in) = @_;
+sub _cat_async_step ($$) {
+ my ($self, $inflight) = @_;
my $pair = shift @$inflight or die 'BUG: inflight empty';
my ($cb, $arg) = @$pair;
local $/ = "\n";
- my $head = $in->getline;
+ my $head = readline($self->{in});
$head =~ / missing$/ and return
eval { $cb->(undef, undef, undef, undef, $arg) };
$head =~ /^([0-9a-f]{40}) (\S+) ([0-9]+)$/ or
fail($self, "Unexpected result from async git cat-file: $head");
my ($oid_hex, $type, $size) = ($1, $2, $3 + 0);
- my $bref = read_cat_in_full($self, $in, $size);
+ my $bref = read_cat_in_full($self, $size);
eval { $cb->($bref, $oid_hex, $type, $size, $arg) };
}
sub cat_async_wait ($) {
my ($self) = @_;
my $inflight = delete $self->{inflight} or return;
- my $in = $self->{in};
while (scalar(@$inflight)) {
- _cat_async_step($self, $inflight, $in);
+ _cat_async_step($self, $inflight);
}
}
sub cat_file {
my ($self, $obj, $ref) = @_;
- my ($retried, $in, $head);
+ my ($retried, $head);
cat_async_wait($self);
again:
batch_prepare($self);
- $self->{out}->print($obj, "\n") or fail($self, "write error: $!");
+ print { $self->{out} } $obj, "\n" or fail($self, "write error: $!");
- $in = $self->{in};
local $/ = "\n";
- $head = $in->getline;
+ $head = readline($self->{in});
if ($head =~ / missing$/) {
if (!$retried && alternates_changed($self)) {
$retried = 1;
@@ -191,7 +181,7 @@ again:
my $size = $1;
$$ref = $size if $ref;
- read_cat_in_full($self, $in, $size);
+ read_cat_in_full($self, $size);
}
sub batch_prepare ($) { _bidi_pipe($_[0], qw(--batch in out pid)) }
@@ -199,9 +189,9 @@ sub batch_prepare ($) { _bidi_pipe($_[0], qw(--batch in out pid)) }
sub check {
my ($self, $obj) = @_;
_bidi_pipe($self, qw(--batch-check in_c out_c pid_c err_c));
- $self->{out_c}->print($obj, "\n") or fail($self, "write error: $!");
+ print { $self->{out_c} } $obj, "\n" or fail($self, "write error: $!");
local $/ = "\n";
- chomp(my $line = $self->{in_c}->getline);
+ chomp(my $line = readline($self->{in_c}));
my ($hex, $type, $size) = split(' ', $line);
# Future versions of git.git may show 'ambiguous', but for now,
@@ -320,10 +310,10 @@ sub cat_async ($$$;$) {
my ($self, $oid, $cb, $arg) = @_;
my $inflight = $self->{inflight} or die 'BUG: not in async';
if (scalar(@$inflight) >= MAX_INFLIGHT) {
- _cat_async_step($self, $inflight, $self->{in});
+ _cat_async_step($self, $inflight);
}
- $self->{out}->print($oid, "\n") or fail($self, "write error: $!");
+ print { $self->{out} } $oid, "\n" or fail($self, "write error: $!");
push(@$inflight, [ $cb, $arg ]);
}
next reply other threads:[~2020-04-28 8:48 UTC|newest]
Thread overview: 2+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-04-28 8:48 Eric Wong [this message]
2020-04-29 20:33 ` [PATCH] git: various minor speedups Eric Wong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: https://public-inbox.org/README
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20200428084858.16048-1-e@yhbt.net \
--to=e@yhbt.net \
--cc=meta@public-inbox.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).