From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 5/5] git: parallelize manifest_entry
Date: Wed, 12 Apr 2023 00:13:02 +0000 [thread overview]
Message-ID: <20230412001302.685421-6-e@80x24.org> (raw)
In-Reply-To: <20230412001302.685421-1-e@80x24.org>
This saves a few milliseconds per epoch without incurring
any dependencies on the event loop. It can be parallelized
further, of course, but doing so may not be worthwhile for
-extindex users since the result is already cached.
---
lib/PublicInbox/Git.pm | 59 +++++++++++++++++++++---------------------
1 file changed, 29 insertions(+), 30 deletions(-)
diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm
index fd5bbc6b..3108ed85 100644
--- a/lib/PublicInbox/Git.pm
+++ b/lib/PublicInbox/Git.pm
@@ -28,6 +28,10 @@ our $in_cleanup;
our $RDTIMEO = 60_000; # milliseconds
our $async_warn; # true in read-only daemons
+# committerdate:unix is git 2.9.4+ (2017-05-05), so using raw instead
+my @MODIFIED_DATE = qw[for-each-ref --sort=-committerdate
+ --format=%(committerdate:raw) --count=1];
+
# 512: POSIX PIPE_BUF minimum (see pipe(7))
# 3: @$inflight is flattened [ $OID, $cb, $arg ]
# 65: SHA-256 hex size + "\n" in preparation for git using non-SHA1
@@ -592,10 +596,8 @@ sub cat_async ($$$;$) {
# returns the modified time of a git repo, same as the "modified" field
# of a grokmirror manifest
-sub modified ($) {
- # committerdate:unix is git 2.9.4+ (2017-05-05), so using raw instead
- my $fh = popen($_[0], qw[for-each-ref --sort=-committerdate
- --format=%(committerdate:raw) --count=1]);
+sub modified ($;$) {
+ my $fh = $_[1] // popen($_[0], @MODIFIED_DATE);
(split(/ /, <$fh> // time))[0] + 0; # integerize for JSON
}
@@ -632,41 +634,38 @@ sub cloneurl {
# templates/this--description in git.git
sub manifest_entry {
my ($self, $epoch, $default_desc) = @_;
- my $fh = $self->popen('show-ref');
- my $dig = PublicInbox::SHA->new(1);
- while (read($fh, my $buf, 65536)) {
- $dig->add($buf);
- }
- close $fh or return; # empty, uninitialized git repo
- undef $fh; # for open, below
- my $git_dir = $self->{git_dir};
- my $ent = {
- fingerprint => $dig->hexdigest,
- reference => undef,
- modified => modified($self),
- };
- chomp(my $owner = $self->qx('config', 'gitweb.owner'));
- utf8::decode($owner);
- $ent->{owner} = $owner eq '' ? undef : $owner;
- my $desc = description($self);
- if (defined $epoch && index($desc, 'Unnamed repository') == 0) {
- $desc = "$default_desc [epoch $epoch]";
+ check_git_exe();
+ my $gd = $self->{git_dir};
+ my @git = ($GIT_EXE, "--git-dir=$gd");
+ my $sr = popen_rd([@git, 'show-ref']);
+ my $own = popen_rd([@git, qw(config gitweb.owner)]);
+ my $mod = popen_rd([@git, @MODIFIED_DATE]);
+ my $buf = description($self);
+ if (defined $epoch && index($buf, 'Unnamed repository') == 0) {
+ $buf = "$default_desc [epoch $epoch]";
}
- $ent->{description} = $desc;
- if (open($fh, '<', "$git_dir/objects/info/alternates")) {
+ my $ent = { description => $buf, reference => undef };
+ if (open(my $alt, '<', "$gd/objects/info/alternates")) {
# n.b.: GitPython doesn't seem to handle comments or C-quoted
# strings like native git does; and we don't for now, either.
local $/ = "\n";
- chomp(my @alt = <$fh>);
+ chomp(my @alt = <$alt>);
# grokmirror only supports 1 alternate for "reference",
if (scalar(@alt) == 1) {
- my $objdir = "$git_dir/objects";
- my $ref = File::Spec->rel2abs($alt[0], $objdir);
- $ref =~ s!/[^/]+/?\z!!; # basename
- $ent->{reference} = $ref;
+ $buf = File::Spec->rel2abs($alt[0], "$gd/objects");
+ $buf =~ s!/[^/]+/?\z!!; # basename
+ $ent->{reference} = $buf;
}
}
+ my $dig = PublicInbox::SHA->new(1);
+ while (read($sr, $buf, 65536)) { $dig->add($buf) }
+ close $sr or return; # empty, uninitialized git repo
+ $ent->{fingerprint} = $dig->hexdigest;
+ $ent->{modified} = modified(undef, $mod);
+ chomp($buf = <$own> // '');
+ utf8::decode($buf);
+ $ent->{owner} = $buf eq '' ? undef : $buf;
$ent;
}
prev parent reply other threads:[~2023-04-12 0:13 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-04-12 0:12 [PATCH 0/5] various cleanups and tweaks Eric Wong
2023-04-12 0:12 ` [PATCH 1/5] git: cat_async_step: reduce batch-command info checks Eric Wong
2023-04-12 0:12 ` [PATCH 2/5] gzip_filter: use carp in ->bail for failure checks Eric Wong
2023-04-12 0:13 ` [PATCH 3/5] git: rename version() to git_version() Eric Wong
2023-04-12 0:13 ` [PATCH 4/5] www_coderepo: drop unused $EACH_REF variable Eric Wong
2023-04-12 0:13 ` Eric Wong [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: https://public-inbox.org/README
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230412001302.685421-6-e@80x24.org \
--to=e@80x24.org \
--cc=meta@public-inbox.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).