unofficial mirror of meta@public-inbox.org
 help / color / mirror / Atom feed
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 5/5] git: parallelize manifest_entry
Date: Wed, 12 Apr 2023 00:13:02 +0000	[thread overview]
Message-ID: <20230412001302.685421-6-e@80x24.org> (raw)
In-Reply-To: <20230412001302.685421-1-e@80x24.org>

This saves a few milliseconds per-epoch without incurring
any dependencies on the event loop.  It can be parallelized
further, of course, but it may not be worth it for -extindex
users since it's already cached.
---
 lib/PublicInbox/Git.pm | 59 +++++++++++++++++++++---------------------
 1 file changed, 29 insertions(+), 30 deletions(-)

diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm
index fd5bbc6b..3108ed85 100644
--- a/lib/PublicInbox/Git.pm
+++ b/lib/PublicInbox/Git.pm
@@ -28,6 +28,10 @@ our $in_cleanup;
 our $RDTIMEO = 60_000; # milliseconds
 our $async_warn; # true in read-only daemons
 
+# committerdate:unix is git 2.9.4+ (2017-05-05), so using raw instead
+my @MODIFIED_DATE = qw[for-each-ref --sort=-committerdate
+			--format=%(committerdate:raw) --count=1];
+
 # 512: POSIX PIPE_BUF minimum (see pipe(7))
 # 3: @$inflight is flattened [ $OID, $cb, $arg ]
 # 65: SHA-256 hex size + "\n" in preparation for git using non-SHA1
@@ -592,10 +596,8 @@ sub cat_async ($$$;$) {
 
 # returns the modified time of a git repo, same as the "modified" field
 # of a grokmirror manifest
-sub modified ($) {
-	# committerdate:unix is git 2.9.4+ (2017-05-05), so using raw instead
-	my $fh = popen($_[0], qw[for-each-ref --sort=-committerdate
-				--format=%(committerdate:raw) --count=1]);
+sub modified ($;$) {
+	my $fh = $_[1] // popen($_[0], @MODIFIED_DATE);
 	(split(/ /, <$fh> // time))[0] + 0; # integerize for JSON
 }
 
@@ -632,41 +634,38 @@ sub cloneurl {
 # templates/this--description in git.git
 sub manifest_entry {
 	my ($self, $epoch, $default_desc) = @_;
-	my $fh = $self->popen('show-ref');
-	my $dig = PublicInbox::SHA->new(1);
-	while (read($fh, my $buf, 65536)) {
-		$dig->add($buf);
-	}
-	close $fh or return; # empty, uninitialized git repo
-	undef $fh; # for open, below
-	my $git_dir = $self->{git_dir};
-	my $ent = {
-		fingerprint => $dig->hexdigest,
-		reference => undef,
-		modified => modified($self),
-	};
-	chomp(my $owner = $self->qx('config', 'gitweb.owner'));
-	utf8::decode($owner);
-	$ent->{owner} = $owner eq '' ? undef : $owner;
-	my $desc = description($self);
-	if (defined $epoch && index($desc, 'Unnamed repository') == 0) {
-		$desc = "$default_desc [epoch $epoch]";
+	check_git_exe();
+	my $gd = $self->{git_dir};
+	my @git = ($GIT_EXE, "--git-dir=$gd");
+	my $sr = popen_rd([@git, 'show-ref']);
+	my $own = popen_rd([@git, qw(config gitweb.owner)]);
+	my $mod = popen_rd([@git, @MODIFIED_DATE]);
+	my $buf = description($self);
+	if (defined $epoch && index($buf, 'Unnamed repository') == 0) {
+		$buf = "$default_desc [epoch $epoch]";
 	}
-	$ent->{description} = $desc;
-	if (open($fh, '<', "$git_dir/objects/info/alternates")) {
+	my $ent = { description => $buf, reference => undef };
+	if (open(my $alt, '<', "$gd/objects/info/alternates")) {
 		# n.b.: GitPython doesn't seem to handle comments or C-quoted
 		# strings like native git does; and we don't for now, either.
 		local $/ = "\n";
-		chomp(my @alt = <$fh>);
+		chomp(my @alt = <$alt>);
 
 		# grokmirror only supports 1 alternate for "reference",
 		if (scalar(@alt) == 1) {
-			my $objdir = "$git_dir/objects";
-			my $ref = File::Spec->rel2abs($alt[0], $objdir);
-			$ref =~ s!/[^/]+/?\z!!; # basename
-			$ent->{reference} = $ref;
+			$buf = File::Spec->rel2abs($alt[0], "$gd/objects");
+			$buf =~ s!/[^/]+/?\z!!; # basename
+			$ent->{reference} = $buf;
 		}
 	}
+	my $dig = PublicInbox::SHA->new(1);
+	while (read($sr, $buf, 65536)) { $dig->add($buf) }
+	close $sr or return; # empty, uninitialized git repo
+	$ent->{fingerprint} = $dig->hexdigest;
+	$ent->{modified} = modified(undef, $mod);
+	chomp($buf = <$own> // '');
+	utf8::decode($buf);
+	$ent->{owner} = $buf eq '' ? undef : $buf;
 	$ent;
 }
 

      parent reply	other threads:[~2023-04-12  0:13 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-04-12  0:12 [PATCH 0/5] various cleanups and tweaks Eric Wong
2023-04-12  0:12 ` [PATCH 1/5] git: cat_async_step: reduce batch-command info checks Eric Wong
2023-04-12  0:12 ` [PATCH 2/5] gzip_filter: use carp in ->bail for failure checks Eric Wong
2023-04-12  0:13 ` [PATCH 3/5] git: rename version() to git_version() Eric Wong
2023-04-12  0:13 ` [PATCH 4/5] www_coderepo: drop unused $EACH_REF variable Eric Wong
2023-04-12  0:13 ` Eric Wong [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230412001302.685421-6-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).