From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.2 required=3.0 tests=ALL_TRUSTED,BAYES_00, DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF shortcircuit=no autolearn=ham autolearn_force=no version=3.4.6 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id A90E31F489 for ; Wed, 12 Apr 2023 00:13:03 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=80x24.org; s=selector1; t=1681258383; bh=Tuyj7DiEOrCki5s88+NTJWdIZz82NZOUlqHp0FRItjI=; h=From:To:Subject:Date:In-Reply-To:References:From; b=iZbV3JtfKIPazk6hD90fN85nw5emwY0VbXXu3GCx+9+IwMvBQLo7zG8VuIFRP93lu y+VT/RKPwNwFDbSN1Yr2/mjOL/kPg+tG0R4MEk4eDs+Od0R3NY231nUoAKVdeXvLDE I0Ov2EGiYP4E4qNeMT7itm/WPlojnrCD1BwDAlbc= From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 5/5] git: parallelize manifest_entry Date: Wed, 12 Apr 2023 00:13:02 +0000 Message-Id: <20230412001302.685421-6-e@80x24.org> In-Reply-To: <20230412001302.685421-1-e@80x24.org> References: <20230412001302.685421-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: This saves a few milliseconds per-epoch without incurring any dependencies on the event loop. It can be parallelized further, of course, but it may not be worth it for -extindex users since it's already cached. --- lib/PublicInbox/Git.pm | 59 +++++++++++++++++++++--------------------- 1 file changed, 29 insertions(+), 30 deletions(-) diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm index fd5bbc6b..3108ed85 100644 --- a/lib/PublicInbox/Git.pm +++ b/lib/PublicInbox/Git.pm @@ -28,6 +28,10 @@ our $in_cleanup; our $RDTIMEO = 60_000; # milliseconds our $async_warn; # true in read-only daemons +# committerdate:unix is git 2.9.4+ (2017-05-05), so using raw instead +my @MODIFIED_DATE = qw[for-each-ref --sort=-committerdate + --format=%(committerdate:raw) --count=1]; + # 512: POSIX PIPE_BUF minimum (see pipe(7)) # 3: @$inflight is flattened [ $OID, $cb, $arg ] # 65: SHA-256 hex size + "\n" in preparation for git using non-SHA1 @@ -592,10 +596,8 @@ sub cat_async ($$$;$) { # returns the modified time of a git repo, same as the "modified" field # of a grokmirror manifest -sub modified ($) { - # committerdate:unix is git 2.9.4+ (2017-05-05), so using raw instead - my $fh = popen($_[0], qw[for-each-ref --sort=-committerdate - --format=%(committerdate:raw) --count=1]); +sub modified ($;$) { + my $fh = $_[1] // popen($_[0], @MODIFIED_DATE); (split(/ /, <$fh> // time))[0] + 0; # integerize for JSON } @@ -632,41 +634,38 @@ sub cloneurl { # templates/this--description in git.git sub manifest_entry { my ($self, $epoch, $default_desc) = @_; - my $fh = $self->popen('show-ref'); - my $dig = PublicInbox::SHA->new(1); - while (read($fh, my $buf, 65536)) { - $dig->add($buf); - } - close $fh or return; # empty, uninitialized git repo - undef $fh; # for open, below - my $git_dir = $self->{git_dir}; - my $ent = { - fingerprint => $dig->hexdigest, - reference => undef, - modified => modified($self), - }; - chomp(my $owner = $self->qx('config', 'gitweb.owner')); - utf8::decode($owner); - $ent->{owner} = $owner eq '' ? undef : $owner; - my $desc = description($self); - if (defined $epoch && index($desc, 'Unnamed repository') == 0) { - $desc = "$default_desc [epoch $epoch]"; + check_git_exe(); + my $gd = $self->{git_dir}; + my @git = ($GIT_EXE, "--git-dir=$gd"); + my $sr = popen_rd([@git, 'show-ref']); + my $own = popen_rd([@git, qw(config gitweb.owner)]); + my $mod = popen_rd([@git, @MODIFIED_DATE]); + my $buf = description($self); + if (defined $epoch && index($buf, 'Unnamed repository') == 0) { + $buf = "$default_desc [epoch $epoch]"; } - $ent->{description} = $desc; - if (open($fh, '<', "$git_dir/objects/info/alternates")) { + my $ent = { description => $buf, reference => undef }; + if (open(my $alt, '<', "$gd/objects/info/alternates")) { # n.b.: GitPython doesn't seem to handle comments or C-quoted # strings like native git does; and we don't for now, either. local $/ = "\n"; - chomp(my @alt = <$fh>); + chomp(my @alt = <$alt>); # grokmirror only supports 1 alternate for "reference", if (scalar(@alt) == 1) { - my $objdir = "$git_dir/objects"; - my $ref = File::Spec->rel2abs($alt[0], $objdir); - $ref =~ s!/[^/]+/?\z!!; # basename - $ent->{reference} = $ref; + $buf = File::Spec->rel2abs($alt[0], "$gd/objects"); + $buf =~ s!/[^/]+/?\z!!; # basename + $ent->{reference} = $buf; } } + my $dig = PublicInbox::SHA->new(1); + while (read($sr, $buf, 65536)) { $dig->add($buf) } + close $sr or return; # empty, uninitialized git repo + $ent->{fingerprint} = $dig->hexdigest; + $ent->{modified} = modified(undef, $mod); + chomp($buf = <$own> // ''); + utf8::decode($buf); + $ent->{owner} = $buf eq '' ? undef : $buf; $ent; }