unofficial mirror of meta@public-inbox.org
 help / color / mirror / Atom feed
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 1/2] viewvcs: parallelize commit display
Date: Mon, 12 Feb 2024 13:13:49 +0000	[thread overview]
Message-ID: <20240212131350.3443394-2-e@80x24.org> (raw)
In-Reply-To: <20240212131350.3443394-1-e@80x24.org>

Similar to commit cbe2548c91859dfb923548ea85d8531b90d53dc3
(www_coderepo: use OnDestroy to render summary view,
2023-04-09), we can rely on OnDestroy and Qspawn to run
dependencies in a structured way and with some extra parallelism
for SMP users.

Perl (as opposed to POSIX sh) allows us to easily avoid
expensive patch generation for large root commits, and also avoid
needless `git patch-id' invocations for patches which are too
big to show.

Avoiding patch-id alone saved nearly 2s from the linux.git root
commit[1] with patch generation enabled and brought response
times down to ~6s (still slow).  Avoiding patch generation for
root commits (like cgit also does) brings it down to a few
hundred milliseconds on a public-facing server.

[1] torvalds/linux.git 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2
---
 lib/PublicInbox/ViewVCS.pm | 100 +++++++++++++++++++++++--------------
 1 file changed, 63 insertions(+), 37 deletions(-)

diff --git a/lib/PublicInbox/ViewVCS.pm b/lib/PublicInbox/ViewVCS.pm
index 3d835289..2a305303 100644
--- a/lib/PublicInbox/ViewVCS.pm
+++ b/lib/PublicInbox/ViewVCS.pm
@@ -28,7 +28,8 @@ use PublicInbox::Eml;
 use Text::Wrap qw(wrap);
 use PublicInbox::Hval qw(ascii_html to_filename prurl utf8_maybe);
 use POSIX qw(strftime);
-use autodie qw(open);
+use autodie qw(open seek truncate);
+use Fcntl qw(SEEK_SET);
 my $hl = eval {
 	require PublicInbox::HlMod;
 	PublicInbox::HlMod->new;
@@ -59,7 +60,7 @@ sub html_page ($$;@) {
 sub dbg_log ($) {
 	my ($ctx) = @_;
 	my $log = delete $ctx->{lh} // die 'BUG: already captured debug log';
-	if (!seek($log, 0, 0)) {
+	if (!CORE::seek($log, 0, SEEK_SET)) {
 		warn "seek(log): $!";
 		return '<pre>debug log seek error</pre>';
 	}
@@ -119,17 +120,18 @@ sub show_other_result ($$) { # future-proofing
 }
 
 sub cmt_title { # git->cat_async callback
-	my ($bref, $oid, $type, $size, $ctx) = @_;
+	my ($bref, $oid, $type, $size, $ctx_cb) = @_;
 	utf8_maybe($$bref);
 	my $title = $$bref =~ /\r?\n\r?\n([^\r\n]+)\r?\n?/ ? $1 : '';
-	push(@{$ctx->{-cmt_pt}} , ascii_html($title)) == @{$ctx->{-cmt_P}} and
-		cmt_finalize($ctx);
+	# $ctx_cb is [ $ctx, $cmt_fin ]
+	push @{$ctx_cb->[0]->{-cmt_pt}}, ascii_html($title);
 }
 
 sub do_cat_async {
-	my ($ctx, $cb, @req) = @_;
+	my ($arg, $cb, @req) = @_;
 	# favor git(1) over Gcf2 (libgit2) for SHA-256 support
-	$ctx->{git}->cat_async($_, $cb, $ctx) for @req;
+	my $ctx = ref $arg eq 'ARRAY' ? $arg->[0] : $arg;
+	$ctx->{git}->cat_async($_, $cb, $arg) for @req;
 	if ($ctx->{env}->{'pi-httpd.async'}) {
 		$ctx->{git}->watch_async;
 	} else { # synchronous, generic PSGI
@@ -147,24 +149,44 @@ sub do_check_async {
 	}
 }
 
-sub show_commit_start { # ->psgi_qx callback
-	my ($bref, $ctx) = @_;
-	if (my $qsp_err = delete $ctx->{-qsp_err}) {
-		return html_page($ctx, 500, dbg_log($ctx) .
-				"git show/patch-id error:$qsp_err");
-	}
-	my $patchid = (split(/ /, $$bref))[0]; # ignore commit
-	$ctx->{-q_value_html} = "patchid:$patchid" if defined $patchid;
-	open my $fh, '<', "$ctx->{-tmp}/h";
-	chop(my $buf = do { local $/ = "\0"; <$fh> });
+sub cmt_hdr_prep { # psgi_qx cb
+	my ($fh, $ctx, $cmt_fin) = @_;
+	return if $ctx->{-qsp_err_h}; # let cmt_fin handle it
+	seek $fh, 0, SEEK_SET;
+	my $buf = do { local $/ = "\0"; <$fh> } // die "readline: $!";
+	chop($buf) eq "\0" or die 'no NUL in git show -z output';
 	utf8_maybe($buf); # non-UTF-8 commits exist
 	chomp $buf;
-	my ($P, $p);
-	($P, $p, @{$ctx->{cmt_info}}) = split(/\n/, $buf, 9);
-	return cmt_finalize($ctx) if !$P;
-	@{$ctx->{-cmt_P}} = split(/ /, $P);
-	@{$ctx->{-cmt_p}} = split(/ /, $p); # abbreviated
-	do_cat_async($ctx, \&cmt_title, @{$ctx->{-cmt_P}});
+	(my $P, my $p, @{$ctx->{cmt_info}}) = split(/\n/, $buf, 9);
+	truncate $fh, 0;
+	return unless $P;
+	seek $fh, 0, SEEK_SET;
+	my $qsp_p = PublicInbox::Qspawn->new($ctx->{git}->cmd(qw(show
+		--encoding=UTF-8 --pretty=format:%n -M --stat -p), $ctx->{oid}),
+		undef, { 1 => $fh });
+	$qsp_p->{qsp_err} = \($ctx->{-qsp_err_p} = '');
+	$qsp_p->psgi_qx($ctx->{env}, undef, \&cmt_patch_prep, $ctx, $cmt_fin);
+	@{$ctx->{-cmt_P}} = split / /, $P;
+	@{$ctx->{-cmt_p}} = split / /, $p; # abbreviated
+	do_cat_async([$ctx, $cmt_fin], \&cmt_title, @{$ctx->{-cmt_P}});
+}
+
+sub read_patchid { # psgi_qx cb
+	my ($bref, $ctx, $cmt_fin) = @_;
+	my ($patchid) = split(/ /, $$bref); # ignore commit
+	$ctx->{-q_value_html} = "patchid:$patchid" if defined $patchid;
+}
+
+sub cmt_patch_prep { # psgi_qx cb
+	my ($fh, $ctx, $cmt_fin) = @_;
+	return if $ctx->{-qsp_err_p}; # let cmt_fin handle error
+	return if -s $fh > $MAX_SIZE; # too big to show, too big to patch-id
+	seek $fh, 0, SEEK_SET;
+	my $qsp = PublicInbox::Qspawn->new(
+				$ctx->{git}->cmd(qw(patch-id --stable)),
+				undef, { 0 => $fh });
+	$qsp->{qsp_err} = \$ctx->{-qsp_err_p};
+	$qsp->psgi_qx($ctx->{env}, undef, \&read_patchid, $ctx, $cmt_fin);
 }
 
 sub ibx_url_for {
@@ -194,8 +216,14 @@ sub ibx_url_for {
 	wantarray ? (@ret) : $ret[0];
 }
 
-sub cmt_finalize {
+sub cmt_fin { # OnDestroy cb
 	my ($ctx) = @_;
+	my ($eh, $ep) = delete @$ctx{qw(-qsp_err_h -qsp_err_p)};
+	if ($eh || $ep) {
+		my $e = join(' - ', grep defined, $eh, $ep);
+		return html_page($ctx, 500, dbg_log($ctx) .
+				"git show/patch-id error:$e");
+	}
 	$ctx->{-linkify} //= PublicInbox::Linkify->new;
 	my $upfx = $ctx->{-upfx} = '../../'; # from "/$INBOX/$OID/s/"
 	my ($H, $T, $s, $f, $au, $co, $bdy) = @{delete $ctx->{cmt_info}};
@@ -243,11 +271,12 @@ committer $co
 <b>$s</b>
 EOM
 	print $zfh "\n", $ctx->{-linkify}->to_html($bdy) if length($bdy);
-	$bdy = '';
-	open my $fh, '<', "$ctx->{-tmp}/p";
+	undef $bdy; # free memory
+	my $fh = delete $ctx->{patch_fh};
 	if (-s $fh > $MAX_SIZE) {
 		print $zfh "---\n patch is too large to show\n";
 	} else { # prepare flush_diff:
+		seek $fh, 0, SEEK_SET;
 		PublicInbox::IO::read_all $fh, -s _, \$x;
 		utf8_maybe($x);
 		$ctx->{-apfx} = $ctx->{-spfx} = $upfx;
@@ -350,18 +379,15 @@ sub show_commit ($$) {
 	# patch-id needs two passes, and we use the initial show to ensure
 	# a patch embedded inside the commit message body doesn't get fed
 	# to patch-id:
-	my $cmd = [ '/bin/sh', '-c',
-		"git show --encoding=UTF-8 '$SHOW_FMT'".
-		" -z --no-notes --no-patch $oid >h && ".
-		'git show --encoding=UTF-8 --pretty=format:%n -M'.
-		" --stat -p $oid >p && ".
-		"git patch-id --stable <p" ];
-	my $e = { GIT_DIR => $git->{git_dir} };
-	my $qsp = PublicInbox::Qspawn->new($cmd, $e, { -C => "$ctx->{-tmp}" });
-	$qsp->{qsp_err} = \($ctx->{-qsp_err} = '');
-	$ctx->{env}->{'qspawn.wcb'} = $ctx->{-wcb};
+	open $ctx->{patch_fh}, '+>', "$ctx->{-tmp}/show";
+	my $qsp_h = PublicInbox::Qspawn->new($git->cmd('show', $SHOW_FMT,
+			qw(--encoding=UTF-8 -z --no-notes --no-patch), $oid),
+			undef, { 1 => $ctx->{patch_fh} });
+	$qsp_h->{qsp_err} = \($ctx->{-qsp_err_h} = '');
+	my $cmt_fin = PublicInbox::OnDestroy->new($$, \&cmt_fin, $ctx);
 	$ctx->{git} = $git;
-	$qsp->psgi_qx($ctx->{env}, undef, \&show_commit_start, $ctx);
+	$ctx->{oid} = $oid;
+	$qsp_h->psgi_qx($ctx->{env}, undef, \&cmt_hdr_prep, $ctx, $cmt_fin);
 }
 
 sub show_other ($$) { # just in case...

  reply	other threads:[~2024-02-12 13:13 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-02-12 13:13 [PATCH 0/2] viewvcs improvements Eric Wong
2024-02-12 13:13 ` Eric Wong [this message]
2024-02-12 19:58   ` [PATCH 1/2] viewvcs: parallelize commit display Eric Wong
2024-02-12 13:13 ` [PATCH 2/2] viewvcs: HTML fixes for commits Eric Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240212131350.3443394-2-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).