From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id F409E1F4B4; Thu, 10 Sep 2020 01:51:53 +0000 (UTC) Date: Thu, 10 Sep 2020 01:51:53 +0000 From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 12/11] solver: async blob retrieval for diff extraction Message-ID: <20200910015153.GA8922@dcvr> References: <20200909062618.5940-1-e@80x24.org> MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Disposition: inline In-Reply-To: <20200909062618.5940-1-e@80x24.org> List-Id: Like the rest of the WWW code, public-inbox-httpd now uses git_async_cat to retrieve blobs without blocking the event loop. This improves fairness when git blobs are on slow storage and allows us to take better advantage of SMP systems. --- lib/PublicInbox/SolverGit.pm | 85 +++++++++++++++++++++++------------- 1 file changed, 54 insertions(+), 31 deletions(-) diff --git a/lib/PublicInbox/SolverGit.pm b/lib/PublicInbox/SolverGit.pm index c54d6d54..ae3997ca 100644 --- a/lib/PublicInbox/SolverGit.pm +++ b/lib/PublicInbox/SolverGit.pm @@ -16,6 +16,8 @@ use PublicInbox::Git qw(git_unquote git_quote); use PublicInbox::MsgIter qw(msg_part_text); use PublicInbox::Qspawn; use PublicInbox::Tmpfile; +use PublicInbox::GitAsyncCat; +use PublicInbox::Eml; use URI::Escape qw(uri_escape_utf8); # POSIX requires _POSIX_ARG_MAX >= 4096, and xargs is required to @@ -540,6 +542,47 @@ sub try_harder { die "E: $@" if $@; } +sub extract_diffs_done { + my ($self, $want) = @_; + + delete $want->{try_smsgs}; + delete $want->{cur_ibx}; + + my $diffs = delete $self->{tmp_diffs}; + if (scalar @$diffs) { + unshift @{$self->{patches}}, @$diffs; + dbg($self, "found $want->{oid_b} in " . join(" ||\n\t", + map { di_url($self, $_) } @$diffs)); + + # good, we can find a path to the oid we $want, now + # lets see if we need to apply more patches: + my $di = $diffs->[0]; + my $src = $di->{oid_a}; + + unless ($src =~ /\A0+\z/) { + # we have to solve it using another oid, fine: + my $job = { oid_b => $src, path_b => $di->{path_a} }; + push @{$self->{todo}}, $job; + } + goto \&next_step; # onto the next todo item + } + goto \&try_harder; +} + +sub extract_diff_async { + my ($bref, $oid, $type, $size, $x) = @_; + my ($self, $want, $smsg) = @$x; + if (defined($oid)) { + $smsg->{blob} eq $oid or + ERR($self, "BUG: $smsg->{blob} != $oid"); + PublicInbox::Eml->new($bref)->each_part(\&extract_diff, $x, 1); + } + + scalar(@{$want->{try_smsgs}}) ? + retry_current($self, $want) : + extract_diffs_done($self, $want); +} + sub resolve_patch ($$) { my ($self, $want) = @_; @@ -550,39 +593,19 @@ sub resolve_patch ($$) { if (my $msgs = $want->{try_smsgs}) { my $smsg = shift @$msgs; - if (my $eml = $want->{cur_ibx}->smsg_eml($smsg)) { - $eml->each_part(\&extract_diff, - [ $self, $want, $smsg ], 1); - } - - # try the remaining smsgs later - goto \&retry_current if scalar @$msgs; - - delete $want->{try_smsgs}; - delete $want->{cur_ibx}; - - my $diffs = delete $self->{tmp_diffs}; - if (scalar @$diffs) { - unshift @{$self->{patches}}, @$diffs; - dbg($self, "found $cur_want in " . join(" ||\n\t", - map { di_url($self, $_) } @$diffs)); - - # good, we can find a path to the oid we $want, now - # lets see if we need to apply more patches: - my $di = $diffs->[0]; - my $src = $di->{oid_a}; - - unless ($src =~ /\A0+\z/) { - # we have to solve it using another oid, fine: - my $job = { - oid_b => $src, - path_b => $di->{path_a}, - }; - push @{$self->{todo}}, $job; + if ($self->{psgi_env}->{'pi-httpd.async'}) { + return git_async_cat($want->{cur_ibx}->git, + $smsg->{blob}, + \&extract_diff_async, + [$self, $want, $smsg]); + } else { + if (my $eml = $want->{cur_ibx}->smsg_eml($smsg)) { + $eml->each_part(\&extract_diff, + [ $self, $want, $smsg ], 1); } - goto \&next_step; # onto the next todo item } - goto \&try_harder; + + goto(scalar @$msgs ? \&retry_current : \&extract_diffs_done); } # see if we can find the blob in an existing git repo: