A couple more things to mitigate the effects of slow storage with many inboxes. Mostly solver-related, and still more to come... (Hoping the electrical grid stays up and dust bunny removal solved overheating problems). Eric Wong (11): xt/solver: test with public-inbox-httpd, too solver: drop warnings, modernize use v5.10.1, use SEEK_SET use "\&" where possible when referring to subroutines www: manifest.js.gz generation no longer hogs event loop config: flatten each_inbox and iterate_start args config: split out iterator into separate object t/cgi.t: show stderr on failures extmsg: prevent cross-inbox matches from hogging event loop wwwlisting: avoid hogging event loop solver: check one git coderepo and inbox at a time solver: break apart inbox blob retrieval MANIFEST | 2 + lib/PublicInbox/Cgit.pm | 5 +- lib/PublicInbox/Config.pm | 22 +-- lib/PublicInbox/ConfigIter.pm | 40 +++++ lib/PublicInbox/ExtMsg.pm | 102 ++++++++---- lib/PublicInbox/IMAPD.pm | 6 +- lib/PublicInbox/Inbox.pm | 2 +- lib/PublicInbox/ManifestJsGz.pm | 135 ++++++++++++++++ lib/PublicInbox/SolverGit.pm | 190 +++++++++++++--------- lib/PublicInbox/TestCommon.pm | 4 +- lib/PublicInbox/WWW.pm | 21 +-- lib/PublicInbox/Watch.pm | 13 +- lib/PublicInbox/WwwListing.pm | 279 ++++++++------------------------ t/cgi.t | 2 +- t/replace.t | 8 +- t/solver_git.t | 7 +- t/www_listing.t | 7 +- xt/msgtime_cmp.t | 2 +- xt/solver.t | 31 +++- 19 files changed, 499 insertions(+), 379 deletions(-) create mode 100644 lib/PublicInbox/ConfigIter.pm create mode 100644 lib/PublicInbox/ManifestJsGz.pm
We'll be making changes to solver to make it even fairer to slow clients on slow storage. Ensure we test with public-inbox-httpd-specific codepaths, since the generic PSGI code paths are rare in production use. --- xt/solver.t | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/xt/solver.t b/xt/solver.t index d2206b28..99fca0d3 100644 --- a/xt/solver.t +++ b/xt/solver.t @@ -33,11 +33,11 @@ my $todo = { ], }; -my ($ibx, $urls); +my ($ibx_name, $urls, @gone); my $client = sub { my ($cb) = @_; for (@$urls) { - my $url = "/$ibx/$_"; + my $url = "/$ibx_name/$_"; my $res = $cb->(GET($url)); is($res->code, 200, $url); next if $res->code == 200; @@ -46,14 +46,33 @@ my $client = sub { } }; -while (($ibx, $urls) = each %$todo) { +my $nr = 0; +while (($ibx_name, $urls) = each %$todo) { SKIP: { - if (!$cfg->lookup_name($ibx)) { - skip("$ibx not configured", scalar(@$urls)); + if (!$cfg->lookup_name($ibx_name)) { + push @gone, $ibx_name; + skip("$ibx_name not configured", scalar(@$urls)); } test_psgi($app, $client); + $nr++; + } +} + +SKIP: { + require_mods(qw(Plack::Test::ExternalServer), $nr); + delete @$todo{@gone}; + + my $sock = tcp_server() or BAIL_OUT $!; + my ($tmpdir, $for_destroy) = tmpdir(); + my ($out, $err) = map { "$tmpdir/std$_.log" } qw(out err); + my $cmd = [ qw(-httpd -W0), "--stdout=$out", "--stderr=$err" ]; + my $td = start_script($cmd, undef, { 3 => $sock }); + my ($h, $p) = ($sock->sockhost, $sock->sockport); + + local $ENV{PLACK_TEST_EXTERNALSERVER_URI} = "http://$h:$p"; + while (($ibx_name, $urls) = each %$todo) { + Plack::Test::ExternalServer::test_psgi(client => $client); } } done_testing(); -1;
With Perl upstream preparing to deprecate things, we'll move towards only enabling warnings during development via shebang and stop enabling them via "use". We'll also favor "use v5.10.1" over the Perl 5.6-compatible "use 5.010_001", since our code base never worked on 5.6. Finally, were also importing SEEK_SET without using it, just use it for readability since we can't avoid loading Fcntl in other places and it'll get constant-folded, anyways. --- lib/PublicInbox/SolverGit.pm | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/lib/PublicInbox/SolverGit.pm b/lib/PublicInbox/SolverGit.pm index ff8a4946..dd95f400 100644 --- a/lib/PublicInbox/SolverGit.pm +++ b/lib/PublicInbox/SolverGit.pm @@ -9,8 +9,7 @@ # local filesystem layouts in the process. package PublicInbox::SolverGit; use strict; -use warnings; -use 5.010_001; +use v5.10.1; use File::Temp 0.19 (); # 0.19 for ->newdir use Fcntl qw(SEEK_SET); use PublicInbox::Git qw(git_unquote git_quote); @@ -270,7 +269,7 @@ sub prepare_index ($) { my $in = tmpfile("update-index.$oid_full") or die "tmpfile: $!"; print $in "$mode_a $oid_full\t$path_a\0" or die "print: $!"; $in->flush or die "flush: $!"; - sysseek($in, 0, 0) or die "seek: $!"; + sysseek($in, 0, SEEK_SET) or die "seek: $!"; dbg($self, 'preparing index'); my $rdr = { 0 => $in };
"*foo" is ambiguous in that it may refer to a bareword file handle; so we'll use it where we can without triggering warnings. PublicInbox::TestCommon::run_script_exit required dropping the prototype, however. We'll also future-proof by dropping "use warnings" in Cgit.pm and use the less-ambiguous "//=" in Inbox.pm while we're in the area. --- lib/PublicInbox/Cgit.pm | 5 ++--- lib/PublicInbox/Inbox.pm | 2 +- lib/PublicInbox/TestCommon.pm | 4 ++-- lib/PublicInbox/WwwListing.pm | 6 +++--- t/replace.t | 8 ++++---- t/solver_git.t | 2 +- xt/msgtime_cmp.t | 2 +- 7 files changed, 14 insertions(+), 15 deletions(-) diff --git a/lib/PublicInbox/Cgit.pm b/lib/PublicInbox/Cgit.pm index 9a51b451..fb0d0e60 100644 --- a/lib/PublicInbox/Cgit.pm +++ b/lib/PublicInbox/Cgit.pm @@ -10,9 +10,8 @@ use strict; use PublicInbox::GitHTTPBackend; use PublicInbox::Git; # not bothering with Exporter for a one-off -*input_prepare = *PublicInbox::GitHTTPBackend::input_prepare; -*serve = *PublicInbox::GitHTTPBackend::serve; -use warnings; +*input_prepare = \&PublicInbox::GitHTTPBackend::input_prepare; +*serve = \&PublicInbox::GitHTTPBackend::serve; use PublicInbox::Qspawn; use PublicInbox::WwwStatic qw(r); diff --git a/lib/PublicInbox/Inbox.pm b/lib/PublicInbox/Inbox.pm index 3b5ac970..b0894a7d 100644 --- a/lib/PublicInbox/Inbox.pm +++ b/lib/PublicInbox/Inbox.pm @@ -70,7 +70,7 @@ sub _cleanup_later ($) { my ($self) = @_; $cleanup_avail = cleanup_possible() if $cleanup_avail < 0; return if $cleanup_avail != 1; - $cleanup_timer ||= PublicInbox::DS::later(*cleanup_task); + $cleanup_timer //= PublicInbox::DS::later(\&cleanup_task); $CLEANUP->{"$self"} = $self; } diff --git a/lib/PublicInbox/TestCommon.pm b/lib/PublicInbox/TestCommon.pm index b03e93e0..42819179 100644 --- a/lib/PublicInbox/TestCommon.pm +++ b/lib/PublicInbox/TestCommon.pm @@ -158,7 +158,7 @@ sub _undo_redirects ($) { # The default is 2. our $run_script_exit_code; sub RUN_SCRIPT_EXIT () { "RUN_SCRIPT_EXIT\n" }; -sub run_script_exit (;$) { +sub run_script_exit { $run_script_exit_code = $_[0] // 0; die RUN_SCRIPT_EXIT; } @@ -180,7 +180,7 @@ package $pkg; use strict; use subs qw(exit); -*exit = *PublicInbox::TestCommon::run_script_exit; +*exit = \\&PublicInbox::TestCommon::run_script_exit; sub main { # the below "line" directive is a magic comment, see perlsyn(1) manpage # line 1 "$f" diff --git a/lib/PublicInbox/WwwListing.pm b/lib/PublicInbox/WwwListing.pm index 365743cf..0be3764c 100644 --- a/lib/PublicInbox/WwwListing.pm +++ b/lib/PublicInbox/WwwListing.pm @@ -60,9 +60,9 @@ sub list_404 ($$) { [] } # TODO: +cgit my %VALID = ( - all => *list_all, - 'match=domain' => *list_match_domain, - 404 => *list_404, + all => \&list_all, + 'match=domain' => \&list_match_domain, + 404 => \&list_404, ); sub set_cb ($$$) { diff --git a/t/replace.t b/t/replace.t index a1e2d63b..95241adf 100644 --- a/t/replace.t +++ b/t/replace.t @@ -179,10 +179,10 @@ EOF } } -my $opt = { pre => *pad_msgs }; +my $opt = { pre => \&pad_msgs }; test_replace(2, 'basic', {}); test_replace(2, 'basic', $opt); -test_replace(2, 'basic', $opt = { %$opt, post => *pad_msgs }); +test_replace(2, 'basic', $opt = { %$opt, post => \&pad_msgs }); test_replace(2, 'basic', $opt = { %$opt, rotate_bytes => 1 }); SKIP: { @@ -190,9 +190,9 @@ SKIP: { PublicInbox::Search::load_xapian() or skip 'Search::Xapian missing', 8; for my $l (qw(medium)) { test_replace(2, $l, {}); - $opt = { pre => *pad_msgs }; + $opt = { pre => \&pad_msgs }; test_replace(2, $l, $opt); - test_replace(2, $l, $opt = { %$opt, post => *pad_msgs }); + test_replace(2, $l, $opt = { %$opt, post => \&pad_msgs }); test_replace(2, $l, $opt = { %$opt, rotate_bytes => 1 }); } }; diff --git a/t/solver_git.t b/t/solver_git.t index 78cc0edd..c162b605 100644 --- a/t/solver_git.t +++ b/t/solver_git.t @@ -41,7 +41,7 @@ $ibx->{-repo_objs} = [ $git ]; my $res; my $solver = PublicInbox::SolverGit->new($ibx, sub { $res = $_[0] }); open my $log, '+>>', "$inboxdir/solve.log" or die "open: $!"; -my $psgi_env = { 'psgi.errors' => *STDERR, 'psgi.url_scheme' => 'http', +my $psgi_env = { 'psgi.errors' => \*STDERR, 'psgi.url_scheme' => 'http', 'HTTP_HOST' => 'example.com' }; $solver->solve($psgi_env, $log, '69df7d5', {}); ok($res, 'solved a blob!'); diff --git a/xt/msgtime_cmp.t b/xt/msgtime_cmp.t index 0ce3c042..aa96be4d 100644 --- a/xt/msgtime_cmp.t +++ b/xt/msgtime_cmp.t @@ -62,7 +62,7 @@ my $fh = $git->popen(@cat); while (<$fh>) { my ($oid, $type) = split / /; next if $type ne 'blob'; - $git->cat_async($oid, *compare); + $git->cat_async($oid, \&compare); } $git->cat_async_wait; ok(1);
It's still as slow as before with hundreds/thousands of inboxes, but at least it's fair. Future changes will allow it to be cached and memoized with persistent HTTP servers. --- MANIFEST | 1 + lib/PublicInbox/ManifestJsGz.pm | 153 ++++++++++++++++++++++++++++++++ lib/PublicInbox/WWW.pm | 4 +- lib/PublicInbox/WwwListing.pm | 117 +----------------------- t/www_listing.t | 7 +- 5 files changed, 162 insertions(+), 120 deletions(-) create mode 100644 lib/PublicInbox/ManifestJsGz.pm diff --git a/MANIFEST b/MANIFEST index 44670c7e..0e225b6a 100644 --- a/MANIFEST +++ b/MANIFEST @@ -157,6 +157,7 @@ lib/PublicInbox/Lock.pm lib/PublicInbox/MDA.pm lib/PublicInbox/MID.pm lib/PublicInbox/MIME.pm +lib/PublicInbox/ManifestJsGz.pm lib/PublicInbox/Mbox.pm lib/PublicInbox/MboxGz.pm lib/PublicInbox/MsgIter.pm diff --git a/lib/PublicInbox/ManifestJsGz.pm b/lib/PublicInbox/ManifestJsGz.pm new file mode 100644 index 00000000..328303ce --- /dev/null +++ b/lib/PublicInbox/ManifestJsGz.pm @@ -0,0 +1,153 @@ +# Copyright (C) 2020 all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> +package PublicInbox::ManifestJsGz; +use strict; +use v5.10.1; +use Digest::SHA (); +use File::Spec (); +use bytes (); # length +use PublicInbox::Inbox; +use PublicInbox::Git; +use IO::Compress::Gzip qw(gzip); +use HTTP::Date qw(time2str); +*try_cat = \&PublicInbox::Inbox::try_cat; + +our $json; +for my $mod (qw(JSON::MaybeXS JSON JSON::PP)) { + eval "require $mod" or next; + # ->ascii encodes non-ASCII to "\uXXXX" + $json = $mod->new->ascii(1) and last; +} + +sub response { + my ($env, $list) = @_; + $json or return [ 404, [], [] ]; + my $self = bless { + -abs2urlpath => {}, + -mtime => 0, + manifest => {}, + -list => $list, + psgi_env => $env, + }, __PACKAGE__; + + # PSGI server will call this immediately and give us a callback (-wcb) + sub { + $self->{-wcb} = $_[0]; # HTTP write callback + iterate_start($self); + }; +} + +sub fingerprint ($) { + my ($git) = @_; + # TODO: convert to qspawn for fairness when there's + # thousands of repos + my ($fh, $pid) = $git->popen('show-ref'); + my $dig = Digest::SHA->new(1); + while (read($fh, my $buf, 65536)) { + $dig->add($buf); + } + close $fh; + waitpid($pid, 0); + return if $?; # empty, uninitialized git repo + $dig->hexdigest; +} + +sub manifest_add ($$;$$) { + my ($self, $ibx, $epoch, $default_desc) = @_; + my $url_path = "/$ibx->{name}"; + my $git_dir = $ibx->{inboxdir}; + if (defined $epoch) { + $git_dir .= "/git/$epoch.git"; + $url_path .= "/git/$epoch.git"; + } + return unless -d $git_dir; + my $git = PublicInbox::Git->new($git_dir); + my $fingerprint = fingerprint($git) or return; # no empty repos + + chomp(my $owner = $git->qx('config', 'gitweb.owner')); + chomp(my $desc = try_cat("$git_dir/description")); + utf8::decode($owner); + utf8::decode($desc); + $owner = undef if $owner eq ''; + $desc = 'Unnamed repository' if $desc eq ''; + + # templates/hooks--update.sample and git-multimail in git.git + # only match "Unnamed repository", not the full contents of + # templates/this--description in git.git + if ($desc =~ /\AUnnamed repository/) { + $desc = "$default_desc [epoch $epoch]" if defined($epoch); + } + + my $reference; + chomp(my $alt = try_cat("$git_dir/objects/info/alternates")); + if ($alt) { + # n.b.: GitPython doesn't seem to handle comments or C-quoted + # strings like native git does; and we don't for now, either. + my @alt = split(/\n+/, $alt); + + # grokmirror only supports 1 alternate for "reference", + if (scalar(@alt) == 1) { + my $objdir = "$git_dir/objects"; + $reference = File::Spec->rel2abs($alt[0], $objdir); + $reference =~ s!/[^/]+/?\z!!; # basename + } + } + $self->{-abs2urlpath}->{$git_dir} = $url_path; + my $modified = $git->modified; + if ($modified > $self->{-mtime}) { + $self->{-mtime} = $modified; + } + $self->{manifest}->{$url_path} = { + owner => $owner, + reference => $reference, + description => $desc, + modified => $modified, + fingerprint => $fingerprint, + }; +} + +sub iterate_start { + my ($self) = @_; + if (my $async = $self->{psgi_env}->{'pi-httpd.async'}) { + # PublicInbox::HTTPD::Async->new + $async->(undef, undef, $self); + } else { + event_step($self) while $self->{-wcb}; + } +} + +sub event_step { + my ($self) = @_; + while (my $ibx = shift(@{$self->{-list}})) { + eval { + if (defined(my $max = $ibx->max_git_epoch)) { + my $desc = $ibx->description; + for my $epoch (0..$max) { + manifest_add($self, $ibx, $epoch, $desc) + } + } else { + manifest_add($self, $ibx); + } + }; + warn "E: $@" if $@; + if (my $async = $self->{psgi_env}->{'pi-httpd.async'}) { + # PublicInbox::HTTPD::Async->new + $async->(undef, undef, $self); + } + return; # more steps needed + } + my $abs2urlpath = delete $self->{-abs2urlpath}; + my $wcb = delete $self->{-wcb}; + my $manifest = delete $self->{manifest}; + while (my ($url_path, $repo) = each %$manifest) { + defined(my $abs = $repo->{reference}) or next; + $repo->{reference} = $abs2urlpath->{$abs}; + } + $manifest = $json->encode($manifest); + gzip(\$manifest => \(my $out)); + $wcb->([ 200, [ qw(Content-Type application/gzip), + 'Last-Modified', time2str($self->{-mtime}), + 'Content-Length', bytes::length($out) ], [ $out ] ]); +} + +1; diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm index 2ea5d80d..93ab3c9d 100644 --- a/lib/PublicInbox/WWW.pm +++ b/lib/PublicInbox/WWW.pm @@ -509,8 +509,8 @@ sub get_inbox_manifest ($$$) { my ($ctx, $inbox, $key) = @_; my $r404 = invalid_inbox($ctx, $inbox); return $r404 if $r404; - require PublicInbox::WwwListing; - PublicInbox::WwwListing::js($ctx->{env}, [$ctx->{-inbox}]); + require PublicInbox::ManifestJsGz; + PublicInbox::ManifestJsGz::response($ctx->{env}, [$ctx->{-inbox}]); } sub get_attach { diff --git a/lib/PublicInbox/WwwListing.pm b/lib/PublicInbox/WwwListing.pm index 0be3764c..12b0d9ad 100644 --- a/lib/PublicInbox/WwwListing.pm +++ b/lib/PublicInbox/WwwListing.pm @@ -5,24 +5,11 @@ # Used by PublicInbox::WWW package PublicInbox::WwwListing; use strict; -use warnings; use PublicInbox::Hval qw(ascii_html prurl fmt_ts); use PublicInbox::Linkify; -use PublicInbox::View; -use PublicInbox::Inbox; use PublicInbox::GzipFilter qw(gzf_maybe); +use PublicInbox::ManifestJsGz; use bytes (); # bytes::length -use HTTP::Date qw(time2str); -use Digest::SHA (); -use File::Spec (); -use IO::Compress::Gzip qw(gzip); -*try_cat = \&PublicInbox::Inbox::try_cat; -our $json; -for my $mod (qw(JSON::MaybeXS JSON JSON::PP)) { - eval "require $mod" or next; - # ->ascii encodes non-ASCII to "\uXXXX" - $json = $mod->new->ascii(1) and last; -} sub list_all_i { my ($ibx, $arg) = @_; @@ -132,106 +119,6 @@ sub html ($$) { [ $code, $h, [ $out ] ]; } -sub fingerprint ($) { - my ($git) = @_; - # TODO: convert to qspawn for fairness when there's - # thousands of repos - my ($fh, $pid) = $git->popen('show-ref'); - my $dig = Digest::SHA->new(1); - while (read($fh, my $buf, 65536)) { - $dig->add($buf); - } - close $fh; - waitpid($pid, 0); - return if $?; # empty, uninitialized git repo - $dig->hexdigest; -} - -sub manifest_add ($$;$$) { - my ($manifest, $ibx, $epoch, $default_desc) = @_; - my $url_path = "/$ibx->{name}"; - my $git_dir = $ibx->{inboxdir}; - if (defined $epoch) { - $git_dir .= "/git/$epoch.git"; - $url_path .= "/git/$epoch.git"; - } - return unless -d $git_dir; - my $git = PublicInbox::Git->new($git_dir); - my $fingerprint = fingerprint($git) or return; # no empty repos - - chomp(my $owner = $git->qx('config', 'gitweb.owner')); - chomp(my $desc = try_cat("$git_dir/description")); - utf8::decode($owner); - utf8::decode($desc); - $owner = undef if $owner eq ''; - $desc = 'Unnamed repository' if $desc eq ''; - - # templates/hooks--update.sample and git-multimail in git.git - # only match "Unnamed repository", not the full contents of - # templates/this--description in git.git - if ($desc =~ /\AUnnamed repository/) { - $desc = "$default_desc [epoch $epoch]" if defined($epoch); - } - - my $reference; - chomp(my $alt = try_cat("$git_dir/objects/info/alternates")); - if ($alt) { - # n.b.: GitPython doesn't seem to handle comments or C-quoted - # strings like native git does; and we don't for now, either. - my @alt = split(/\n+/, $alt); - - # grokmirror only supports 1 alternate for "reference", - if (scalar(@alt) == 1) { - my $objdir = "$git_dir/objects"; - $reference = File::Spec->rel2abs($alt[0], $objdir); - $reference =~ s!/[^/]+/?\z!!; # basename - } - } - $manifest->{-abs2urlpath}->{$git_dir} = $url_path; - my $modified = $git->modified; - if ($modified > $manifest->{-mtime}) { - $manifest->{-mtime} = $modified; - } - $manifest->{$url_path} = { - owner => $owner, - reference => $reference, - description => $desc, - modified => $modified, - fingerprint => $fingerprint, - }; -} - -# manifest.js.gz -sub js ($$) { - my ($env, $list) = @_; - # $json won't be defined if IO::Compress::Gzip is missing - $json or return [ 404, [], [] ]; - - my $manifest = { -abs2urlpath => {}, -mtime => 0 }; - for my $ibx (@$list) { - if (defined(my $max = $ibx->max_git_epoch)) { - my $desc = $ibx->description; - for my $epoch (0..$max) { - manifest_add($manifest, $ibx, $epoch, $desc); - } - } else { - manifest_add($manifest, $ibx); - } - } - my $abs2urlpath = delete $manifest->{-abs2urlpath}; - my $mtime = delete $manifest->{-mtime}; - while (my ($url_path, $repo) = each %$manifest) { - defined(my $abs = $repo->{reference}) or next; - $repo->{reference} = $abs2urlpath->{$abs}; - } - my $out; - gzip(\($json->encode($manifest)) => \$out); - $manifest = undef; - [ 200, [ qw(Content-Type application/gzip), - 'Last-Modified', time2str($mtime), - 'Content-Length', bytes::length($out) ], [ $out ] ]; -} - # not really a stand-alone PSGI app, but maybe it could be... sub call { my ($self, $env) = @_; @@ -239,7 +126,7 @@ sub call { if ($env->{PATH_INFO} eq '/manifest.js.gz') { # grokmirror uses relative paths, so it's domain-dependent my $list = $self->{manifest_cb}->($self, $env, 'manifest'); - js($env, $list); + PublicInbox::ManifestJsGz::response($env, $list); } else { # / my $list = $self->{www_cb}->($self, $env, 'www'); html($env, $list); diff --git a/t/www_listing.t b/t/www_listing.t index c4511cd1..4309a5e1 100644 --- a/t/www_listing.t +++ b/t/www_listing.t @@ -10,9 +10,10 @@ use PublicInbox::Import; require_mods(qw(URI::Escape Plack::Builder Digest::SHA IO::Compress::Gzip IO::Uncompress::Gunzip HTTP::Tiny)); require PublicInbox::WwwListing; +require PublicInbox::ManifestJsGz; my $json = do { no warnings 'once'; - $PublicInbox::WwwListing::json; + $PublicInbox::ManifestJsGz::json; } or plan skip_all => "JSON module missing"; use_ok 'PublicInbox::Git'; @@ -20,7 +21,7 @@ use_ok 'PublicInbox::Git'; my ($tmpdir, $for_destroy) = tmpdir(); my $bare = PublicInbox::Git->new("$tmpdir/bare.git"); PublicInbox::Import::init_bare($bare->{git_dir}); -is(PublicInbox::WwwListing::fingerprint($bare), undef, +is(PublicInbox::ManifestJsGz::fingerprint($bare), undef, 'empty repo has no fingerprint'); { my $fi_data = './t/git.fast-import-data'; @@ -30,7 +31,7 @@ is(PublicInbox::WwwListing::fingerprint($bare), undef, 'fast-import'); } -like(PublicInbox::WwwListing::fingerprint($bare), qr/\A[a-f0-9]{40}\z/, +like(PublicInbox::ManifestJsGz::fingerprint($bare), qr/\A[a-f0-9]{40}\z/, 'got fingerprint with non-empty repo'); sub tiny_test {
In Perl, we can simplify callers by passing a single array all the way down the stack instead of a single array ref which needs to be expanded every call. --- lib/PublicInbox/Config.pm | 12 ++++++------ lib/PublicInbox/ExtMsg.pm | 7 +++---- lib/PublicInbox/Watch.pm | 13 ++++++------- lib/PublicInbox/WwwListing.pm | 13 +++++-------- 4 files changed, 20 insertions(+), 25 deletions(-) diff --git a/lib/PublicInbox/Config.pm b/lib/PublicInbox/Config.pm index ae9ad8de..f78115b6 100644 --- a/lib/PublicInbox/Config.pm +++ b/lib/PublicInbox/Config.pm @@ -90,29 +90,29 @@ sub lookup_name ($$) { } sub each_inbox { - my ($self, $cb, $arg) = @_; + my ($self, $cb, @arg) = @_; # may auto-vivify if config file is non-existent: foreach my $section (@{$self->{-section_order}}) { next if $section !~ m!\Apublicinbox\.([^/]+)\z!; my $ibx = lookup_name($self, $1) or next; - $cb->($ibx, $arg); + $cb->($ibx, @arg); } } sub iterate_start { - my ($self, $cb, $arg) = @_; + my ($self, $cb, @arg) = @_; my $i = 0; - $self->{-iter} = [ \$i, $cb, $arg ]; + $self->{-iter} = [ \$i, $cb, @arg ]; } # for PublicInbox::DS::next_tick, we only call this is if # PublicInbox::DS is already loaded sub event_step { my ($self) = @_; - my ($i, $cb, $arg) = @{$self->{-iter}}; + my ($i, $cb, @arg) = @{$self->{-iter}}; my $section = $self->{-section_order}->[$$i++]; delete($self->{-iter}) unless defined($section); - eval { $cb->($self, $section, $arg) }; + eval { $cb->($self, $section, @arg) }; warn "E: $@ in ${self}::event_step" if $@; PublicInbox::DS::requeue($self) if defined($section); } diff --git a/lib/PublicInbox/ExtMsg.pm b/lib/PublicInbox/ExtMsg.pm index 5dffc65c..929737f1 100644 --- a/lib/PublicInbox/ExtMsg.pm +++ b/lib/PublicInbox/ExtMsg.pm @@ -74,8 +74,7 @@ sub search_partial ($$) { } sub ext_msg_i { - my ($other, $arg) = @_; - my ($cur, $mid, $ibxs, $found) = @$arg; + my ($other, $cur, $mid, $ibxs, $found) = @_; return if $other->{name} eq $cur->{name} || !$other->base_url; @@ -101,9 +100,9 @@ sub ext_msg { eval { require PublicInbox::Msgmap }; my $ibxs = []; my $found = []; - my $arg = [ $cur, $mid, $ibxs, $found ]; - $ctx->{www}->{pi_config}->each_inbox(\&ext_msg_i, $arg); + $ctx->{www}->{pi_config}->each_inbox(\&ext_msg_i, + $cur, $mid, $ibxs, $found); return exact($ctx, $found, $mid) if @$found; diff --git a/lib/PublicInbox/Watch.pm b/lib/PublicInbox/Watch.pm index 17786377..0f41dff2 100644 --- a/lib/PublicInbox/Watch.pm +++ b/lib/PublicInbox/Watch.pm @@ -133,8 +133,7 @@ sub _done_for_now { } sub remove_eml_i { # each_inbox callback - my ($ibx, $arg) = @_; - my ($self, $eml, $loc) = @$arg; + my ($ibx, $self, $eml, $loc) = @_; eval { # try to avoid taking a lock or unnecessary spawning @@ -176,7 +175,7 @@ sub _remove_spam { $path =~ /:2,[A-R]*S[T-Za-z]*\z/ or return; my $eml = eml_from_path($path) or return; local $SIG{__WARN__} = warn_ignore_cb(); - $self->{config}->each_inbox(\&remove_eml_i, [ $self, $eml, $path ]); + $self->{config}->each_inbox(\&remove_eml_i, $self, $eml, $path); } sub import_eml ($$$) { @@ -419,8 +418,8 @@ sub imap_import_msg ($$$$$) { if ($flags =~ /\\Seen\b/) { local $SIG{__WARN__} = warn_ignore_cb(); my $eml = PublicInbox::Eml->new($raw); - my $arg = [ $self, $eml, "$url UID:$uid" ]; - $self->{config}->each_inbox(\&remove_eml_i, $arg); + $self->{config}->each_inbox(\&remove_eml_i, + $self, $eml, "$url UID:$uid"); } } else { die "BUG: destination unknown $inboxes"; @@ -967,8 +966,8 @@ sub nntp_fetch_all ($$$) { } } elsif ($inboxes eq 'watchspam') { my $eml = PublicInbox::Eml->new(\$raw); - my $arg = [ $self, $eml, "$url ARTICLE $art" ]; - $self->{config}->each_inbox(\&remove_eml_i, $arg); + $self->{config}->each_inbox(\&remove_eml_i, + $self, $eml, "$url ARTICLE $art"); } else { die "BUG: destination unknown $inboxes"; } diff --git a/lib/PublicInbox/WwwListing.pm b/lib/PublicInbox/WwwListing.pm index 12b0d9ad..a7c7cbc1 100644 --- a/lib/PublicInbox/WwwListing.pm +++ b/lib/PublicInbox/WwwListing.pm @@ -12,21 +12,19 @@ use PublicInbox::ManifestJsGz; use bytes (); # bytes::length sub list_all_i { - my ($ibx, $arg) = @_; - my ($list, $hide_key) = @$arg; + my ($ibx, $list, $hide_key) = @_; push @$list, $ibx unless $ibx->{-hide}->{$hide_key}; } sub list_all ($$$) { my ($self, $env, $hide_key) = @_; my $list = []; - $self->{pi_config}->each_inbox(\&list_all_i, [ $list, $hide_key ]); + $self->{pi_config}->each_inbox(\&list_all_i, $list, $hide_key); $list; } sub list_match_domain_i { - my ($ibx, $arg) = @_; - my ($list, $hide_key, $re) = @$arg; + my ($ibx, $list, $hide_key, $re) = @_; if (!$ibx->{-hide}->{$hide_key} && grep(/$re/, @{$ibx->{url}})) { push @$list, $ibx; } @@ -37,9 +35,8 @@ sub list_match_domain ($$$) { my $list = []; my $host = $env->{HTTP_HOST} // $env->{SERVER_NAME}; $host =~ s/:[0-9]+\z//; - my $arg = [ $list, $hide_key, - qr!\A(?:https?:)?//\Q$host\E(?::[0-9]+)?/!i ]; - $self->{pi_config}->each_inbox(\&list_match_domain_i, $arg); + $self->{pi_config}->each_inbox(\&list_match_domain_i, $list, $hide_key, + qr!\A(?:https?:)?//\Q$host\E(?::[0-9]+)?/!i); $list; }
We will need to allow simultaneous iterators on the same config object, since we'll need this for ExtMsg, NNTPD, WwwListing, NewsWWW, and other places. --- MANIFEST | 1 + lib/PublicInbox/Config.pm | 18 ------------------ lib/PublicInbox/ConfigIter.pm | 28 ++++++++++++++++++++++++++++ lib/PublicInbox/IMAPD.pm | 6 ++++-- 4 files changed, 33 insertions(+), 20 deletions(-) create mode 100644 lib/PublicInbox/ConfigIter.pm diff --git a/MANIFEST b/MANIFEST index 0e225b6a..04a3744f 100644 --- a/MANIFEST +++ b/MANIFEST @@ -107,6 +107,7 @@ lib/PublicInbox/AltId.pm lib/PublicInbox/Cgit.pm lib/PublicInbox/CompressNoop.pm lib/PublicInbox/Config.pm +lib/PublicInbox/ConfigIter.pm lib/PublicInbox/ContentHash.pm lib/PublicInbox/DS.pm lib/PublicInbox/DSKQXS.pm diff --git a/lib/PublicInbox/Config.pm b/lib/PublicInbox/Config.pm index f78115b6..8ccf337d 100644 --- a/lib/PublicInbox/Config.pm +++ b/lib/PublicInbox/Config.pm @@ -99,24 +99,6 @@ sub each_inbox { } } -sub iterate_start { - my ($self, $cb, @arg) = @_; - my $i = 0; - $self->{-iter} = [ \$i, $cb, @arg ]; -} - -# for PublicInbox::DS::next_tick, we only call this is if -# PublicInbox::DS is already loaded -sub event_step { - my ($self) = @_; - my ($i, $cb, @arg) = @{$self->{-iter}}; - my $section = $self->{-section_order}->[$$i++]; - delete($self->{-iter}) unless defined($section); - eval { $cb->($self, $section, @arg) }; - warn "E: $@ in ${self}::event_step" if $@; - PublicInbox::DS::requeue($self) if defined($section); -} - sub lookup_newsgroup { my ($self, $ng) = @_; _lookup_fill($self, '-by_newsgroup', lc($ng)); diff --git a/lib/PublicInbox/ConfigIter.pm b/lib/PublicInbox/ConfigIter.pm new file mode 100644 index 00000000..26cc70e2 --- /dev/null +++ b/lib/PublicInbox/ConfigIter.pm @@ -0,0 +1,28 @@ +# Copyright (C) 2020 all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> + +# Intended for PublicInbox::DS->EventLoop in read-only daemons +# to avoid each_inbox() monopolizing the event loop when hundreds/thousands +# of inboxes are in play. +package PublicInbox::ConfigIter; +use strict; +use v5.10.1; + +sub new { + my ($class, $pi_cfg, $cb, @args) = @_; + my $i = 0; + bless [ $pi_cfg, \$i, $cb, @args ], __PACKAGE__; +} + +# for PublicInbox::DS::next_tick, we only call this is if +# PublicInbox::DS is already loaded +sub event_step { + my $self = shift; + my ($pi_cfg, $i, $cb, @arg) = @$self; + my $section = $pi_cfg->{-section_order}->[$$i++]; + eval { $cb->($pi_cfg, $section, @arg) }; + warn "E: $@ in ${self}::event_step" if $@; + PublicInbox::DS::requeue($self) if defined($section); +} + +1; diff --git a/lib/PublicInbox/IMAPD.pm b/lib/PublicInbox/IMAPD.pm index 09bedf5c..3c211ee1 100644 --- a/lib/PublicInbox/IMAPD.pm +++ b/lib/PublicInbox/IMAPD.pm @@ -6,6 +6,7 @@ package PublicInbox::IMAPD; use strict; use PublicInbox::Config; +use PublicInbox::ConfigIter; use PublicInbox::InboxIdle; use PublicInbox::IMAP; use PublicInbox::DummyInbox; @@ -98,8 +99,9 @@ sub refresh_groups { my $pi_config = PublicInbox::Config->new; if ($sig) { # SIGHUP is handled through the event loop $self->{imapd_next} = { dummies => {}, mailboxes => {} }; - $pi_config->iterate_start(\&imapd_refresh_step, $self); - PublicInbox::DS::requeue($pi_config); # call event_step + my $iter = PublicInbox::ConfigIter->new($pi_config, + \&imapd_refresh_step, $self); + $iter->event_step; } else { # initial start is synchronous $self->{dummies} = {}; $pi_config->each_inbox(\&imapd_refresh_ibx, $self);
This helped me diagnose an error I would've introduced in the next commit. --- t/cgi.t | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/t/cgi.t b/t/cgi.t index 366d6594..96c627c3 100644 --- a/t/cgi.t +++ b/t/cgi.t @@ -158,7 +158,7 @@ sub cgi_run { my ($in, $out, $err) = ("", "", ""); my $rdr = { 0 => \$in, 1 => \$out, 2 => \$err }; run_script(['.cgi'], \%env, $rdr); - die "unexpected error: \$?=$?" if $?; + die "unexpected error: \$?=$? ($err)" if $?; my ($head, $body) = split(/\r\n\r\n/, $out, 2); { head => $head, body => $body, err => $err } }
With many inboxes, checking multiple SQLite repos will be slow and time-consuming, so ensure we can schedule it fairly between multiple inboxes. --- lib/PublicInbox/ExtMsg.pm | 101 ++++++++++++++++++++++++++------------ 1 file changed, 70 insertions(+), 31 deletions(-) diff --git a/lib/PublicInbox/ExtMsg.pm b/lib/PublicInbox/ExtMsg.pm index 929737f1..ce1a47bb 100644 --- a/lib/PublicInbox/ExtMsg.pm +++ b/lib/PublicInbox/ExtMsg.pm @@ -74,69 +74,106 @@ sub search_partial ($$) { } sub ext_msg_i { - my ($other, $cur, $mid, $ibxs, $found) = @_; + my ($other, $ctx) = @_; - return if $other->{name} eq $cur->{name} || !$other->base_url; + return if $other->{name} eq $ctx->{-inbox}->{name} || !$other->base_url; my $mm = $other->mm or return; # try to find the URL with Msgmap to avoid forking - my $num = $mm->num_for($mid); + my $num = $mm->num_for($ctx->{mid}); if (defined $num) { - push @$found, $other; + push @{$ctx->{found}}, $other; } else { # no point in trying the fork fallback if we # know Xapian is up-to-date but missing the # message in the current repo - push @$ibxs, $other; + push @{$ctx->{again}}, $other; + } +} + +sub ext_msg_step { + my ($pi_cfg, $section, $ctx) = @_; + if (defined($section)) { + return if $section !~ m!\Apublicinbox\.([^/]+)\z!; + my $ibx = $pi_cfg->lookup_name($1) or return; + ext_msg_i($ibx, $ctx); + } else { # undef == "EOF" + finalize_exact($ctx); } } sub ext_msg { my ($ctx) = @_; - my $cur = $ctx->{-inbox}; - my $mid = $ctx->{mid}; + sub { + $ctx->{-wcb} = $_[0]; # HTTP server write callback + + if ($ctx->{env}->{'pi-httpd.async'}) { + require PublicInbox::ConfigIter; + my $iter = PublicInbox::ConfigIter->new( + $ctx->{www}->{pi_config}, + \&ext_msg_step, $ctx); + $iter->event_step; + } else { + $ctx->{www}->{pi_config}->each_inbox(\&ext_msg_i, $ctx); + finalize_exact($ctx); + } + }; +} - eval { require PublicInbox::Msgmap }; - my $ibxs = []; - my $found = []; +# called via PublicInbox::DS->EventLoop +sub event_step { + my ($ctx, $sync) = @_; + # can't find a partial match in current inbox, try the others: + my $ibx = shift @{$ctx->{again}} or goto \&finalize_partial; + my $mids = search_partial($ibx, $ctx->{mid}) or + return ($sync ? undef : PublicInbox::DS::requeue($ctx)); + $ctx->{n_partial} += scalar(@$mids); + push @{$ctx->{partial}}, [ $ibx, $mids ]; + $ctx->{n_partial} >= PARTIAL_MAX ? goto(\&finalize_partial) + : ($sync ? undef : PublicInbox::DS::requeue($ctx)); +} - $ctx->{www}->{pi_config}->each_inbox(\&ext_msg_i, - $cur, $mid, $ibxs, $found); +sub finalize_exact { + my ($ctx) = @_; - return exact($ctx, $found, $mid) if @$found; + return $ctx->{-wcb}->(exact($ctx)) if $ctx->{found}; # fall back to partial MID matching - my @partial; - my $n_partial = 0; + my $mid = $ctx->{mid}; + my $cur = $ctx->{-inbox}; my $mids = search_partial($cur, $mid); if ($mids) { - $n_partial = scalar(@$mids); - push @partial, [ $cur, $mids ]; - } - - # can't find a partial match in current inbox, try the others: - if (!$n_partial && length($mid) >= $MIN_PARTIAL_LEN) { - foreach my $ibx (@$ibxs) { - $mids = search_partial($ibx, $mid) or next; - $n_partial += scalar(@$mids); - push @partial, [ $ibx, $mids]; - last if $n_partial >= PARTIAL_MAX; + $ctx->{n_partial} = scalar(@$mids); + push @{$ctx->{partial}}, [ $cur, $mids ]; + } elsif ($ctx->{again} && length($mid) >= $MIN_PARTIAL_LEN) { + bless $ctx, __PACKAGE__; + if ($ctx->{env}->{'pi-httpd.async'}) { + $ctx->event_step; + return; } + + # synchronous fall-through + $ctx->event_step while @{$ctx->{again}}; } + goto \&finalize_partial; +} +sub finalize_partial { + my ($ctx) = @_; + my $mid = $ctx->{mid}; my $code = 404; my $href = mid_href($mid); my $html = ascii_html($mid); my $title = "<$html> not found"; my $s = "<pre>Message-ID <$html>\nnot found\n"; - if ($n_partial) { + if (my $n_partial = $ctx->{n_partial}) { $code = 300; my $es = $n_partial == 1 ? '' : 'es'; $n_partial .= '+' if ($n_partial == PARTIAL_MAX); $s .= "\n$n_partial partial match$es found:\n\n"; - my $cur_name = $cur->{name}; - foreach my $pair (@partial) { + my $cur_name = $ctx->{-inbox}->{name}; + foreach my $pair (@{$ctx->{partial}}) { my ($ibx, $res) = @$pair; my $env = $ctx->{env} if $ibx->{name} eq $cur_name; my $u = $ibx->base_url($env) or next; @@ -155,7 +192,7 @@ sub ext_msg { $ctx->{-html_tip} = $s .= '</pre>'; $ctx->{-title_html} = $title; $ctx->{-upfx} = '../'; - html_oneshot($ctx, $code); + $ctx->{-wcb}->(html_oneshot($ctx, $code)); } sub ext_urls { @@ -177,7 +214,9 @@ sub ext_urls { } sub exact { - my ($ctx, $found, $mid) = @_; + my ($ctx) = @_; + my $mid = $ctx->{mid}; + my $found = $ctx->{found}; my $href = mid_href($mid); my $html = ascii_html($mid); my $title = "<$html> found in ";
By using the just-introduced ConfigIter class. And make ManifestJsGz a subclass of it to reduce duplication. --- lib/PublicInbox/ConfigIter.pm | 12 +++ lib/PublicInbox/ManifestJsGz.pm | 92 ++++++++---------- lib/PublicInbox/WWW.pm | 19 ++-- lib/PublicInbox/WwwListing.pm | 163 ++++++++++++++------------------ 4 files changed, 125 insertions(+), 161 deletions(-) diff --git a/lib/PublicInbox/ConfigIter.pm b/lib/PublicInbox/ConfigIter.pm index 26cc70e2..e6fa8172 100644 --- a/lib/PublicInbox/ConfigIter.pm +++ b/lib/PublicInbox/ConfigIter.pm @@ -25,4 +25,16 @@ sub event_step { PublicInbox::DS::requeue($self) if defined($section); } +# for generic PSGI servers +sub each_section { + my $self = shift; + my ($pi_cfg, $i, $cb, @arg) = @$self; + while (defined(my $section = $pi_cfg->{-section_order}->[$$i++])) { + eval { $cb->($pi_cfg, $section, @arg) }; + warn "E: $@ in ${self}::each_section" if $@; + } + eval { $cb->($pi_cfg, undef, @arg) }; + warn "E: $@ in ${self}::each_section" if $@; +} + 1; diff --git a/lib/PublicInbox/ManifestJsGz.pm b/lib/PublicInbox/ManifestJsGz.pm index 328303ce..f98d9d01 100644 --- a/lib/PublicInbox/ManifestJsGz.pm +++ b/lib/PublicInbox/ManifestJsGz.pm @@ -1,8 +1,11 @@ # Copyright (C) 2020 all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> + +# generates manifest.js.gz for grokmirror(1) package PublicInbox::ManifestJsGz; use strict; use v5.10.1; +use parent qw(PublicInbox::WwwListing); use Digest::SHA (); use File::Spec (); use bytes (); # length @@ -19,22 +22,12 @@ for my $mod (qw(JSON::MaybeXS JSON JSON::PP)) { $json = $mod->new->ascii(1) and last; } -sub response { - my ($env, $list) = @_; - $json or return [ 404, [], [] ]; - my $self = bless { - -abs2urlpath => {}, - -mtime => 0, - manifest => {}, - -list => $list, - psgi_env => $env, - }, __PACKAGE__; - - # PSGI server will call this immediately and give us a callback (-wcb) - sub { - $self->{-wcb} = $_[0]; # HTTP write callback - iterate_start($self); - }; +# called by WwwListing +sub url_regexp { + my ($ctx) = @_; + # grokmirror uses relative paths, so it's domain-dependent + # SUPER calls PublicInbox::WwwListing::url_regexp + $ctx->SUPER::url_regexp('publicInbox.grokManifest', 'match=domain'); } sub fingerprint ($) { @@ -53,7 +46,7 @@ sub fingerprint ($) { } sub manifest_add ($$;$$) { - my ($self, $ibx, $epoch, $default_desc) = @_; + my ($ctx, $ibx, $epoch, $default_desc) = @_; my $url_path = "/$ibx->{name}"; my $git_dir = $ibx->{inboxdir}; if (defined $epoch) { @@ -92,12 +85,12 @@ sub manifest_add ($$;$$) { $reference =~ s!/[^/]+/?\z!!; # basename } } - $self->{-abs2urlpath}->{$git_dir} = $url_path; + $ctx->{-abs2urlpath}->{$git_dir} = $url_path; my $modified = $git->modified; - if ($modified > $self->{-mtime}) { - $self->{-mtime} = $modified; + if ($modified > ($ctx->{-mtime} // 0)) { + $ctx->{-mtime} = $modified; } - $self->{manifest}->{$url_path} = { + $ctx->{manifest}->{$url_path} = { owner => $owner, reference => $reference, description => $desc, @@ -106,48 +99,37 @@ sub manifest_add ($$;$$) { }; } -sub iterate_start { - my ($self) = @_; - if (my $async = $self->{psgi_env}->{'pi-httpd.async'}) { - # PublicInbox::HTTPD::Async->new - $async->(undef, undef, $self); - } else { - event_step($self) while $self->{-wcb}; - } -} - -sub event_step { - my ($self) = @_; - while (my $ibx = shift(@{$self->{-list}})) { - eval { - if (defined(my $max = $ibx->max_git_epoch)) { - my $desc = $ibx->description; - for my $epoch (0..$max) { - manifest_add($self, $ibx, $epoch, $desc) - } - } else { - manifest_add($self, $ibx); +sub ibx_entry { + my ($ctx, $ibx) = @_; + eval { + if (defined(my $max = $ibx->max_git_epoch)) { + my $desc = $ibx->description; + for my $epoch (0..$max) { + manifest_add($ctx, $ibx, $epoch, $desc); } - }; - warn "E: $@" if $@; - if (my $async = $self->{psgi_env}->{'pi-httpd.async'}) { - # PublicInbox::HTTPD::Async->new - $async->(undef, undef, $self); + } else { + manifest_add($ctx, $ibx); } - return; # more steps needed - } - my $abs2urlpath = delete $self->{-abs2urlpath}; - my $wcb = delete $self->{-wcb}; - my $manifest = delete $self->{manifest}; + }; + warn "E: $@" if $@; +} + +sub hide_key { 'manifest' } + +# overrides WwwListing->psgi_triple +sub psgi_triple { + my ($ctx) = @_; + my $abs2urlpath = delete($ctx->{-abs2urlpath}) // {}; + my $manifest = delete($ctx->{manifest}) // {}; while (my ($url_path, $repo) = each %$manifest) { defined(my $abs = $repo->{reference}) or next; $repo->{reference} = $abs2urlpath->{$abs}; } $manifest = $json->encode($manifest); gzip(\$manifest => \(my $out)); - $wcb->([ 200, [ qw(Content-Type application/gzip), - 'Last-Modified', time2str($self->{-mtime}), - 'Content-Length', bytes::length($out) ], [ $out ] ]); + [ 200, [ qw(Content-Type application/gzip), + 'Last-Modified', time2str($ctx->{-mtime}), + 'Content-Length', bytes::length($out) ], [ $out ] ] } 1; diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm index 93ab3c9d..85abf327 100644 --- a/lib/PublicInbox/WWW.pm +++ b/lib/PublicInbox/WWW.pm @@ -77,8 +77,12 @@ sub call { } # top-level indices and feeds - if ($path_info eq '/' || $path_info eq '/manifest.js.gz') { - www_listing($self)->call($env); + if ($path_info eq '/') { + require PublicInbox::WwwListing; + PublicInbox::WwwListing->response($ctx); + } elsif ($path_info eq '/manifest.js.gz') { + require PublicInbox::ManifestJsGz; + PublicInbox::ManifestJsGz->response($ctx); } elsif ($path_info =~ m!$INBOX_RE\z!o) { invalid_inbox($ctx, $1) || r301($ctx, $1); } elsif ($path_info =~ m!$INBOX_RE(?:/|/index\.html)?\z!o) { @@ -171,7 +175,6 @@ sub preload { } $self->cgit; $self->stylesheets_prepare($_) for ('', '../', '../../'); - $self->www_listing; $self->news_www; $pi_config->each_inbox(\&preload_inbox); } @@ -496,21 +499,13 @@ sub cgit { } } -sub www_listing { - my ($self) = @_; - $self->{www_listing} ||= do { - require PublicInbox::WwwListing; - PublicInbox::WwwListing->new($self); - } -} - # GET $INBOX/manifest.js.gz sub get_inbox_manifest ($$$) { my ($ctx, $inbox, $key) = @_; my $r404 = invalid_inbox($ctx, $inbox); return $r404 if $r404; require PublicInbox::ManifestJsGz; - PublicInbox::ManifestJsGz::response($ctx->{env}, [$ctx->{-inbox}]); + PublicInbox::ManifestJsGz->response($ctx); } sub get_attach { diff --git a/lib/PublicInbox/WwwListing.pm b/lib/PublicInbox/WwwListing.pm index a7c7cbc1..bda2761c 100644 --- a/lib/PublicInbox/WwwListing.pm +++ b/lib/PublicInbox/WwwListing.pm @@ -5,129 +5,104 @@ # Used by PublicInbox::WWW package PublicInbox::WwwListing; use strict; -use PublicInbox::Hval qw(ascii_html prurl fmt_ts); +use PublicInbox::Hval qw(prurl fmt_ts); use PublicInbox::Linkify; use PublicInbox::GzipFilter qw(gzf_maybe); -use PublicInbox::ManifestJsGz; +use PublicInbox::ConfigIter; use bytes (); # bytes::length -sub list_all_i { - my ($ibx, $list, $hide_key) = @_; - push @$list, $ibx unless $ibx->{-hide}->{$hide_key}; -} - -sub list_all ($$$) { - my ($self, $env, $hide_key) = @_; - my $list = []; - $self->{pi_config}->each_inbox(\&list_all_i, $list, $hide_key); - $list; -} +sub ibx_entry { + my ($ctx, $ibx) = @_; + my $mtime = $ibx->modified; + my $ts = fmt_ts($mtime); + my $url = prurl($ctx->{env}, $ibx->{url}); + my $tmp = <<""; +* $ts - $url + ${\$ibx->description} -sub list_match_domain_i { - my ($ibx, $list, $hide_key, $re) = @_; - if (!$ibx->{-hide}->{$hide_key} && grep(/$re/, @{$ibx->{url}})) { - push @$list, $ibx; + if (defined(my $info_url = $ibx->{infourl})) { + $tmp .= ' ' . prurl($ctx->{env}, $info_url) . "\n"; } + push @{$ctx->{-list}}, [ $mtime, $tmp ]; } -sub list_match_domain ($$$) { - my ($self, $env, $hide_key) = @_; - my $list = []; - my $host = $env->{HTTP_HOST} // $env->{SERVER_NAME}; - $host =~ s/:[0-9]+\z//; - $self->{pi_config}->each_inbox(\&list_match_domain_i, $list, $hide_key, - qr!\A(?:https?:)?//\Q$host\E(?::[0-9]+)?/!i); - $list; -} - -sub list_404 ($$) { [] } - -# TODO: +cgit -my %VALID = ( - all => \&list_all, - 'match=domain' => \&list_match_domain, - 404 => \&list_404, -); - -sub set_cb ($$$) { - my ($pi_config, $k, $default) = @_; - my $v = $pi_config->{lc $k} // $default; - $VALID{$v} || do { - warn <<""; -`$v' is not a valid value for `$k' -$k be one of `all', `match=domain', or `404' - - $VALID{$default}; - }; +sub list_match_i { # ConfigIter callback + my ($cfg, $section, $re, $ctx) = @_; + if (defined($section)) { + return if $section !~ m!\Apublicinbox\.([^/]+)\z!; + my $ibx = $cfg->lookup_name($1) or return; + if (!$ibx->{-hide}->{$ctx->hide_key} && + grep(/$re/, @{$ibx->{url}})) { + $ctx->ibx_entry($ibx); + } + } else { # undef == "EOF" + $ctx->{-wcb}->($ctx->psgi_triple); + } } -sub new { - my ($class, $www) = @_; - my $pi_config = $www->{pi_config}; - bless { - pi_config => $pi_config, - style => $www->style("\0"), - www_cb => set_cb($pi_config, 'publicInbox.wwwListing', 404), - manifest_cb => set_cb($pi_config, 'publicInbox.grokManifest', - 'match=domain'), - }, $class; +sub url_regexp { + my ($ctx, $key, $default) = @_; + $key //= 'publicInbox.wwwListing'; + $default //= '404'; + my $v = $ctx->{www}->{pi_config}->{lc $key} // $default; +again: + if ($v eq 'match=domain') { + my $h = $ctx->{env}->{HTTP_HOST} // $ctx->{env}->{SERVER_NAME}; + $h =~ s/:[0-9]+\z//; + qr!\A(?:https?:)?//\Q$h\E(?::[0-9]+)?/!i; + } elsif ($v eq 'all') { + qr/./; + } elsif ($v eq '404') { + undef; + } else { + warn <<EOF; +`$v' is not a valid value for `$key' +$key be one of `all', `match=domain', or `404' +EOF + $v = $default; # 'match=domain' or 'all' + goto again; + } } -sub ibx_entry { - my ($mtime, $ibx, $env) = @_; - my $ts = fmt_ts($mtime); - my $url = prurl($env, $ibx->{url}); - my $tmp = <<""; -* $ts - $url - ${\$ibx->description} - - if (defined(my $info_url = $ibx->{infourl})) { - $tmp .= ' ' . prurl($env, $info_url) . "\n"; +sub hide_key { 'www' } + +sub response { + my ($class, $ctx) = @_; + bless $ctx, $class; + my $re = $ctx->url_regexp or return $ctx->psgi_triple; + my $iter = PublicInbox::ConfigIter->new($ctx->{www}->{pi_config}, + \&list_match_i, $re, $ctx); + sub { + $ctx->{-wcb} = $_[0]; # HTTP server callback + $ctx->{env}->{'pi-httpd.async'} ? + $iter->event_step : $iter->each_section; } - $tmp; } -sub html ($$) { - my ($env, $list) = @_; +sub psgi_triple { + my ($ctx) = @_; my $h = [ 'Content-Type', 'text/html; charset=UTF-8', 'Content-Length', undef ]; - my $gzf = gzf_maybe($h, $env); + my $gzf = gzf_maybe($h, $ctx->{env}); $gzf->zmore('<html><head><title>' . 'public-inbox listing</title>' . '</head><body><pre>'); my $code = 404; - if (@$list) { + if (my $list = $ctx->{-list}) { $code = 200; - # Schwartzian transform since Inbox->modified is expensive - @$list = sort { - $b->[0] <=> $a->[0] - } map { [ $_->modified, $_ ] } @$list; - - my $tmp = join("\n", map { ibx_entry(@$_, $env) } @$list); + # sort by ->modified + @$list = map { $_->[1] } sort { $b->[0] <=> $a->[0] } @$list; + $list = join("\n", @$list); my $l = PublicInbox::Linkify->new; - $gzf->zmore($l->to_html($tmp)); + $gzf->zmore($l->to_html($list)); } else { $gzf->zmore('no inboxes, yet'); } my $out = $gzf->zflush('</pre><hr><pre>'. - PublicInbox::WwwStream::code_footer($env) . - '</pre></body></html>'); + PublicInbox::WwwStream::code_footer($ctx->{env}) . + '</pre></body></html>'); $h->[3] = bytes::length($out); [ $code, $h, [ $out ] ]; } -# not really a stand-alone PSGI app, but maybe it could be... -sub call { - my ($self, $env) = @_; - - if ($env->{PATH_INFO} eq '/manifest.js.gz') { - # grokmirror uses relative paths, so it's domain-dependent - my $list = $self->{manifest_cb}->($self, $env, 'manifest'); - PublicInbox::ManifestJsGz::response($env, $list); - } else { # / - my $list = $self->{www_cb}->($self, $env, 'www'); - html($env, $list); - } -} - 1;
With public-inbox-httpd, this mitigates the effect of slow git blob storage with multiple coderepos configured for an inbox. It's still synchronous for now (and may need to remain that way for ->last_check_err), but no longer monopolizes the event loop when checking multiple coderepos. We don't yet support multi-inbox scanning, yet; but this also prepares us for a future where we do. We'll also support >=40 char blob OIDs in preparation for future git SHA-256 support, too. --- lib/PublicInbox/SolverGit.pm | 71 ++++++++++++++++++++++-------------- t/solver_git.t | 5 ++- 2 files changed, 48 insertions(+), 28 deletions(-) diff --git a/lib/PublicInbox/SolverGit.pm b/lib/PublicInbox/SolverGit.pm index dd95f400..12024dbc 100644 --- a/lib/PublicInbox/SolverGit.pm +++ b/lib/PublicInbox/SolverGit.pm @@ -70,34 +70,38 @@ sub ERR ($$) { die $err; } -# look for existing objects already in git repos +# look for existing objects already in git repos, returns arrayref +# if found, number of remaining git coderepos to try if not. sub solve_existing ($$) { my ($self, $want) = @_; + my $try = $want->{try_gits} //= [ @{$self->{gits}} ]; # array copy + my $git = shift @$try or die 'BUG {try_gits} empty'; my $oid_b = $want->{oid_b}; - my $have_hints = scalar keys %$want > 1; - my @ambiguous; # Array of [ git, $oids] - foreach my $git (@{$self->{gits}}) { - my ($oid_full, $type, $size) = $git->check($oid_b); - if (defined($type) && (!$have_hints || $type eq 'blob')) { - return [ $git, $oid_full, $type, int($size) ]; - } + my ($oid_full, $type, $size) = $git->check($oid_b); + + # other than {oid_b, try_gits, try_ibxs} + my $have_hints = scalar keys %$want > 3; + if (defined($type) && (!$have_hints || $type eq 'blob')) { + delete $want->{try_gits}; + return [ $git, $oid_full, $type, int($size) ]; # done, success + } - next if length($oid_b) == 40; + # TODO: deal with 40-char "abbreviations" with future SHA-256 git + return scalar(@$try) if length($oid_b) >= 40; - # parse stderr of "git cat-file --batch-check" - my $err = $git->last_check_err; - my (@oids) = ($err =~ /\b([a-f0-9]{40})\s+blob\b/g); - next unless scalar(@oids); + # parse stderr of "git cat-file --batch-check" + my $err = $git->last_check_err; + my (@oids) = ($err =~ /\b([a-f0-9]{40,})\s+blob\b/g); + return scalar(@$try) unless scalar(@oids); - # TODO: do something with the ambiguous array? - # push @ambiguous, [ $git, @oids ]; + # TODO: do something with the ambiguous array? + # push @ambiguous, [ $git, @oids ]; - dbg($self, "`$oid_b' ambiguous in " . - join("\n\t", $git->pub_urls($self->{psgi_env})) - . "\n" . - join('', map { "$_ blob\n" } @oids)); - } - scalar(@ambiguous) ? \@ambiguous : undef; + dbg($self, "`$oid_b' ambiguous in " . + join("\n\t", $git->pub_urls($self->{psgi_env})) + . "\n" . + join('', map { "$_ blob\n" } @oids)); + scalar(@$try); } sub extract_diff ($$) { @@ -523,10 +527,12 @@ sub resolve_patch ($$) { # see if we can find the blob in an existing git repo: my $cur_want = $want->{oid_b}; - if ($self->{seen_oid}->{$cur_want}++) { + if (!$want->{try_ibxs} && $self->{seen_oid}->{$cur_want}++) { die "Loop detected solving $cur_want\n"; } - if (my $existing = solve_existing($self, $want)) { + $want->{try_ibxs} //= [ @{$self->{inboxes}} ]; # array copy + my $existing = solve_existing($self, $want); + if (ref $existing) { my ($found_git, undef, $type, undef) = @$existing; dbg($self, "found $cur_want in " . join(" ||\n\t", @@ -539,13 +545,17 @@ sub resolve_patch ($$) { } mark_found($self, $cur_want, $existing); return next_step($self); # onto patch application + } elsif ($existing > 0) { + push @{$self->{todo}}, $want; + return next_step($self); # retry solve_existing + } else { # $existing == 0: we may retry if inbox scan (below) fails + delete $want->{try_gits}; } # scan through inboxes to look for emails which results in # the oid we want: - foreach my $ibx (@{$self->{inboxes}}) { - my $diffs = find_extract_diffs($self, $ibx, $want) or next; - + my $ibx = shift(@{$want->{try_ibxs}}) or die 'BUG: {try_ibxs} empty'; + if (my $diffs = find_extract_diffs($self, $ibx, $want)) { unshift @{$self->{patches}}, @$diffs; dbg($self, "found $cur_want in ". join(" ||\n\t", map { di_url($self, $_) } @$diffs)); @@ -562,7 +572,14 @@ sub resolve_patch ($$) { } return next_step($self); # onto the next todo item } - if (length($cur_want) > $OID_MIN) { + + if (scalar @{$want->{try_ibxs}}) { # do we have more inboxes to try? + push @{$self->{todo}}, $want; + return next_step($self); + } + + if (length($cur_want) > $OID_MIN) { # maybe a shorter OID will work + delete $want->{try_ibxs}; # drop empty arrayref chop($cur_want); dbg($self, "retrying $want->{oid_b} as $cur_want"); $want->{oid_b} = $cur_want; diff --git a/t/solver_git.t b/t/solver_git.t index c162b605..6b0ed8d2 100644 --- a/t/solver_git.t +++ b/t/solver_git.t @@ -35,6 +35,7 @@ my $deliver_patch = sub ($) { $deliver_patch->('t/solve/0001-simple-mod.patch'); my $v1_0_0_tag = 'cb7c42b1e15577ed2215356a2bf925aef59cdd8d'; +my $v1_0_0_tag_short = substr($v1_0_0_tag, 0, 16); my $git = PublicInbox::Git->new($git_dir); $ibx->{-repo_objs} = [ $git ]; @@ -173,7 +174,9 @@ EOF is($res->code, 404, 'failure with null OID'); $res = $cb->(GET("/$name/$v1_0_0_tag/s/")); - is($res->code, 200, 'shows commit'); + is($res->code, 200, 'shows commit (unabbreviated)'); + $res = $cb->(GET("/$name/$v1_0_0_tag_short/s/")); + is($res->code, 200, 'shows commit (abbreviated)'); while (my ($label, $size) = each %bin) { $res = $cb->(GET("/$name/$oid{$label}/s/")); is($res->code, 200, "$label binary file");
To avoid hogging the event loop in public-inbox-httpd when many candidate messages match, we'll separate the steps to ensure fairness on slow storage. --- lib/PublicInbox/SolverGit.pm | 136 +++++++++++++++++++++-------------- 1 file changed, 83 insertions(+), 53 deletions(-) diff --git a/lib/PublicInbox/SolverGit.pm b/lib/PublicInbox/SolverGit.pm index 12024dbc..c54d6d54 100644 --- a/lib/PublicInbox/SolverGit.pm +++ b/lib/PublicInbox/SolverGit.pm @@ -106,11 +106,16 @@ sub solve_existing ($$) { sub extract_diff ($$) { my ($p, $arg) = @_; - my ($self, $diffs, $pre, $post, $ibx, $smsg) = @$arg; + my ($self, $want, $smsg) = @$arg; my ($part) = @$p; # ignore $depth and @idx; my $ct = $part->content_type || 'text/plain'; my ($s, undef) = msg_part_text($part, $ct); defined $s or return; + my $post = $want->{oid_b}; + my $pre = $want->{oid_a}; + if (!defined($pre) || $pre !~ /\A[a-f0-9]+\z/) { + $pre = '[a-f0-9]{7}'; # for RE below + } # Email::MIME::Encodings forces QP to be CRLF upon decoding, # change it back to LF: @@ -192,10 +197,10 @@ sub extract_diff ($$) { close $tmp or die "close(tmp): $!"; # for debugging/diagnostics: - $di->{ibx} = $ibx; + $di->{ibx} = $want->{cur_ibx}; $di->{smsg} = $smsg; - push @$diffs, $di; + push @{$self->{tmp_diffs}}, $di; } sub path_searchable ($) { defined($_[0]) && $_[0] =~ m!\A[\w/\. \-]+\z! } @@ -207,7 +212,7 @@ sub filename_query ($) { join('', map { qq( dfn:"$_") } split(/\.\./, $_[0])); } -sub find_extract_diffs ($$$) { +sub find_smsgs ($$$) { my ($self, $ibx, $want) = @_; my $srch = $ibx->search or return; @@ -218,8 +223,6 @@ sub find_extract_diffs ($$$) { my $pre = $want->{oid_a}; if (defined $pre && $pre =~ /\A[a-f0-9]+\z/) { $q .= " dfpre:$pre"; - } else { - $pre = '[a-f0-9]{7}'; # for $re below } my $path_b = $want->{path_b}; @@ -231,15 +234,8 @@ sub find_extract_diffs ($$$) { $q .= filename_query($path_a); } } - my $mset = $srch->mset($q, { relevance => 1 }); - my $diffs = []; - for my $smsg (@{$srch->mset_to_smsg($ibx, $mset)}) { - my $eml = $ibx->smsg_eml($smsg) or next; - $eml->each_part(\&extract_diff, - [$self, $diffs, $pre, $post, $ibx, $smsg], 1); - } - @$diffs ? $diffs : undef; + $mset->size ? $srch->mset_to_smsg($ibx, $mset) : undef; } sub update_index_result ($$) { @@ -264,7 +260,7 @@ sub prepare_index ($) { # no index creation for added files $oid_a =~ /\A0+\z/ and return next_step($self); - die "BUG: $oid_a not not found" unless $existing; + die "BUG: $oid_a not found" unless $existing; my $oid_full = $existing->[1]; my $path_a = $di->{path_a} or die "BUG: path_a missing for $oid_full"; @@ -518,15 +514,78 @@ sub di_url ($$) { defined($url) ? "$url$mid/" : "<$mid>"; } +sub retry_current { + # my ($self, $want) = @_; + push @{$_[0]->{todo}}, $_[1]; + goto \&next_step # retry solve_existing +} + +sub try_harder { + my ($self, $want) = @_; + + # do we have more inboxes to try? + goto \&retry_current if scalar @{$want->{try_ibxs}}; + + my $cur_want = $want->{oid_b}; + if (length($cur_want) > $OID_MIN) { # maybe a shorter OID will work + delete $want->{try_ibxs}; # drop empty arrayref + chop($cur_want); + dbg($self, "retrying $want->{oid_b} as $cur_want"); + $want->{oid_b} = $cur_want; + goto \&retry_current; # retry with shorter abbrev + } + + dbg($self, "could not find $cur_want"); + eval { done($self, undef) }; + die "E: $@" if $@; +} + sub resolve_patch ($$) { my ($self, $want) = @_; + my $cur_want = $want->{oid_b}; if (scalar(@{$self->{patches}}) > $MAX_PATCH) { die "Aborting, too many steps to $self->{oid_want}"; } + if (my $msgs = $want->{try_smsgs}) { + my $smsg = shift @$msgs; + if (my $eml = $want->{cur_ibx}->smsg_eml($smsg)) { + $eml->each_part(\&extract_diff, + [ $self, $want, $smsg ], 1); + } + + # try the remaining smsgs later + goto \&retry_current if scalar @$msgs; + + delete $want->{try_smsgs}; + delete $want->{cur_ibx}; + + my $diffs = delete $self->{tmp_diffs}; + if (scalar @$diffs) { + unshift @{$self->{patches}}, @$diffs; + dbg($self, "found $cur_want in " . join(" ||\n\t", + map { di_url($self, $_) } @$diffs)); + + # good, we can find a path to the oid we $want, now + # lets see if we need to apply more patches: + my $di = $diffs->[0]; + my $src = $di->{oid_a}; + + unless ($src =~ /\A0+\z/) { + # we have to solve it using another oid, fine: + my $job = { + oid_b => $src, + path_b => $di->{path_a}, + }; + push @{$self->{todo}}, $job; + } + goto \&next_step; # onto the next todo item + } + goto \&try_harder; + } + # see if we can find the blob in an existing git repo: - my $cur_want = $want->{oid_b}; if (!$want->{try_ibxs} && $self->{seen_oid}->{$cur_want}++) { die "Loop detected solving $cur_want\n"; } @@ -544,10 +603,9 @@ sub resolve_patch ($$) { return; } mark_found($self, $cur_want, $existing); - return next_step($self); # onto patch application + goto \&next_step; # onto patch application } elsif ($existing > 0) { - push @{$self->{todo}}, $want; - return next_step($self); # retry solve_existing + goto \&retry_current; } else { # $existing == 0: we may retry if inbox scan (below) fails delete $want->{try_gits}; } @@ -555,41 +613,13 @@ sub resolve_patch ($$) { # scan through inboxes to look for emails which results in # the oid we want: my $ibx = shift(@{$want->{try_ibxs}}) or die 'BUG: {try_ibxs} empty'; - if (my $diffs = find_extract_diffs($self, $ibx, $want)) { - unshift @{$self->{patches}}, @$diffs; - dbg($self, "found $cur_want in ". - join(" ||\n\t", map { di_url($self, $_) } @$diffs)); - - # good, we can find a path to the oid we $want, now - # lets see if we need to apply more patches: - my $di = $diffs->[0]; - my $src = $di->{oid_a}; - - unless ($src =~ /\A0+\z/) { - # we have to solve it using another oid, fine: - my $job = { oid_b => $src, path_b => $di->{path_a} }; - push @{$self->{todo}}, $job; - } - return next_step($self); # onto the next todo item - } - - if (scalar @{$want->{try_ibxs}}) { # do we have more inboxes to try? - push @{$self->{todo}}, $want; - return next_step($self); + if (my $msgs = find_smsgs($self, $ibx, $want)) { + $want->{try_smsgs} = $msgs; + $want->{cur_ibx} = $ibx; + $self->{tmp_diffs} = []; + goto \&retry_current; } - - if (length($cur_want) > $OID_MIN) { # maybe a shorter OID will work - delete $want->{try_ibxs}; # drop empty arrayref - chop($cur_want); - dbg($self, "retrying $want->{oid_b} as $cur_want"); - $want->{oid_b} = $cur_want; - push @{$self->{todo}}, $want; - return next_step($self); # retry with shorter abbrev - } - - dbg($self, "could not find $cur_want"); - eval { done($self, undef) }; - die "E: $@" if $@; + goto \&try_harder; } # this API is designed to avoid creating self-referential structures;
Like the rest of the WWW code, public-inbox-httpd now uses git_async_cat to retrieve blobs without blocking the event loop. This improves fairness when git blobs are on slow storage and allows us to take better advantage of SMP systems. --- lib/PublicInbox/SolverGit.pm | 85 +++++++++++++++++++++++------------- 1 file changed, 54 insertions(+), 31 deletions(-) diff --git a/lib/PublicInbox/SolverGit.pm b/lib/PublicInbox/SolverGit.pm index c54d6d54..ae3997ca 100644 --- a/lib/PublicInbox/SolverGit.pm +++ b/lib/PublicInbox/SolverGit.pm @@ -16,6 +16,8 @@ use PublicInbox::Git qw(git_unquote git_quote); use PublicInbox::MsgIter qw(msg_part_text); use PublicInbox::Qspawn; use PublicInbox::Tmpfile; +use PublicInbox::GitAsyncCat; +use PublicInbox::Eml; use URI::Escape qw(uri_escape_utf8); # POSIX requires _POSIX_ARG_MAX >= 4096, and xargs is required to @@ -540,6 +542,47 @@ sub try_harder { die "E: $@" if $@; } +sub extract_diffs_done { + my ($self, $want) = @_; + + delete $want->{try_smsgs}; + delete $want->{cur_ibx}; + + my $diffs = delete $self->{tmp_diffs}; + if (scalar @$diffs) { + unshift @{$self->{patches}}, @$diffs; + dbg($self, "found $want->{oid_b} in " . join(" ||\n\t", + map { di_url($self, $_) } @$diffs)); + + # good, we can find a path to the oid we $want, now + # lets see if we need to apply more patches: + my $di = $diffs->[0]; + my $src = $di->{oid_a}; + + unless ($src =~ /\A0+\z/) { + # we have to solve it using another oid, fine: + my $job = { oid_b => $src, path_b => $di->{path_a} }; + push @{$self->{todo}}, $job; + } + goto \&next_step; # onto the next todo item + } + goto \&try_harder; +} + +sub extract_diff_async { + my ($bref, $oid, $type, $size, $x) = @_; + my ($self, $want, $smsg) = @$x; + if (defined($oid)) { + $smsg->{blob} eq $oid or + ERR($self, "BUG: $smsg->{blob} != $oid"); + PublicInbox::Eml->new($bref)->each_part(\&extract_diff, $x, 1); + } + + scalar(@{$want->{try_smsgs}}) ? + retry_current($self, $want) : + extract_diffs_done($self, $want); +} + sub resolve_patch ($$) { my ($self, $want) = @_; @@ -550,39 +593,19 @@ sub resolve_patch ($$) { if (my $msgs = $want->{try_smsgs}) { my $smsg = shift @$msgs; - if (my $eml = $want->{cur_ibx}->smsg_eml($smsg)) { - $eml->each_part(\&extract_diff, - [ $self, $want, $smsg ], 1); - } - - # try the remaining smsgs later - goto \&retry_current if scalar @$msgs; - - delete $want->{try_smsgs}; - delete $want->{cur_ibx}; - - my $diffs = delete $self->{tmp_diffs}; - if (scalar @$diffs) { - unshift @{$self->{patches}}, @$diffs; - dbg($self, "found $cur_want in " . join(" ||\n\t", - map { di_url($self, $_) } @$diffs)); - - # good, we can find a path to the oid we $want, now - # lets see if we need to apply more patches: - my $di = $diffs->[0]; - my $src = $di->{oid_a}; - - unless ($src =~ /\A0+\z/) { - # we have to solve it using another oid, fine: - my $job = { - oid_b => $src, - path_b => $di->{path_a}, - }; - push @{$self->{todo}}, $job; + if ($self->{psgi_env}->{'pi-httpd.async'}) { + return git_async_cat($want->{cur_ibx}->git, + $smsg->{blob}, + \&extract_diff_async, + [$self, $want, $smsg]); + } else { + if (my $eml = $want->{cur_ibx}->smsg_eml($smsg)) { + $eml->each_part(\&extract_diff, + [ $self, $want, $smsg ], 1); } - goto \&next_step; # onto the next todo item } - goto \&try_harder; + + goto(scalar @$msgs ? \&retry_current : \&extract_diffs_done); } # see if we can find the blob in an existing git repo: