unofficial mirror of meta@public-inbox.org
 help / color / mirror / Atom feed
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 08/11] extmsg: prevent cross-inbox matches from hogging event loop
Date: Wed,  9 Sep 2020 06:26:15 +0000	[thread overview]
Message-ID: <20200909062618.5940-9-e@80x24.org> (raw)
In-Reply-To: <20200909062618.5940-1-e@80x24.org>

With many inboxes, checking multiple SQLite repos for a Message-ID
is slow, so check one inbox at a time and let the event loop
schedule that work fairly instead of being hogged by a single
cross-inbox lookup.
---
 lib/PublicInbox/ExtMsg.pm | 101 ++++++++++++++++++++++++++------------
 1 file changed, 70 insertions(+), 31 deletions(-)

diff --git a/lib/PublicInbox/ExtMsg.pm b/lib/PublicInbox/ExtMsg.pm
index 929737f1..ce1a47bb 100644
--- a/lib/PublicInbox/ExtMsg.pm
+++ b/lib/PublicInbox/ExtMsg.pm
@@ -74,69 +74,106 @@ sub search_partial ($$) {
 }
 
 sub ext_msg_i {
-	my ($other, $cur, $mid, $ibxs, $found) = @_;
+	my ($other, $ctx) = @_;
 
-	return if $other->{name} eq $cur->{name} || !$other->base_url;
+	return if $other->{name} eq $ctx->{-inbox}->{name} || !$other->base_url;
 
 	my $mm = $other->mm or return;
 
 	# try to find the URL with Msgmap to avoid forking
-	my $num = $mm->num_for($mid);
+	my $num = $mm->num_for($ctx->{mid});
 	if (defined $num) {
-		push @$found, $other;
+		push @{$ctx->{found}}, $other;
 	} else {
 		# no point in trying the fork fallback if we
 		# know Xapian is up-to-date but missing the
 		# message in the current repo
-		push @$ibxs, $other;
+		push @{$ctx->{again}}, $other;
+	}
+}
+
+sub ext_msg_step {
+	my ($pi_cfg, $section, $ctx) = @_;
+	if (defined($section)) {
+		return if $section !~ m!\Apublicinbox\.([^/]+)\z!;
+		my $ibx = $pi_cfg->lookup_name($1) or return;
+		ext_msg_i($ibx, $ctx);
+	} else { # undef == "EOF"
+		finalize_exact($ctx);
 	}
 }
 
 sub ext_msg {
 	my ($ctx) = @_;
-	my $cur = $ctx->{-inbox};
-	my $mid = $ctx->{mid};
+	sub {
+		$ctx->{-wcb} = $_[0]; # HTTP server write callback
+
+		if ($ctx->{env}->{'pi-httpd.async'}) {
+			require PublicInbox::ConfigIter;
+			my $iter = PublicInbox::ConfigIter->new(
+						$ctx->{www}->{pi_config},
+						\&ext_msg_step, $ctx);
+			$iter->event_step;
+		} else {
+			$ctx->{www}->{pi_config}->each_inbox(\&ext_msg_i, $ctx);
+			finalize_exact($ctx);
+		}
+	};
+}
 
-	eval { require PublicInbox::Msgmap };
-	my $ibxs = [];
-	my $found = [];
+# called via PublicInbox::DS->EventLoop
+sub event_step {
+	my ($ctx, $sync) = @_;
+	# can't find a partial match in current inbox, try the others:
+	my $ibx = shift @{$ctx->{again}} or goto \&finalize_partial;
+	my $mids = search_partial($ibx, $ctx->{mid}) or
+			return ($sync ? undef : PublicInbox::DS::requeue($ctx));
+	$ctx->{n_partial} += scalar(@$mids);
+	push @{$ctx->{partial}}, [ $ibx, $mids ];
+	$ctx->{n_partial} >= PARTIAL_MAX ? goto(\&finalize_partial)
+			: ($sync ? undef : PublicInbox::DS::requeue($ctx));
+}
 
-	$ctx->{www}->{pi_config}->each_inbox(\&ext_msg_i,
-						$cur, $mid, $ibxs, $found);
+sub finalize_exact {
+	my ($ctx) = @_;
 
-	return exact($ctx, $found, $mid) if @$found;
+	return $ctx->{-wcb}->(exact($ctx)) if $ctx->{found};
 
 	# fall back to partial MID matching
-	my @partial;
-	my $n_partial = 0;
+	my $mid = $ctx->{mid};
+	my $cur = $ctx->{-inbox};
 	my $mids = search_partial($cur, $mid);
 	if ($mids) {
-		$n_partial = scalar(@$mids);
-		push @partial, [ $cur, $mids ];
-	}
-
-	# can't find a partial match in current inbox, try the others:
-	if (!$n_partial && length($mid) >= $MIN_PARTIAL_LEN) {
-		foreach my $ibx (@$ibxs) {
-			$mids = search_partial($ibx, $mid) or next;
-			$n_partial += scalar(@$mids);
-			push @partial, [ $ibx, $mids];
-			last if $n_partial >= PARTIAL_MAX;
+		$ctx->{n_partial} = scalar(@$mids);
+		push @{$ctx->{partial}}, [ $cur, $mids ];
+	} elsif ($ctx->{again} && length($mid) >= $MIN_PARTIAL_LEN) {
+		bless $ctx, __PACKAGE__;
+		if ($ctx->{env}->{'pi-httpd.async'}) {
+			$ctx->event_step;
+			return;
 		}
+
+		# synchronous fall-through
+		$ctx->event_step while @{$ctx->{again}};
 	}
+	goto \&finalize_partial;
+}
 
+sub finalize_partial {
+	my ($ctx) = @_;
+	my $mid = $ctx->{mid};
 	my $code = 404;
 	my $href = mid_href($mid);
 	my $html = ascii_html($mid);
 	my $title = "&lt;$html&gt; not found";
 	my $s = "<pre>Message-ID &lt;$html&gt;\nnot found\n";
-	if ($n_partial) {
+	if (my $n_partial = $ctx->{n_partial}) {
 		$code = 300;
 		my $es = $n_partial == 1 ? '' : 'es';
 		$n_partial .= '+' if ($n_partial == PARTIAL_MAX);
 		$s .= "\n$n_partial partial match$es found:\n\n";
-		my $cur_name = $cur->{name};
-		foreach my $pair (@partial) {
+		my $cur_name = $ctx->{-inbox}->{name};
+		foreach my $pair (@{$ctx->{partial}}) {
 			my ($ibx, $res) = @$pair;
 			my $env = $ctx->{env} if $ibx->{name} eq $cur_name;
 			my $u = $ibx->base_url($env) or next;
@@ -155,7 +192,7 @@ sub ext_msg {
 	$ctx->{-html_tip} = $s .= '</pre>';
 	$ctx->{-title_html} = $title;
 	$ctx->{-upfx} = '../';
-	html_oneshot($ctx, $code);
+	$ctx->{-wcb}->(html_oneshot($ctx, $code));
 }
 
 sub ext_urls {
@@ -177,7 +214,9 @@ sub ext_urls {
 }
 
 sub exact {
-	my ($ctx, $found, $mid) = @_;
+	my ($ctx) = @_;
+	my $mid = $ctx->{mid};
+	my $found = $ctx->{found};
 	my $href = mid_href($mid);
 	my $html = ascii_html($mid);
 	my $title = "&lt;$html&gt; found in ";

  parent reply	other threads:[~2020-09-09  6:26 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-09-09  6:26 [PATCH 00/11] httpd: further reduce event loop monopolization Eric Wong
2020-09-09  6:26 ` [PATCH 01/11] xt/solver: test with public-inbox-httpd, too Eric Wong
2020-09-09  6:26 ` [PATCH 02/11] solver: drop warnings, modernize use v5.10.1, use SEEK_SET Eric Wong
2020-09-09  6:26 ` [PATCH 03/11] use "\&" where possible when referring to subroutines Eric Wong
2020-09-09  6:26 ` [PATCH 04/11] www: manifest.js.gz generation no longer hogs event loop Eric Wong
2020-09-09  6:26 ` [PATCH 05/11] config: flatten each_inbox and iterate_start args Eric Wong
2020-09-09  6:26 ` [PATCH 06/11] config: split out iterator into separate object Eric Wong
2020-09-09  6:26 ` [PATCH 07/11] t/cgi.t: show stderr on failures Eric Wong
2020-09-09  6:26 ` Eric Wong [this message]
2020-09-09  6:26 ` [PATCH 09/11] wwwlisting: avoid hogging event loop Eric Wong
2020-09-09  6:26 ` [PATCH 10/11] solver: check one git coderepo and inbox at a time Eric Wong
2020-09-09  6:26 ` [PATCH 11/11] solver: break apart inbox blob retrieval Eric Wong
2020-09-10  1:51 ` [PATCH 12/11] solver: async blob retrieval for diff extraction Eric Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200909062618.5940-9-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).