unofficial mirror of meta@public-inbox.org
 help / color / mirror / Atom feed
* [PATCH 0/3] ds: event loop-related fixes
@ 2023-11-25 20:54 Eric Wong
  2023-11-25 20:54 ` [PATCH 1/3] http: fix pipelining during long async requests Eric Wong
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Eric Wong @ 2023-11-25 20:54 UTC (permalink / raw)
  To: meta

Eric Wong (3):
  http: fix HTTP/1.1 pipelining during long async requests
  select+poll: have caller retry on EINTR
  ds: long_step: eliminate redundant fileno call

 lib/PublicInbox/DS.pm     |  1 -
 lib/PublicInbox/DSPoll.pm |  6 ++--
 lib/PublicInbox/HTTP.pm   | 17 +++++-----
 lib/PublicInbox/Select.pm |  6 ++--
 xt/httpd-async-stream.t   | 68 +++++++++++++++++++++++++++++----------
 5 files changed, 65 insertions(+), 33 deletions(-)

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH 1/3] http: fix pipelining during long async requests
  2023-11-25 20:54 [PATCH 0/3] ds: event loop-related fixes Eric Wong
@ 2023-11-25 20:54 ` Eric Wong
  2023-11-25 20:54 ` [PATCH 2/3] select+poll: have caller retry on EINTR Eric Wong
  2023-11-25 20:54 ` [PATCH 3/3] ds: long_step: eliminate redundant fileno call Eric Wong
  2 siblings, 0 replies; 4+ messages in thread
From: Eric Wong @ 2023-11-25 20:54 UTC (permalink / raw)
  To: meta

We must not attempt to read request bodies from the HTTP client
while processing a long request since that drains pipelined
requests.  The NNTP/IMAP/POP3 event_step callbacks follow the
same behavior when {long_cb} is present from ->long_response.

This bug has little real-world consequence since HTTP/1.1
pipelining is not widely used, especially when behind varnish
or other reverse proxies.

I found this bug while randomly strace-ing an active -netd
process to see the kind of traffic it was seeing.
---
 lib/PublicInbox/HTTP.pm | 17 +++++------
 xt/httpd-async-stream.t | 68 ++++++++++++++++++++++++++++++-----------
 2 files changed, 59 insertions(+), 26 deletions(-)

diff --git a/lib/PublicInbox/HTTP.pm b/lib/PublicInbox/HTTP.pm
index 85991ae7..7162732e 100644
--- a/lib/PublicInbox/HTTP.pm
+++ b/lib/PublicInbox/HTTP.pm
@@ -76,7 +76,7 @@ sub new ($$$) {
 sub event_step { # called by PublicInbox::DS
 	my ($self) = @_;
 	local $SIG{__WARN__} = $self->{srv_env}->{'pi-httpd.warn_cb'};
-	return unless $self->flush_write && $self->{sock};
+	return unless $self->flush_write && $self->{sock} && !$self->{forward};
 
 	# only read more requests if we've drained the write buffer,
 	# otherwise we can be buffering infinitely w/o backpressure
@@ -230,6 +230,13 @@ sub identity_write ($$) {
 
 sub response_done {
 	my ($self, $alive) = @_;
+	if (my $forward = delete $self->{forward}) { # avoid recursion
+		eval { $forward->close };
+		if ($@) {
+			warn "response forward->close error: $@";
+			return $self->close; # idempotent
+		}
+	}
 	delete $self->{env}; # we're no longer busy
 	# HEAD requests set $alive = 3 so we don't send "0\r\n\r\n";
 	$self->write(\"0\r\n\r\n") if $alive == 2;
@@ -268,14 +275,6 @@ sub getline_pull {
 		warn "response ->getline error: $@";
 		$self->close;
 	}
-	# avoid recursion
-	if (delete $self->{forward}) {
-		eval { $forward->close };
-		if ($@) {
-			warn "response ->close error: $@";
-			$self->close; # idempotent
-		}
-	}
 	response_done($self, delete $self->{alive});
 }
 
diff --git a/xt/httpd-async-stream.t b/xt/httpd-async-stream.t
index 099ceb79..21d09331 100644
--- a/xt/httpd-async-stream.t
+++ b/xt/httpd-async-stream.t
@@ -2,8 +2,10 @@
 # Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 # Expensive test to validate compression and TLS.
-use strict;
-use v5.10.1;
+use v5.12;
+use autodie;
+use PublicInbox::IO qw(write_file);
+use IO::Uncompress::Gunzip qw(gunzip $GunzipError);
 use PublicInbox::TestCommon;
 use PublicInbox::DS qw(now);
 use PublicInbox::Spawn qw(popen_rd);
@@ -23,20 +25,15 @@ diag "TEST_JOBS=$JOBS TEST_ENDPOINT=$endpoint TEST_CURL_OPT=$curl_opt";
 my @CURL_OPT = (qw(-HHost:example.com -sSf), split(' ', $curl_opt));
 
 my $make_local_server = sub {
+	my ($http) = @_;
 	my $pi_config = "$tmpdir/config";
-	open my $fh, '>', $pi_config or die "open($pi_config): $!";
-	print $fh <<"" or die "print $pi_config: $!";
+	write_file '>', $pi_config, <<"";
 [publicinbox "test"]
 inboxdir = $inboxdir
 address = test\@example.com
 
-	close $fh or die "close($pi_config): $!";
 	my ($out, $err) = ("$tmpdir/out", "$tmpdir/err");
-	for ($out, $err) {
-		open my $fh, '>', $_ or die "truncate: $!";
-	}
-	my $http = tcp_server();
-	my $rdr = { 3 => $http };
+	for ($out, $err) { open my $fh, '>', $_ }
 
 	# not using multiple workers, here, since we want to increase
 	# the chance of tripping concurrency bugs within PublicInbox/HTTP*.pm
@@ -46,10 +43,22 @@ address = test\@example.com
 	my $url = "$host_port/test/$endpoint";
 	print STDERR "# CMD ". join(' ', @$cmd). "\n";
 	my $env = { PI_CONFIG => $pi_config };
-	(start_script($cmd, $env, $rdr), $url);
+	(start_script($cmd, $env, { 3 => $http }), $url)
 };
 
-my ($td, $url) = $make_local_server->();
+my ($td, $url) = $make_local_server->(my $http = tcp_server());
+
+my $s1 = tcp_connect($http);
+my $rbuf = do { # pipeline while reading long response
+	my $req = <<EOM;
+GET /test/$endpoint HTTP/1.1\r
+Host: example.com\r
+\r
+EOM
+	is syswrite($s1, $req), length($req), 'initial long req';
+	<$s1>;
+};
+like $rbuf, qr!\AHTTP/1\.1 200\b!, 'started reading 200 response';
 
 my $do_get_all = sub {
 	my ($job) = @_;
@@ -74,16 +83,16 @@ my $do_get_all = sub {
 
 my (%pids, %res);
 for my $job (1..$JOBS) {
-	pipe(my ($r, $w)) or die;
+	pipe(my $r, my $w);
 	my $pid = fork;
 	if ($pid == 0) {
-		close $r or die;
+		close $r;
 		my $res = $do_get_all->($job);
-		print $w $res or die;
-		close $w or die;
+		print $w $res;
+		close $w;
 		_exit(0);
 	}
-	close $w or die;
+	close $w;
 	$pids{$pid} = [ $job, $r ];
 }
 
@@ -96,6 +105,31 @@ while (scalar keys %pids) {
 	push @{$res{$sum}}, $job;
 }
 is(scalar keys %res, 1, 'all got the same result');
+{
+	my $req = <<EOM;
+GET /test/manifest.js.gz HTTP/1.1\r
+Host: example.com\r
+Connection: close\r
+\r
+EOM
+	is syswrite($s1, $req), length($req),
+		'pipeline another request while reading long response';
+	diag 'reading remainder of slow response';
+	my $res = do { local $/ = "\r\n\r\n"; <$s1> };
+	like $res, qr/^Transfer-Encoding: chunked\r\n/sm, 'chunked response';
+	{
+		local $/ = "\r\n"; # get to final chunk
+		while (defined(my $l = <$s1>)) { last if $l eq "0\r\n" }
+	};
+	is scalar(readline($s1)), "\r\n", 'got final CRLF from 1st response';
+	diag "second response:";
+	$res = do { local $/ = "\r\n\r\n"; <$s1> };
+	like $res, qr!\AHTTP/1\.1 200 !, 'response for pipelined req';
+	gunzip($s1 => \my $json) or xbail "gunzip $GunzipError";
+	my $m = PublicInbox::Config::json()->decode($json);
+	like $m->{'/test'}->{fingerprint}, qr/\A[0-9a-f]{40,}\z/,
+		'acceptable fingerprint in response';
+}
 $td->kill;
 $td->join;
 is($?, 0, 'no error on -httpd exit');

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH 2/3] select+poll: have caller retry on EINTR
  2023-11-25 20:54 [PATCH 0/3] ds: event loop-related fixes Eric Wong
  2023-11-25 20:54 ` [PATCH 1/3] http: fix pipelining during long async requests Eric Wong
@ 2023-11-25 20:54 ` Eric Wong
  2023-11-25 20:54 ` [PATCH 3/3] ds: long_step: eliminate redundant fileno call Eric Wong
  2 siblings, 0 replies; 4+ messages in thread
From: Eric Wong @ 2023-11-25 20:54 UTC (permalink / raw)
  To: meta

We can't assume signals are blocked when neither signalfd nor
EVFILT_SIGNAL is in use.  So just return an empty result on
EINTR so the caller can recalculate the timeout and retry.

I found this bug while making xt/httpd-async-stream.t
use our event loop to reap processes, but have abandoned
that effort for now since it didn't save any code.
---
 lib/PublicInbox/DSPoll.pm | 6 ++----
 lib/PublicInbox/Select.pm | 6 ++++--
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/lib/PublicInbox/DSPoll.pm b/lib/PublicInbox/DSPoll.pm
index b947f756..a7055ec9 100644
--- a/lib/PublicInbox/DSPoll.pm
+++ b/lib/PublicInbox/DSPoll.pm
@@ -26,11 +26,9 @@ sub ep_wait {
 		push(@pset, $fd, $pevents);
 	}
 	@$events = ();
-	do {
-		$n = IO::Poll::_poll($timeout_msec, @pset);
-	} while ($n < 0 && $! == Errno::EINTR);
+	$n = IO::Poll::_poll($timeout_msec, @pset) or return; # timeout expired
+	return if $n < 0 && $! == Errno::EINTR; # caller recalculates timeout
 	die "poll: $!" if $n < 0;
-	return if $n == 0;
 	while (defined($fd = shift @pset)) {
 		$revents = shift @pset or next; # no event
 		if ($revents & POLLNVAL) {
diff --git a/lib/PublicInbox/Select.pm b/lib/PublicInbox/Select.pm
index 5cb7aff3..face8edc 100644
--- a/lib/PublicInbox/Select.pm
+++ b/lib/PublicInbox/Select.pm
@@ -8,6 +8,7 @@
 package PublicInbox::Select;
 use v5.12;
 use PublicInbox::Syscall qw(EPOLLONESHOT EPOLLIN EPOLLOUT);
+use Errno;
 
 sub new { bless {}, __PACKAGE__ } # fd => events
 
@@ -19,8 +20,9 @@ sub ep_wait {
 		vec($wvec, $fd, 1) = 1 if $ev & EPOLLOUT;
 	}
 	@$events = ();
-	my $n = select($rvec, $wvec, undef, $msec < 0 ? undef : ($msec/1000));
-	return if $n == 0;
+	my $to = $msec < 0 ? undef : ($msec/1000);
+	my $n = select $rvec, $wvec, undef, $to or return; # timeout expired
+	return if $n < 0 && $! == Errno::EINTR; # caller recalculates timeout
 	die "select: $!" if $n < 0;
 	while (my ($fd, $ev) = each %$self) {
 		if (vec($rvec, $fd, 1) || vec($wvec, $fd, 1)) {

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH 3/3] ds: long_step: eliminate redundant fileno call
  2023-11-25 20:54 [PATCH 0/3] ds: event loop-related fixes Eric Wong
  2023-11-25 20:54 ` [PATCH 1/3] http: fix pipelining during long async requests Eric Wong
  2023-11-25 20:54 ` [PATCH 2/3] select+poll: have caller retry on EINTR Eric Wong
@ 2023-11-25 20:54 ` Eric Wong
  2 siblings, 0 replies; 4+ messages in thread
From: Eric Wong @ 2023-11-25 20:54 UTC (permalink / raw)
  To: meta

We already stash the associated FD for reporting at startup and
don't need to call `fileno' again.  Found via manual code
inspection while considering the effort to make async {forward}
from PublicInbox::HTTP more like the generic long_response API
and {long_cb} field used by IMAP/NNTP/POP3.
---
 lib/PublicInbox/DS.pm | 1 -
 1 file changed, 1 deletion(-)

diff --git a/lib/PublicInbox/DS.pm b/lib/PublicInbox/DS.pm
index 4c8b502f..8bc8cfb7 100644
--- a/lib/PublicInbox/DS.pm
+++ b/lib/PublicInbox/DS.pm
@@ -629,7 +629,6 @@ sub long_step {
 		delete $self->{long_cb};
 		$self->long_response_done;
 		my $elapsed = now() - $t0;
-		my $fd = fileno($self->{sock});
 		$self->out(" deferred[$fd] done - %0.6f", $elapsed);
 		my $wbuf = $self->{wbuf}; # do NOT autovivify
 		requeue($self) unless $wbuf && @$wbuf;

^ permalink raw reply related	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2023-11-25 20:54 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-11-25 20:54 [PATCH 0/3] ds: event loop-related fixes Eric Wong
2023-11-25 20:54 ` [PATCH 1/3] http: fix pipelining during long async requests Eric Wong
2023-11-25 20:54 ` [PATCH 2/3] select+poll: have caller retry on EINTR Eric Wong
2023-11-25 20:54 ` [PATCH 3/3] ds: long_step: eliminate redundant fileno call Eric Wong

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).