unofficial mirror of meta@public-inbox.org
 help / color / mirror / Atom feed
* [PATCH] www: fix unindexed v1 inboxes w/ public-inbox-httpd
@ 2022-08-26 10:15 Eric Wong
  0 siblings, 0 replies; only message in thread
From: Eric Wong @ 2022-08-26 10:15 UTC (permalink / raw)
  To: meta

Unindexed v1 inboxes left $smsg objects unpopulated when using
public-inbox-httpd (but not generic PSGI servers), causing
missing HTML content and uninitialized value warnings.

Our existing tests for unindexed v1 inboxes only assumed generic
PSGI servers and synchronous blob retrieval.  Due to changes
several years ago to make git blob retrieval async for slow
storage using public-inbox-httpd, our tests were insufficient to
detect this regression.

So ensure $smsg->populate runs in a few places and rewrite
t/plack.t to test against both generic PSGI and -httpd
implementations.

Fortunately, unindexed v1 inboxes are uncommon, and this
bug was only (finally) discovered while developing other
features.

To ensure we can test (and not blindly follow) redirects with
-httpd, we now pass our own LWP::UserAgent (used internally by
Plack::Test::ExternalServer), with redirect following disabled,
to Plack::Test::ExternalServer::test_psgi.
---
 lib/PublicInbox/Feed.pm          |   5 +-
 lib/PublicInbox/TestCommon.pm    |   8 +-
 lib/PublicInbox/WwwAtomStream.pm |   1 +
 t/plack.t                        | 177 ++++++++++++-------------------
 4 files changed, 80 insertions(+), 111 deletions(-)

diff --git a/lib/PublicInbox/Feed.pm b/lib/PublicInbox/Feed.pm
index ee579f6d..e0810420 100644
--- a/lib/PublicInbox/Feed.pm
+++ b/lib/PublicInbox/Feed.pm
@@ -51,7 +51,10 @@ sub new_html_i {
 	my ($ctx, $eml) = @_;
 	$ctx->zmore($ctx->html_top) if exists $ctx->{-html_tip};
 
-	$eml and return PublicInbox::View::eml_entry($ctx, $eml);
+	if ($eml) {
+		$ctx->{smsg}->populate($eml) if !$ctx->{ibx}->{over};
+		return PublicInbox::View::eml_entry($ctx, $eml);
+	}
 	my $smsg = shift @{$ctx->{msgs}} or
 		$ctx->zmore(PublicInbox::View::pagination_footer(
 						$ctx, './new.html'));
diff --git a/lib/PublicInbox/TestCommon.pm b/lib/PublicInbox/TestCommon.pm
index 04adede0..55d82fc0 100644
--- a/lib/PublicInbox/TestCommon.pm
+++ b/lib/PublicInbox/TestCommon.pm
@@ -740,14 +740,18 @@ sub test_httpd ($$;$) {
 		$env->{$_} or BAIL_OUT "$_ unset";
 	}
 	SKIP: {
-		require_mods(qw(Plack::Test::ExternalServer), $skip // 1);
+		require_mods(qw(Plack::Test::ExternalServer LWP::UserAgent),
+				$skip // 1);
 		my $sock = tcp_server() or die;
 		my ($out, $err) = map { "$env->{TMPDIR}/std$_.log" } qw(out err);
 		my $cmd = [ qw(-httpd -W0), "--stdout=$out", "--stderr=$err" ];
 		my $td = start_script($cmd, $env, { 3 => $sock });
 		my ($h, $p) = tcp_host_port($sock);
 		local $ENV{PLACK_TEST_EXTERNALSERVER_URI} = "http://$h:$p";
-		Plack::Test::ExternalServer::test_psgi(client => $client);
+		my $ua = LWP::UserAgent->new;
+		$ua->max_redirect(0);
+		Plack::Test::ExternalServer::test_psgi(client => $client,
+							ua => $ua);
 		$td->join('TERM');
 		open my $fh, '<', $err or BAIL_OUT $!;
 		my $e = do { local $/; <$fh> };
diff --git a/lib/PublicInbox/WwwAtomStream.pm b/lib/PublicInbox/WwwAtomStream.pm
index 82895db6..7b7047ac 100644
--- a/lib/PublicInbox/WwwAtomStream.pm
+++ b/lib/PublicInbox/WwwAtomStream.pm
@@ -38,6 +38,7 @@ sub async_next ($) {
 sub async_eml { # for async_blob_cb
 	my ($ctx, $eml) = @_;
 	my $smsg = delete $ctx->{smsg};
+	$smsg->{mid} // $smsg->populate($eml);
 	$ctx->write(feed_entry($ctx, $smsg, $eml));
 }
 
diff --git a/t/plack.t b/t/plack.t
index a5fd54c9..20f5d8d5 100644
--- a/t/plack.t
+++ b/t/plack.t
@@ -9,6 +9,7 @@ my @mods = qw(HTTP::Request::Common Plack::Test URI::Escape);
 require_mods(@mods);
 foreach my $mod (@mods) { use_ok $mod; }
 ok(-f $psgi, "psgi example file found");
+my ($tmpdir, $for_destroy) = tmpdir();
 my $pfx = 'http://example.com/test';
 my $eml = eml_load('t/iso-2202-jp.eml');
 # ensure successful message deliveries
@@ -71,91 +72,74 @@ EOF
 	close $fh or BAIL_OUT "close: $!";
 });
 
-local $ENV{PI_CONFIG} = "$ibx->{inboxdir}/pi_config";
-my $app = require $psgi;
-test_psgi($app, sub {
+my $env = { PI_CONFIG => "$ibx->{inboxdir}/pi_config", TMPDIR => $tmpdir };
+local @ENV{keys %$env} = values %$env;
+my $c1 = sub {
 	my ($cb) = @_;
+	my $uri = $ENV{PLACK_TEST_EXTERNALSERVER_URI} // 'http://example.com';
+	$pfx = "$uri/test";
+
 	foreach my $u (qw(robots.txt favicon.ico .well-known/foo)) {
-		my $res = $cb->(GET("http://example.com/$u"));
+		my $res = $cb->(GET("$uri/$u"));
 		is($res->code, 404, "$u is missing");
 	}
-});
 
-test_psgi($app, sub {
-	my ($cb) = @_;
-	my $res = $cb->(GET('http://example.com/test/crlf@example.com/'));
+	my $res = $cb->(GET("$uri/test/crlf\@example.com/"));
 	is($res->code, 200, 'retrieved CRLF as HTML');
 	like($res->content, qr/mailto:me\@example/, 'no %40, per RFC 6068');
 	unlike($res->content, qr/\r/, 'no CR in HTML');
-	$res = $cb->(GET('http://example.com/test/crlf@example.com/raw'));
+	$res = $cb->(GET("$uri/test/crlf\@example.com/raw"));
 	is($res->code, 200, 'retrieved CRLF raw');
 	like($res->content, qr/\r/, 'CR preserved in raw message');
-	$res = $cb->(GET('http://example.com/test/bogus@example.com/raw'));
+	$res = $cb->(GET("$uri/test/bogus\@example.com/raw"));
 	is($res->code, 404, 'missing /raw is 404');
-});
 
-# redirect with newsgroup
-test_psgi($app, sub {
-	my ($cb) = @_;
-	my $from = 'http://example.com/inbox.test';
-	my $to = 'http://example.com/test/';
-	my $res = $cb->(GET($from));
+	# redirect with newsgroup
+	my $from = "$uri/inbox.test";
+	my $to = "http://example.com/test/";
+	$res = $cb->(GET($from));
 	is($res->code, 301, 'newsgroup name is permanent redirect');
 	is($to, $res->header('Location'), 'redirect location matches');
 	$from .= '/';
 	is($res->code, 301, 'newsgroup name/ is permanent redirect');
 	is($to, $res->header('Location'), 'redirect location matches');
-});
 
-# redirect with trailing /
-test_psgi($app, sub {
-	my ($cb) = @_;
-	my $from = 'http://example.com/test';
-	my $to = "$from/";
-	my $res = $cb->(GET($from));
+	# redirect with trailing /
+	$from = "$uri/test";
+	$to = "$from/";
+	$res = $cb->(GET($from));
 	is(301, $res->code, 'is permanent redirect');
 	is($to, $res->header('Location'),
 		'redirect location matches with trailing slash');
-});
 
-foreach my $t (qw(t T)) {
-	test_psgi($app, sub {
-		my ($cb) = @_;
+	for my $t (qw(T t)) {
 		my $u = $pfx . "/blah\@example.com/$t";
-		my $res = $cb->(GET($u));
+		$res = $cb->(GET($u));
 		is(301, $res->code, "redirect for missing /");
 		my $location = $res->header('Location');
 		like($location, qr!/\Q$t\E/#u\z!,
 			'redirected with missing /');
-	});
-}
-foreach my $t (qw(f)) {
-	test_psgi($app, sub {
-		my ($cb) = @_;
+	}
+
+	for my $t (qw(f)) { # legacy redirect
 		my $u = $pfx . "/blah\@example.com/$t";
-		my $res = $cb->(GET($u));
+		$res = $cb->(GET($u));
 		is(301, $res->code, "redirect for legacy /f");
 		my $location = $res->header('Location');
 		like($location, qr!/blah\@example\.com/\z!,
 			'redirected with missing /');
-	});
-}
+	}
 
-test_psgi($app, sub {
-	my ($cb) = @_;
-	my $atomurl = 'http://example.com/test/new.atom';
-	my $res = $cb->(GET('http://example.com/test/new.html'));
+	my $atomurl = "$uri/test/new.atom";
+	$res = $cb->(GET("$uri/test/new.html"));
 	is(200, $res->code, 'success response received');
 	like($res->content, qr!href="new\.atom"!,
 		'atom URL generated');
 	like($res->content, qr!href="blah\@example\.com/"!,
 		'index generated');
 	like($res->content, qr!1993-10-02!, 'date set');
-});
 
-test_psgi($app, sub {
-	my ($cb) = @_;
-	my $res = $cb->(GET($pfx . '/atom.xml'));
+	$res = $cb->(GET($pfx . '/atom.xml'));
 	is(200, $res->code, 'success response received for atom');
 	my $body = $res->content;
 	like($body, qr!link\s+href="\Q$pfx\E/blah\@example\.com/"!s,
@@ -165,12 +149,9 @@ test_psgi($app, sub {
 	like($body, qr/zzzzzz/, 'body included');
 	$res = $cb->(GET($pfx . '/description'));
 	like($res->content, qr/test for public-inbox/, 'got description');
-});
 
-test_psgi($app, sub {
-	my ($cb) = @_;
 	my $path = '/blah@example.com/';
-	my $res = $cb->(GET($pfx . $path));
+	$res = $cb->(GET($pfx . $path));
 	is(200, $res->code, "success for $path");
 	my $html = $res->content;
 	like($html, qr!<title>hihi - Me</title>!, 'HTML returned');
@@ -196,11 +177,9 @@ test_psgi($app, sub {
 
 	$res = $cb->(GET($pfx . '/qp@example.com/'));
 	like($res->content, qr/\bhi = bye\b/, "HTML output decoded QP");
-});
 
-test_psgi($app, sub {
-	my ($cb) = @_;
-	my $res = $cb->(GET($pfx . '/blah@example.com/raw'));
+
+	$res = $cb->(GET($pfx . '/blah@example.com/raw'));
 	is(200, $res->code, 'success response received for /*/raw');
 	like($res->content, qr!^From !sm, "mbox returned");
 	is($res->header('Content-Type'), 'text/plain; charset=iso-8859-1',
@@ -213,75 +192,62 @@ test_psgi($app, sub {
 	$res = $cb->(GET($pfx . '/199707281508.AAA24167@hoyogw.example/raw'));
 	is($res->header('Content-Type'), 'text/plain; charset=ISO-2022-JP',
 		'ISO-2002-JP returned');
-	chomp(my $body = $res->content);
+	chomp($body = $res->content);
 	my $raw = PublicInbox::Eml->new(\$body);
 	is($raw->body_raw, $eml->body_raw, 'ISO-2022-JP body unmodified');
 
 	$res = $cb->(GET($pfx . '/blah@example.com/t.mbox.gz'));
 	is(501, $res->code, '501 when overview missing');
 	like($res->content, qr!\bOverview\b!, 'overview omission noted');
-});
 
-# legacy redirects
-foreach my $t (qw(m f)) {
-	test_psgi($app, sub {
-		my ($cb) = @_;
-		my $res = $cb->(GET($pfx . "/$t/blah\@example.com.txt"));
+	# legacy redirects
+	for my $t (qw(m f)) {
+		$res = $cb->(GET($pfx . "/$t/blah\@example.com.txt"));
 		is(301, $res->code, "redirect for old $t .txt link");
-		my $location = $res->header('Location');
+		$location = $res->header('Location');
 		like($location, qr!/blah\@example\.com/raw\z!,
 			".txt redirected to /raw");
-	});
-}
-
-my %umap = (
-	'm' => '',
-	'f' => '',
-	't' => 't/',
-);
-while (my ($t, $e) = each %umap) {
-	test_psgi($app, sub {
-		my ($cb) = @_;
-		my $res = $cb->(GET($pfx . "/$t/blah\@example.com.html"));
+	}
+
+	my %umap = (
+		'm' => '',
+		'f' => '',
+		't' => 't/',
+	);
+	while (my ($t, $e) = each %umap) {
+		$res = $cb->(GET($pfx . "/$t/blah\@example.com.html"));
 		is(301, $res->code, "redirect for old $t .html link");
-		my $location = $res->header('Location');
-		like($location,
-			qr!/blah\@example\.com/$e(?:#u)?\z!,
-			".html redirected to new location");
-	});
-}
-foreach my $sfx (qw(mbox mbox.gz)) {
-	test_psgi($app, sub {
-		my ($cb) = @_;
-		my $res = $cb->(GET($pfx . "/t/blah\@example.com.$sfx"));
+		$location = $res->header('Location');
+		like($location, qr!/blah\@example\.com/$e(?:#u)?\z!,
+				".html redirected to new location");
+	}
+
+	for my $sfx (qw(mbox mbox.gz)) {
+		$res = $cb->(GET($pfx . "/t/blah\@example.com.$sfx"));
 		is(301, $res->code, 'redirect for old thread link');
-		my $location = $res->header('Location');
+		$location = $res->header('Location');
 		like($location,
 		     qr!/blah\@example\.com/t\.mbox(?:\.gz)?\z!,
 		     "$sfx redirected to /mbox.gz");
-	});
-}
-test_psgi($app, sub {
-	my ($cb) = @_;
+	}
+
 	# for a while, we used to support /$INBOX/$X40/
 	# when we "compressed" long Message-IDs to SHA-1
 	# Now we're stuck supporting them forever :<
-	foreach my $path ('f2912279bd7bcd8b7ab3033234942d58746d56f7') {
-		my $from = "http://example.com/test/$path/";
-		my $res = $cb->(GET($from));
+	for my $path ('f2912279bd7bcd8b7ab3033234942d58746d56f7') {
+		$from = "$uri/test/$path/";
+		$res = $cb->(GET($from));
 		is(301, $res->code, 'is permanent redirect');
 		like($res->header('Location'),
 			qr!/test/blah\@example\.com/!,
 			'redirect from x40 MIDs works');
 	}
-});
 
-# dumb HTTP clone/fetch support
-test_psgi($app, sub {
-	my ($cb) = @_;
-	my $path = '/test/info/refs';
+
+	# dumb HTTP clone/fetch support
+	$path = '/test/info/refs';
 	my $req = HTTP::Request->new('GET' => $path);
-	my $res = $cb->($req);
+	$res = $cb->($req);
 	is(200, $res->code, 'refs readable');
 	my $orig = $res->content;
 
@@ -294,19 +260,14 @@ test_psgi($app, sub {
 	$res = $cb->($req);
 	is(206, $res->code, 'got partial another response');
 	is($res->content, substr($orig, 5), 'partial body OK past end');
-});
 
-# things which should fail
-test_psgi($app, sub {
-	my ($cb) = @_;
 
-	my $res = $cb->(PUT('/'));
+	# things which should fail
+	$res = $cb->(PUT('/'));
 	is(405, $res->code, 'no PUT to / allowed');
 	$res = $cb->(PUT('/test/'));
 	is(405, $res->code, 'no PUT /$INBOX allowed');
-
-	# TODO
-	# $res = $cb->(GET('/'));
-});
-
-done_testing();
+};
+test_psgi(require $psgi, $c1);
+test_httpd($env, $c1);
+done_testing;

^ permalink raw reply related	[flat|nested] only message in thread

only message in thread, other threads:[~2022-08-26 10:16 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-08-26 10:15 [PATCH] www: fix unindexed v1 inboxes w/ public-inbox-httpd Eric Wong

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).