From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.2 required=3.0 tests=ALL_TRUSTED,BAYES_00, DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF, T_SCC_BODY_TEXT_LINE shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 121FD1F54E for ; Fri, 26 Aug 2022 10:16:38 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=80x24.org; s=selector1; t=1661508998; bh=+UGuKsb3QsJyctSGMvVU81uJcu82PnPWgJEhFWC4XW4=; h=From:To:Subject:Date:From; b=3o6S8lk4qXyDFX2j95HGPQl7KiIo5o0LKSitEHZGeyDn3trUdTPQV0ijwm+jvB/mi nCsb0R3bxEgHJ4L8FabzvO4UDyJFBERMBzrJ51Om5gWFSgzyz6Y/mP6ZZjIj1zL9QG vnZOuVvx+NFXkwhFJMb/jqwfoaZd1oV+ZcD7voss= From: Eric Wong To: meta@public-inbox.org Subject: [PATCH] www: fix unindexed v1 inboxes w/ public-inbox-httpd Date: Fri, 26 Aug 2022 10:15:45 +0000 Message-Id: <20220826101545.1204956-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: Unindexed v1 inboxes were leaving $smsg objects unpopulated when using public-inbox-httpd (but not generic PSGI servers) and causing missing HTML content and uninitialized value warnings. Our existing tests for unindexed v1 inboxes only assumed generic PSGI servers and synchronous blob retrieval. Due to changes several years ago to make git blob retrieval async for slow storage using public-inbox-httpd, our tests were insufficient to detect this regression. So ensure $smsg->populate runs in a few places and rewrite t/plack.t to test against both generic PSGI and -httpd implementations. Fortunately, unindexed v1 inboxes are uncommon, and this bug was only (finally) discovered while developing other features. For ensuring we can test (and not blindly follow) redirects with -httpd, we now provide our own LWP::UserAgent (used internally by Plack::Test::ExternalServer) with redirect following disabled to P:T:ES::test_psgi. --- lib/PublicInbox/Feed.pm | 5 +- lib/PublicInbox/TestCommon.pm | 8 +- lib/PublicInbox/WwwAtomStream.pm | 1 + t/plack.t | 177 ++++++++++++------------------- 4 files changed, 80 insertions(+), 111 deletions(-) diff --git a/lib/PublicInbox/Feed.pm b/lib/PublicInbox/Feed.pm index ee579f6d..e0810420 100644 --- a/lib/PublicInbox/Feed.pm +++ b/lib/PublicInbox/Feed.pm @@ -51,7 +51,10 @@ sub new_html_i { my ($ctx, $eml) = @_; $ctx->zmore($ctx->html_top) if exists $ctx->{-html_tip}; - $eml and return PublicInbox::View::eml_entry($ctx, $eml); + if ($eml) { + $ctx->{smsg}->populate($eml) if !$ctx->{ibx}->{over}; + return PublicInbox::View::eml_entry($ctx, $eml); + } my $smsg = shift @{$ctx->{msgs}} or $ctx->zmore(PublicInbox::View::pagination_footer( $ctx, './new.html')); diff --git a/lib/PublicInbox/TestCommon.pm b/lib/PublicInbox/TestCommon.pm index 04adede0..55d82fc0 100644 --- a/lib/PublicInbox/TestCommon.pm +++ b/lib/PublicInbox/TestCommon.pm @@ -740,14 +740,18 @@ sub test_httpd ($$;$) { $env->{$_} or BAIL_OUT "$_ unset"; } SKIP: { - require_mods(qw(Plack::Test::ExternalServer), $skip // 1); + require_mods(qw(Plack::Test::ExternalServer LWP::UserAgent), + $skip // 1); my $sock = tcp_server() or die; my ($out, $err) = map { "$env->{TMPDIR}/std$_.log" } qw(out err); my $cmd = [ qw(-httpd -W0), "--stdout=$out", "--stderr=$err" ]; my $td = start_script($cmd, $env, { 3 => $sock }); my ($h, $p) = tcp_host_port($sock); local $ENV{PLACK_TEST_EXTERNALSERVER_URI} = "http://$h:$p"; - Plack::Test::ExternalServer::test_psgi(client => $client); + my $ua = LWP::UserAgent->new; + $ua->max_redirect(0); + Plack::Test::ExternalServer::test_psgi(client => $client, + ua => $ua); $td->join('TERM'); open my $fh, '<', $err or BAIL_OUT $!; my $e = do { local $/; <$fh> }; diff --git a/lib/PublicInbox/WwwAtomStream.pm b/lib/PublicInbox/WwwAtomStream.pm index 82895db6..7b7047ac 100644 --- a/lib/PublicInbox/WwwAtomStream.pm +++ b/lib/PublicInbox/WwwAtomStream.pm @@ -38,6 +38,7 @@ sub async_next ($) { sub async_eml { # for async_blob_cb my ($ctx, $eml) = @_; my $smsg = delete $ctx->{smsg}; + $smsg->{mid} // $smsg->populate($eml); $ctx->write(feed_entry($ctx, $smsg, $eml)); } diff --git a/t/plack.t b/t/plack.t index a5fd54c9..20f5d8d5 100644 --- a/t/plack.t +++ b/t/plack.t @@ -9,6 +9,7 @@ my @mods = qw(HTTP::Request::Common Plack::Test URI::Escape); require_mods(@mods); foreach my $mod (@mods) { use_ok $mod; } ok(-f $psgi, "psgi example file found"); +my ($tmpdir, $for_destroy) = tmpdir(); my $pfx = 'http://example.com/test'; my $eml = eml_load('t/iso-2202-jp.eml'); # ensure successful message deliveries @@ -71,91 +72,74 @@ EOF close $fh or BAIL_OUT "close: $!"; }); -local $ENV{PI_CONFIG} = "$ibx->{inboxdir}/pi_config"; -my $app = require $psgi; -test_psgi($app, sub { +my $env = { PI_CONFIG => "$ibx->{inboxdir}/pi_config", TMPDIR => $tmpdir }; +local @ENV{keys %$env} = values %$env; +my $c1 = sub { my ($cb) = @_; + my $uri = $ENV{PLACK_TEST_EXTERNALSERVER_URI} // 'http://example.com'; + $pfx = "$uri/test"; + foreach my $u (qw(robots.txt favicon.ico .well-known/foo)) { - my $res = $cb->(GET("http://example.com/$u")); + my $res = $cb->(GET("$uri/$u")); is($res->code, 404, "$u is missing"); } -}); -test_psgi($app, sub { - my ($cb) = @_; - my $res = $cb->(GET('http://example.com/test/crlf@example.com/')); + my $res = $cb->(GET("$uri/test/crlf\@example.com/")); is($res->code, 200, 'retrieved CRLF as HTML'); like($res->content, qr/mailto:me\@example/, 'no %40, per RFC 6068'); unlike($res->content, qr/\r/, 'no CR in HTML'); - $res = $cb->(GET('http://example.com/test/crlf@example.com/raw')); + $res = $cb->(GET("$uri/test/crlf\@example.com/raw")); is($res->code, 200, 'retrieved CRLF raw'); like($res->content, qr/\r/, 'CR preserved in raw message'); - $res = $cb->(GET('http://example.com/test/bogus@example.com/raw')); + $res = $cb->(GET("$uri/test/bogus\@example.com/raw")); is($res->code, 404, 'missing /raw is 404'); -}); -# redirect with newsgroup -test_psgi($app, sub { - my ($cb) = @_; - my $from = 'http://example.com/inbox.test'; - my $to = 'http://example.com/test/'; - my $res = $cb->(GET($from)); + # redirect with newsgroup + my $from = "$uri/inbox.test"; + my $to = "http://example.com/test/"; + $res = $cb->(GET($from)); is($res->code, 301, 'newsgroup name is permanent redirect'); is($to, $res->header('Location'), 'redirect location matches'); $from .= '/'; is($res->code, 301, 'newsgroup name/ is permanent redirect'); is($to, $res->header('Location'), 'redirect location matches'); -}); -# redirect with trailing / -test_psgi($app, sub { - my ($cb) = @_; - my $from = 'http://example.com/test'; - my $to = "$from/"; - my $res = $cb->(GET($from)); + # redirect with trailing / + $from = "$uri/test"; + $to = "$from/"; + $res = $cb->(GET($from)); is(301, $res->code, 'is permanent redirect'); is($to, $res->header('Location'), 'redirect location matches with trailing slash'); -}); -foreach my $t (qw(t T)) { - test_psgi($app, sub { - my ($cb) = @_; + for my $t (qw(T t)) { my $u = $pfx . "/blah\@example.com/$t"; - my $res = $cb->(GET($u)); + $res = $cb->(GET($u)); is(301, $res->code, "redirect for missing /"); my $location = $res->header('Location'); like($location, qr!/\Q$t\E/#u\z!, 'redirected with missing /'); - }); -} -foreach my $t (qw(f)) { - test_psgi($app, sub { - my ($cb) = @_; + } + + for my $t (qw(f)) { # legacy redirect my $u = $pfx . "/blah\@example.com/$t"; - my $res = $cb->(GET($u)); + $res = $cb->(GET($u)); is(301, $res->code, "redirect for legacy /f"); my $location = $res->header('Location'); like($location, qr!/blah\@example\.com/\z!, 'redirected with missing /'); - }); -} + } -test_psgi($app, sub { - my ($cb) = @_; - my $atomurl = 'http://example.com/test/new.atom'; - my $res = $cb->(GET('http://example.com/test/new.html')); + my $atomurl = "$uri/test/new.atom"; + $res = $cb->(GET("$uri/test/new.html")); is(200, $res->code, 'success response received'); like($res->content, qr!href="new\.atom"!, 'atom URL generated'); like($res->content, qr!href="blah\@example\.com/"!, 'index generated'); like($res->content, qr!1993-10-02!, 'date set'); -}); -test_psgi($app, sub { - my ($cb) = @_; - my $res = $cb->(GET($pfx . '/atom.xml')); + $res = $cb->(GET($pfx . '/atom.xml')); is(200, $res->code, 'success response received for atom'); my $body = $res->content; like($body, qr!link\s+href="\Q$pfx\E/blah\@example\.com/"!s, @@ -165,12 +149,9 @@ test_psgi($app, sub { like($body, qr/zzzzzz/, 'body included'); $res = $cb->(GET($pfx . '/description')); like($res->content, qr/test for public-inbox/, 'got description'); -}); -test_psgi($app, sub { - my ($cb) = @_; my $path = '/blah@example.com/'; - my $res = $cb->(GET($pfx . $path)); + $res = $cb->(GET($pfx . $path)); is(200, $res->code, "success for $path"); my $html = $res->content; like($html, qr!hihi - Me!, 'HTML returned'); @@ -196,11 +177,9 @@ test_psgi($app, sub { $res = $cb->(GET($pfx . '/qp@example.com/')); like($res->content, qr/\bhi = bye\b/, "HTML output decoded QP"); -}); -test_psgi($app, sub { - my ($cb) = @_; - my $res = $cb->(GET($pfx . '/blah@example.com/raw')); + + $res = $cb->(GET($pfx . '/blah@example.com/raw')); is(200, $res->code, 'success response received for /*/raw'); like($res->content, qr!^From !sm, "mbox returned"); is($res->header('Content-Type'), 'text/plain; charset=iso-8859-1', @@ -213,75 +192,62 @@ test_psgi($app, sub { $res = $cb->(GET($pfx . '/199707281508.AAA24167@hoyogw.example/raw')); is($res->header('Content-Type'), 'text/plain; charset=ISO-2022-JP', 'ISO-2002-JP returned'); - chomp(my $body = $res->content); + chomp($body = $res->content); my $raw = PublicInbox::Eml->new(\$body); is($raw->body_raw, $eml->body_raw, 'ISO-2022-JP body unmodified'); $res = $cb->(GET($pfx . '/blah@example.com/t.mbox.gz')); is(501, $res->code, '501 when overview missing'); like($res->content, qr!\bOverview\b!, 'overview omission noted'); -}); -# legacy redirects -foreach my $t (qw(m f)) { - test_psgi($app, sub { - my ($cb) = @_; - my $res = $cb->(GET($pfx . "/$t/blah\@example.com.txt")); + # legacy redirects + for my $t (qw(m f)) { + $res = $cb->(GET($pfx . "/$t/blah\@example.com.txt")); is(301, $res->code, "redirect for old $t .txt link"); - my $location = $res->header('Location'); + $location = $res->header('Location'); like($location, qr!/blah\@example\.com/raw\z!, ".txt redirected to /raw"); - }); -} - -my %umap = ( - 'm' => '', - 'f' => '', - 't' => 't/', -); -while (my ($t, $e) = each %umap) { - test_psgi($app, sub { - my ($cb) = @_; - my $res = $cb->(GET($pfx . "/$t/blah\@example.com.html")); + } + + my %umap = ( + 'm' => '', + 'f' => '', + 't' => 't/', + ); + while (my ($t, $e) = each %umap) { + $res = $cb->(GET($pfx . "/$t/blah\@example.com.html")); is(301, $res->code, "redirect for old $t .html link"); - my $location = $res->header('Location'); - like($location, - qr!/blah\@example\.com/$e(?:#u)?\z!, - ".html redirected to new location"); - }); -} -foreach my $sfx (qw(mbox mbox.gz)) { - test_psgi($app, sub { - my ($cb) = @_; - my $res = $cb->(GET($pfx . "/t/blah\@example.com.$sfx")); + $location = $res->header('Location'); + like($location, qr!/blah\@example\.com/$e(?:#u)?\z!, + ".html redirected to new location"); + } + + for my $sfx (qw(mbox mbox.gz)) { + $res = $cb->(GET($pfx . "/t/blah\@example.com.$sfx")); is(301, $res->code, 'redirect for old thread link'); - my $location = $res->header('Location'); + $location = $res->header('Location'); like($location, qr!/blah\@example\.com/t\.mbox(?:\.gz)?\z!, "$sfx redirected to /mbox.gz"); - }); -} -test_psgi($app, sub { - my ($cb) = @_; + } + # for a while, we used to support /$INBOX/$X40/ # when we "compressed" long Message-IDs to SHA-1 # Now we're stuck supporting them forever :< - foreach my $path ('f2912279bd7bcd8b7ab3033234942d58746d56f7') { - my $from = "http://example.com/test/$path/"; - my $res = $cb->(GET($from)); + for my $path ('f2912279bd7bcd8b7ab3033234942d58746d56f7') { + $from = "$uri/test/$path/"; + $res = $cb->(GET($from)); is(301, $res->code, 'is permanent redirect'); like($res->header('Location'), qr!/test/blah\@example\.com/!, 'redirect from x40 MIDs works'); } -}); -# dumb HTTP clone/fetch support -test_psgi($app, sub { - my ($cb) = @_; - my $path = '/test/info/refs'; + + # dumb HTTP clone/fetch support + $path = '/test/info/refs'; my $req = HTTP::Request->new('GET' => $path); - my $res = $cb->($req); + $res = $cb->($req); is(200, $res->code, 'refs readable'); my $orig = $res->content; @@ -294,19 +260,14 @@ test_psgi($app, sub { $res = $cb->($req); is(206, $res->code, 'got partial another response'); is($res->content, substr($orig, 5), 'partial body OK past end'); -}); -# things which should fail -test_psgi($app, sub { - my ($cb) = @_; - my $res = $cb->(PUT('/')); + # things which should fail + $res = $cb->(PUT('/')); is(405, $res->code, 'no PUT to / allowed'); $res = $cb->(PUT('/test/')); is(405, $res->code, 'no PUT /$INBOX allowed'); - - # TODO - # $res = $cb->(GET('/')); -}); - -done_testing(); +}; +test_psgi(require $psgi, $c1); +test_httpd($env, $c1); +done_testing;