From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.2 required=3.0 tests=ALL_TRUSTED,BAYES_00, DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF, T_SCC_BODY_TEXT_LINE shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 9453D1F54E for ; Sun, 4 Sep 2022 04:28:59 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=80x24.org; s=selector1; t=1662265739; bh=34xGz4KsEjeDqR35nECIs4Eu6Ge4aNPdbGCqIW5Rquw=; h=From:To:Subject:Date:From; b=MYsAaYFfCj4bwBbwk0pXJ8DY7LLc5rVNbWCmN9tf5E1Z5aLgHZ5mQ+S2wlQfvVKMz V0KJA3+NkAbzWEnIe1YxDwaIUtihtUhYq7FMuybBeyZXSDv7hAH1U0Xg4JlHKSgsNz WgBF3VkjYuUtmqUBsDEhSTRjGVHBFw7RGVtcMvfs= From: Eric Wong To: meta@public-inbox.org Subject: [PATCH] prepare HTML rendering maintainer tests for upcoming changes Date: Sun, 4 Sep 2022 04:27:49 +0000 Message-Id: <20220904042749.1614405-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: There'll be a number of upcoming changes to HTML rendering of messages to hopefully reduce memory usage and improve speed by writing out to the gzip buffer earlier. Update the tests now so it'll be easier to compare before-and-after results. 
--- MANIFEST | 1 - xt/cmp-msgview.t | 94 --------------------------------------------- xt/perf-msgview.t | 24 ++++++------ xt/perf-obfuscate.t | 26 +++++++------ 4 files changed, 27 insertions(+), 118 deletions(-) delete mode 100644 xt/cmp-msgview.t diff --git a/MANIFEST b/MANIFEST index 336c7e6a..ac21ddcc 100644 --- a/MANIFEST +++ b/MANIFEST @@ -582,7 +582,6 @@ t/x-unknown-alpine.eml t/xcpdb-reshard.t version-gen.perl xt/cmp-msgstr.t -xt/cmp-msgview.t xt/create-many-inboxes.t xt/eml_check_limits.t xt/eml_octet-stream.t diff --git a/xt/cmp-msgview.t b/xt/cmp-msgview.t deleted file mode 100644 index 9b06f88d..00000000 --- a/xt/cmp-msgview.t +++ /dev/null @@ -1,94 +0,0 @@ -#!perl -w -# Copyright (C) 2020-2021 all contributors -# License: AGPL-3.0+ -use strict; -use Test::More; -use Benchmark qw(:all); -use PublicInbox::Inbox; -use PublicInbox::View; -use PublicInbox::TestCommon; -use PublicInbox::Eml; -use Digest::MD5; -require_git(2.19); -require_mods qw(Data::Dumper Email::MIME Plack::Util); -Data::Dumper->import('Dumper'); -require PublicInbox::MIME; -my ($tmpdir, $for_destroy) = tmpdir(); -my $inboxdir = $ENV{GIANT_INBOX_DIR}; -plan skip_all => "GIANT_INBOX_DIR not defined for $0" unless $inboxdir; -my @cat = qw(cat-file --buffer --batch-check --batch-all-objects --unordered); -my $ibx = PublicInbox::Inbox->new({ inboxdir => $inboxdir, name => 'perf' }); -my $git = $ibx->git; -my $fh = $git->popen(@cat); -vec(my $vec = '', fileno($fh), 1) = 1; -select($vec, undef, undef, 60) or die "timed out waiting for --batch-check"; -my $mime_ctx = { - env => { HTTP_HOST => 'example.com', 'psgi.url_scheme' => 'https' }, - ibx => $ibx, - www => Plack::Util::inline_object(style => sub {''}), - obuf => \(my $mime_buf = ''), - mhref => '../', -}; -my $eml_ctx = { %$mime_ctx, obuf => \(my $eml_buf = '') }; -my $n = 0; -my $m = 0; -my $ndiff_html = 0; -my $dig_cls = 'Digest::MD5'; -my $digest_attach = sub { # ensure ->body (not ->body_raw) matches - my ($p, $cmp_arg) = @_; - my 
$part = shift @$p; - my $dig = $cmp_arg->[0] //= $dig_cls->new; - $dig->add($part->body_raw); - push @$cmp_arg, join(', ', @$p); -}; - -my $git_cb = sub { - my ($bref, $oid) = @_; - local $SIG{__WARN__} = sub { diag "$inboxdir $oid ", @_ }; - ++$m; - my $mime = PublicInbox::MIME->new($$bref); - PublicInbox::View::multipart_text_as_html($mime, $mime_ctx); - my $eml = PublicInbox::Eml->new($$bref); - PublicInbox::View::multipart_text_as_html($eml, $eml_ctx); - if ($eml_buf ne $mime_buf) { - ++$ndiff_html; - open my $fh, '>', "$tmpdir/mime" or die $!; - print $fh $mime_buf or die $!; - close $fh or die $!; - open $fh, '>', "$tmpdir/eml" or die $!; - print $fh $eml_buf or die $!; - close $fh or die $!; - # using `git diff', diff(1) may not be installed - diag "$inboxdir $oid differs"; - diag xqx([qw(git diff), "$tmpdir/mime", "$tmpdir/eml"]); - } - $eml_buf = $mime_buf = ''; - - # don't tolerate differences in attachment downloads - $mime = PublicInbox::MIME->new($$bref); - $mime->each_part($digest_attach, my $mime_cmp = [], 1); - $eml = PublicInbox::Eml->new($$bref); - $eml->each_part($digest_attach, my $eml_cmp = [], 1); - $mime_cmp->[0] = $mime_cmp->[0]->hexdigest; - $eml_cmp->[0] = $eml_cmp->[0]->hexdigest; - # don't have millions of "ok" lines - if (join("\0", @$eml_cmp) ne join("\0", @$mime_cmp)) { - diag Dumper([ $oid, eml => $eml_cmp, mime =>$mime_cmp ]); - is_deeply($eml_cmp, $mime_cmp, "$inboxdir $oid match"); - } -}; -my $t = timeit(1, sub { - while (<$fh>) { - my ($oid, $type) = split / /; - next if $type ne 'blob'; - ++$n; - $git->cat_async($oid, $git_cb); - } - $git->async_wait_all; -}); -is($m, $n, 'rendered all messages'); - -# we'll tolerate minor differences in HTML rendering -diag "$ndiff_html HTML differences"; - -done_testing(); diff --git a/xt/perf-msgview.t b/xt/perf-msgview.t index cf550c1a..7f92ce85 100644 --- a/xt/perf-msgview.t +++ b/xt/perf-msgview.t @@ -7,7 +7,7 @@ use PublicInbox::TestCommon; use Benchmark qw(:all); use PublicInbox::Inbox; 
use PublicInbox::View; -use PublicInbox::Spawn qw(popen_rd); +use PublicInbox::WwwStream; my $inboxdir = $ENV{GIANT_INBOX_DIR} // $ENV{GIANT_PI_DIR}; my $blob = $ENV{TEST_BLOB}; @@ -31,26 +31,28 @@ if ($fh) { die "timed out waiting for --batch-check"; } -my $ctx = { +my $ctx = bless { env => { HTTP_HOST => 'example.com', 'psgi.url_scheme' => 'https' }, ibx => $ibx, www => Plack::Util::inline_object(style => sub {''}), -}; -my ($mime, $res, $oid, $type); + gz => PublicInbox::GzipFilter::gzip_or_die(), +}, 'PublicInbox::WwwStream'; +my ($eml, $res, $oid, $type); my $n = 0; -my $obuf = ''; my $m = 0; +${$ctx->{obuf}} = ''; +$ctx->{mhref} = '../'; my $cb = sub { - $mime = PublicInbox::Eml->new(shift); - PublicInbox::View::multipart_text_as_html($mime, $ctx); + $eml = PublicInbox::Eml->new(shift); + $eml->each_part(\&PublicInbox::View::add_text_body, $ctx, 1); + $ctx->zflush; ++$m; - $obuf = ''; + delete $ctx->{zbuf}; + ${$ctx->{obuf}} = ''; }; my $t = timeit(1, sub { - $ctx->{obuf} = \$obuf; - $ctx->{mhref} = '../'; if (defined $blob) { my $nr = $ENV{NR} // 10000; for (1..$nr) { @@ -67,6 +69,6 @@ my $t = timeit(1, sub { } $git->async_wait_all; }); -diag 'multipart_text_as_html took '.timestr($t)." for $n <=> $m messages"; +diag 'add_text_body took '.timestr($t)." 
for $n <=> $m messages"; is($m, $n, 'rendered all messages'); done_testing(); diff --git a/xt/perf-obfuscate.t b/xt/perf-obfuscate.t index 640309d2..4da36124 100644 --- a/xt/perf-obfuscate.t +++ b/xt/perf-obfuscate.t @@ -1,5 +1,5 @@ #!perl -w -# Copyright (C) 2021 all contributors +# Copyright (C) all contributors # License: AGPL-3.0+ use strict; use v5.10.1; @@ -7,6 +7,7 @@ use PublicInbox::TestCommon; use Benchmark qw(:all); use PublicInbox::Inbox; use PublicInbox::View; +use PublicInbox::WwwStream; my $inboxdir = $ENV{GIANT_INBOX_DIR}; plan skip_all => "GIANT_INBOX_DIR not defined for $0" unless $inboxdir; @@ -22,7 +23,6 @@ if (require_git(2.19, 1)) { "git <2.19, cat-file lacks --unordered, locality suffers\n"; } require_mods qw(Plack::Util); -use_ok 'Plack::Util'; my $ibx = PublicInbox::Inbox->new({ inboxdir => $inboxdir, name => 'name' , obfuscate => $obfuscate}); my $git = $ibx->git; @@ -31,26 +31,28 @@ my $vec = ''; vec($vec, fileno($fh), 1) = 1; select($vec, undef, undef, 60) or die "timed out waiting for --batch-check"; -my $ctx = { +my $ctx = bless { env => { HTTP_HOST => 'example.com', 'psgi.url_scheme' => 'https' }, ibx => $ibx, www => Plack::Util::inline_object(style => sub {''}), -}; -my ($mime, $res, $oid, $type); + gz => PublicInbox::GzipFilter::gzip_or_die(), +}, 'PublicInbox::WwwStream'; +my ($eml, $res, $oid, $type); my $n = 0; -my $obuf = ''; my $m = 0; +${$ctx->{obuf}} = ''; +$ctx->{mhref} = '../'; my $cb = sub { - $mime = PublicInbox::Eml->new(shift); - PublicInbox::View::multipart_text_as_html($mime, $ctx); + $eml = PublicInbox::Eml->new(shift); + $eml->each_part(\&PublicInbox::View::add_text_body, $ctx, 1); + $ctx->zflush; ++$m; - $obuf = ''; + delete $ctx->{zbuf}; + ${$ctx->{obuf}} = ''; }; my $t = timeit(1, sub { - $ctx->{obuf} = \$obuf; - $ctx->{mhref} = '../'; while (<$fh>) { ($oid, $type) = split / /; next if $type ne 'blob'; @@ -59,6 +61,6 @@ my $t = timeit(1, sub { } $git->async_wait_all; }); -diag 'multipart_text_as_html took 
'.timestr($t)." for $n <=> $m messages"; +diag 'add_text_body took '.timestr($t)." for $n <=> $m messages"; is($m, $n, 'rendered all messages'); done_testing();