unofficial mirror of meta@public-inbox.org
 help / color / mirror / Atom feed
* [PATCH] prepare HTML rendering maintainer tests for upcoming changes
@ 2022-09-04  4:27 Eric Wong
  0 siblings, 0 replies; only message in thread
From: Eric Wong @ 2022-09-04  4:27 UTC (permalink / raw)
  To: meta

There'll be a number of upcoming changes to HTML rendering
of messages to hopefully reduce memory usage and improve speed
by writing out to the gzip buffer earlier.

Update the tests now so it'll be easier to compare results
before and after those changes.
---
 MANIFEST            |  1 -
 xt/cmp-msgview.t    | 94 ---------------------------------------------
 xt/perf-msgview.t   | 24 ++++++------
 xt/perf-obfuscate.t | 26 +++++++------
 4 files changed, 27 insertions(+), 118 deletions(-)
 delete mode 100644 xt/cmp-msgview.t

diff --git a/MANIFEST b/MANIFEST
index 336c7e6a..ac21ddcc 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -582,7 +582,6 @@ t/x-unknown-alpine.eml
 t/xcpdb-reshard.t
 version-gen.perl
 xt/cmp-msgstr.t
-xt/cmp-msgview.t
 xt/create-many-inboxes.t
 xt/eml_check_limits.t
 xt/eml_octet-stream.t
diff --git a/xt/cmp-msgview.t b/xt/cmp-msgview.t
deleted file mode 100644
index 9b06f88d..00000000
--- a/xt/cmp-msgview.t
+++ /dev/null
@@ -1,94 +0,0 @@
-#!perl -w
-# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
-# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
-use strict;
-use Test::More;
-use Benchmark qw(:all);
-use PublicInbox::Inbox;
-use PublicInbox::View;
-use PublicInbox::TestCommon;
-use PublicInbox::Eml;
-use Digest::MD5;
-require_git(2.19);
-require_mods qw(Data::Dumper Email::MIME Plack::Util);
-Data::Dumper->import('Dumper');
-require PublicInbox::MIME;
-my ($tmpdir, $for_destroy) = tmpdir();
-my $inboxdir = $ENV{GIANT_INBOX_DIR};
-plan skip_all => "GIANT_INBOX_DIR not defined for $0" unless $inboxdir;
-my @cat = qw(cat-file --buffer --batch-check --batch-all-objects --unordered);
-my $ibx = PublicInbox::Inbox->new({ inboxdir => $inboxdir, name => 'perf' });
-my $git = $ibx->git;
-my $fh = $git->popen(@cat);
-vec(my $vec = '', fileno($fh), 1) = 1;
-select($vec, undef, undef, 60) or die "timed out waiting for --batch-check";
-my $mime_ctx = {
-	env => { HTTP_HOST => 'example.com', 'psgi.url_scheme' => 'https' },
-	ibx => $ibx,
-	www => Plack::Util::inline_object(style => sub {''}),
-	obuf => \(my $mime_buf = ''),
-	mhref => '../',
-};
-my $eml_ctx = { %$mime_ctx, obuf => \(my $eml_buf = '') };
-my $n = 0;
-my $m = 0;
-my $ndiff_html = 0;
-my $dig_cls = 'Digest::MD5';
-my $digest_attach = sub { # ensure ->body (not ->body_raw) matches
-	my ($p, $cmp_arg) = @_;
-	my $part = shift @$p;
-	my $dig = $cmp_arg->[0] //= $dig_cls->new;
-	$dig->add($part->body_raw);
-	push @$cmp_arg, join(', ', @$p);
-};
-
-my $git_cb = sub {
-	my ($bref, $oid) = @_;
-	local $SIG{__WARN__} = sub { diag "$inboxdir $oid ", @_ };
-	++$m;
-	my $mime = PublicInbox::MIME->new($$bref);
-	PublicInbox::View::multipart_text_as_html($mime, $mime_ctx);
-	my $eml = PublicInbox::Eml->new($$bref);
-	PublicInbox::View::multipart_text_as_html($eml, $eml_ctx);
-	if ($eml_buf ne $mime_buf) {
-		++$ndiff_html;
-		open my $fh, '>', "$tmpdir/mime" or die $!;
-		print $fh $mime_buf or die $!;
-		close $fh or die $!;
-		open $fh, '>', "$tmpdir/eml" or die $!;
-		print $fh $eml_buf or die $!;
-		close $fh or die $!;
-		# using `git diff', diff(1) may not be installed
-		diag "$inboxdir $oid differs";
-		diag xqx([qw(git diff), "$tmpdir/mime", "$tmpdir/eml"]);
-	}
-	$eml_buf = $mime_buf = '';
-
-	# don't tolerate differences in attachment downloads
-	$mime = PublicInbox::MIME->new($$bref);
-	$mime->each_part($digest_attach, my $mime_cmp = [], 1);
-	$eml = PublicInbox::Eml->new($$bref);
-	$eml->each_part($digest_attach, my $eml_cmp = [], 1);
-	$mime_cmp->[0] = $mime_cmp->[0]->hexdigest;
-	$eml_cmp->[0] = $eml_cmp->[0]->hexdigest;
-	# don't have millions of "ok" lines
-	if (join("\0", @$eml_cmp) ne join("\0", @$mime_cmp)) {
-		diag Dumper([ $oid, eml => $eml_cmp, mime =>$mime_cmp ]);
-		is_deeply($eml_cmp, $mime_cmp, "$inboxdir $oid match");
-	}
-};
-my $t = timeit(1, sub {
-	while (<$fh>) {
-		my ($oid, $type) = split / /;
-		next if $type ne 'blob';
-		++$n;
-		$git->cat_async($oid, $git_cb);
-	}
-	$git->async_wait_all;
-});
-is($m, $n, 'rendered all messages');
-
-# we'll tolerate minor differences in HTML rendering
-diag "$ndiff_html HTML differences";
-
-done_testing();
diff --git a/xt/perf-msgview.t b/xt/perf-msgview.t
index cf550c1a..7f92ce85 100644
--- a/xt/perf-msgview.t
+++ b/xt/perf-msgview.t
@@ -7,7 +7,7 @@ use PublicInbox::TestCommon;
 use Benchmark qw(:all);
 use PublicInbox::Inbox;
 use PublicInbox::View;
-use PublicInbox::Spawn qw(popen_rd);
+use PublicInbox::WwwStream;
 
 my $inboxdir = $ENV{GIANT_INBOX_DIR} // $ENV{GIANT_PI_DIR};
 my $blob = $ENV{TEST_BLOB};
@@ -31,26 +31,28 @@ if ($fh) {
 		die "timed out waiting for --batch-check";
 }
 
-my $ctx = {
+my $ctx = bless {
 	env => { HTTP_HOST => 'example.com', 'psgi.url_scheme' => 'https' },
 	ibx => $ibx,
 	www => Plack::Util::inline_object(style => sub {''}),
-};
-my ($mime, $res, $oid, $type);
+	gz => PublicInbox::GzipFilter::gzip_or_die(),
+}, 'PublicInbox::WwwStream';
+my ($eml, $res, $oid, $type);
 my $n = 0;
-my $obuf = '';
 my $m = 0;
+${$ctx->{obuf}} = '';
+$ctx->{mhref} = '../';
 
 my $cb = sub {
-	$mime = PublicInbox::Eml->new(shift);
-	PublicInbox::View::multipart_text_as_html($mime, $ctx);
+	$eml = PublicInbox::Eml->new(shift);
+	$eml->each_part(\&PublicInbox::View::add_text_body, $ctx, 1);
+	$ctx->zflush;
 	++$m;
-	$obuf = '';
+	delete $ctx->{zbuf};
+	${$ctx->{obuf}} = '';
 };
 
 my $t = timeit(1, sub {
-	$ctx->{obuf} = \$obuf;
-	$ctx->{mhref} = '../';
 	if (defined $blob) {
 		my $nr = $ENV{NR} // 10000;
 		for (1..$nr) {
@@ -67,6 +69,6 @@ my $t = timeit(1, sub {
 	}
 	$git->async_wait_all;
 });
-diag 'multipart_text_as_html took '.timestr($t)." for $n <=> $m messages";
+diag 'add_text_body took '.timestr($t)." for $n <=> $m messages";
 is($m, $n, 'rendered all messages');
 done_testing();
diff --git a/xt/perf-obfuscate.t b/xt/perf-obfuscate.t
index 640309d2..4da36124 100644
--- a/xt/perf-obfuscate.t
+++ b/xt/perf-obfuscate.t
@@ -1,5 +1,5 @@
 #!perl -w
-# Copyright (C) 2021 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 use strict;
 use v5.10.1;
@@ -7,6 +7,7 @@ use PublicInbox::TestCommon;
 use Benchmark qw(:all);
 use PublicInbox::Inbox;
 use PublicInbox::View;
+use PublicInbox::WwwStream;
 
 my $inboxdir = $ENV{GIANT_INBOX_DIR};
 plan skip_all => "GIANT_INBOX_DIR not defined for $0" unless $inboxdir;
@@ -22,7 +23,6 @@ if (require_git(2.19, 1)) {
 "git <2.19, cat-file lacks --unordered, locality suffers\n";
 }
 require_mods qw(Plack::Util);
-use_ok 'Plack::Util';
 my $ibx = PublicInbox::Inbox->new({ inboxdir => $inboxdir, name => 'name' ,
 				    obfuscate => $obfuscate});
 my $git = $ibx->git;
@@ -31,26 +31,28 @@ my $vec = '';
 vec($vec, fileno($fh), 1) = 1;
 select($vec, undef, undef, 60) or die "timed out waiting for --batch-check";
 
-my $ctx = {
+my $ctx = bless {
 	env => { HTTP_HOST => 'example.com', 'psgi.url_scheme' => 'https' },
 	ibx => $ibx,
 	www => Plack::Util::inline_object(style => sub {''}),
-};
-my ($mime, $res, $oid, $type);
+	gz => PublicInbox::GzipFilter::gzip_or_die(),
+}, 'PublicInbox::WwwStream';
+my ($eml, $res, $oid, $type);
 my $n = 0;
-my $obuf = '';
 my $m = 0;
+${$ctx->{obuf}} = '';
+$ctx->{mhref} = '../';
 
 my $cb = sub {
-	$mime = PublicInbox::Eml->new(shift);
-	PublicInbox::View::multipart_text_as_html($mime, $ctx);
+	$eml = PublicInbox::Eml->new(shift);
+	$eml->each_part(\&PublicInbox::View::add_text_body, $ctx, 1);
+	$ctx->zflush;
 	++$m;
-	$obuf = '';
+	delete $ctx->{zbuf};
+	${$ctx->{obuf}} = '';
 };
 
 my $t = timeit(1, sub {
-	$ctx->{obuf} = \$obuf;
-	$ctx->{mhref} = '../';
 	while (<$fh>) {
 		($oid, $type) = split / /;
 		next if $type ne 'blob';
@@ -59,6 +61,6 @@ my $t = timeit(1, sub {
 	}
 	$git->async_wait_all;
 });
-diag 'multipart_text_as_html took '.timestr($t)." for $n <=> $m messages";
+diag 'add_text_body took '.timestr($t)." for $n <=> $m messages";
 is($m, $n, 'rendered all messages');
 done_testing();

^ permalink raw reply related	[flat|nested] only message in thread

only message in thread, other threads:[~2022-09-04  4:28 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-09-04  4:27 [PATCH] prepare HTML rendering maintainer tests for upcoming changes Eric Wong

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).