unofficial mirror of meta@public-inbox.org
 help / color / mirror / Atom feed
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 27/38] www: switch to zadd for the majority of buffering
Date: Sat, 10 Sep 2022 08:17:18 +0000	[thread overview]
Message-ID: <20220910081729.2011934-28-e@80x24.org> (raw)
In-Reply-To: <20220910081729.2011934-1-e@80x24.org>

This lets us concentrate string concatenations in one place so
that Perl's internal scratchpad optimizations can reuse memory.

Calling Compress::Raw::Zlib::deflate repeatedly proves too
expensive in terms of CPU cycles.
---
 lib/PublicInbox/GzipFilter.pm    | 22 +++++++++++-------
 lib/PublicInbox/Mbox.pm          |  2 +-
 lib/PublicInbox/SearchView.pm    |  2 +-
 lib/PublicInbox/View.pm          | 40 ++++++++++++++++----------------
 lib/PublicInbox/ViewDiff.pm      | 14 +++++------
 lib/PublicInbox/WwwAtomStream.pm |  2 +-
 lib/PublicInbox/WwwStream.pm     |  4 ++--
 7 files changed, 46 insertions(+), 40 deletions(-)

diff --git a/lib/PublicInbox/GzipFilter.pm b/lib/PublicInbox/GzipFilter.pm
index eb0046ce..1f11acb8 100644
--- a/lib/PublicInbox/GzipFilter.pm
+++ b/lib/PublicInbox/GzipFilter.pm
@@ -127,15 +127,21 @@ sub write {
 	http_out($_[0])->write(translate($_[0], $_[1]));
 }
 
+sub zadd {
+	my $self = shift;
+	$self->{pbuf} .= $_ for @_; # perl internal pad memory use here
+}
+
 # similar to ->translate; use this when we're sure we know we have
 # more data to buffer after this
 sub zmore {
 	my $self = shift; # $_[1] => input
 	http_out($self);
-	my $err;
+	my $x;
+	defined($x = delete($self->{pbuf})) and unshift(@_, $x);
 	for (@_) {
-		$err = $self->{gz}->deflate($_, $self->{zbuf});
-		die "gzip->deflate: $err" if $err != Z_OK;
+		($x = $self->{gz}->deflate($_, $self->{zbuf})) == Z_OK or
+			die "gzip->deflate: $x";
 	}
 	undef;
 }
@@ -145,14 +151,14 @@ sub zflush ($;@) {
 	my $self = shift; # $_[1..Inf] => final input (optional)
 	my $zbuf = delete $self->{zbuf};
 	my $gz = delete $self->{gz};
-	my $err;
+	my $x;
+	defined($x = delete($self->{pbuf})) and unshift(@_, $x);
 	for (@_) { # it's a bug iff $gz is undef if @_ isn't empty, here:
-		$err = $gz->deflate($_, $zbuf);
-		die "gzip->deflate: $err" if $err != Z_OK;
+		($x = $gz->deflate($_, $zbuf)) == Z_OK or
+			die "gzip->deflate: $x";
 	}
 	$gz // return ''; # not a bug, recursing on DS->write failure
-	$err = $gz->flush($zbuf);
-	die "gzip->flush: $err" if $err != Z_OK;
+	($x = $gz->flush($zbuf)) == Z_OK or die "gzip->flush: $x";
 	$zbuf;
 }
 
diff --git a/lib/PublicInbox/Mbox.pm b/lib/PublicInbox/Mbox.pm
index 2ef8ff2b..cfe34d9c 100644
--- a/lib/PublicInbox/Mbox.pm
+++ b/lib/PublicInbox/Mbox.pm
@@ -20,7 +20,7 @@ sub getline {
 	my $ibx = $ctx->{ibx};
 	my $eml = delete($ctx->{eml}) // $ibx->smsg_eml($smsg) // return;
 	my $n = $ctx->{smsg} = $ibx->over->next_by_mid(@{$ctx->{next_arg}});
-	$ctx->zmore(msg_hdr($ctx, $eml));
+	$ctx->zadd(msg_hdr($ctx, $eml));
 	if ($n) {
 		$ctx->translate(msg_body($eml));
 	} else { # last message
diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm
index e0404e5f..b18947ee 100644
--- a/lib/PublicInbox/SearchView.pm
+++ b/lib/PublicInbox/SearchView.pm
@@ -331,7 +331,7 @@ sub mset_thread {
 # callback for PublicInbox::WwwStream::getline
 sub mset_thread_i {
 	my ($ctx, $eml) = @_;
-	$ctx->zmore($ctx->html_top) if exists $ctx->{-html_tip};
+	$ctx->zadd($ctx->html_top) if exists $ctx->{-html_tip};
 	$eml and return PublicInbox::View::eml_entry($ctx, $eml);
 	my $smsg = shift @{$ctx->{msgs}} or
 		$ctx->zmore(${delete($ctx->{skel})});
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 630f1e42..85dc3bd8 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -40,10 +40,10 @@ sub msg_page_i {
 				"../${\mid_href($smsg->{mid})}/" : '';
 		if (_msg_page_prepare($eml, $ctx)) {
 			$eml->each_part(\&add_text_body, $ctx, 1);
-			$ctx->zmore('</pre><hr>');
+			$ctx->zadd('</pre><hr>');
 		}
 		html_footer($ctx, $ctx->{first_hdr}) if !$ctx->{smsg};
-		\''; # XXX TODO cleanup
+		''; # XXX TODO cleanup
 	} else { # called by WwwStream::async_next or getline
 		$ctx->{smsg}; # may be undef
 	}
@@ -58,7 +58,7 @@ sub no_over_html ($) {
 	PublicInbox::WwwStream::init($ctx);
 	if (_msg_page_prepare($eml, $ctx)) { # sets {-title_html}
 		$eml->each_part(\&add_text_body, $ctx, 1);
-		$ctx->zmore('</pre><hr>');
+		$ctx->zadd('</pre><hr>');
 	}
 	html_footer($ctx, $eml);
 	$ctx->html_done;
@@ -245,7 +245,7 @@ sub eml_entry {
 	# scan through all parts, looking for displayable text
 	$ctx->{mhref} = $mhref;
 	$ctx->{changed_href} = "#e$id"; # for diffstat "files? changed,"
-	$ctx->zmore($rv); # XXX $rv is small, reuse below
+	$ctx->zadd($rv); # XXX $rv is small, reuse below
 	$eml->each_part(\&add_text_body, $ctx, 1); # expensive
 
 	# add the footer
@@ -386,7 +386,7 @@ sub pre_thread  { # walk_thread callback
 sub thread_eml_entry {
 	my ($ctx, $eml) = @_;
 	my ($beg, $end) = thread_adj_level($ctx, $ctx->{level});
-	$ctx->zmore($beg.'<pre>');
+	$ctx->zadd($beg.'<pre>');
 	eml_entry($ctx, $eml) . '</pre>' . $end;
 }
 
@@ -414,15 +414,15 @@ sub stream_thread_i { # PublicInbox::WwwStream::getline callback
 				if (!$ghost_ok) { # first non-ghost
 					$ctx->{-title_html} =
 						ascii_html($smsg->{subject});
-					$ctx->zmore($ctx->html_top);
+					$ctx->zadd($ctx->html_top);
 				}
 				return $smsg;
 			}
 			# buffer the ghost entry and loop
-			$ctx->zmore(ghost_index_entry($ctx, $lvl, $smsg));
+			$ctx->zadd(ghost_index_entry($ctx, $lvl, $smsg));
 		} else { # all done
-			$ctx->zmore(join('', thread_adj_level($ctx, 0)));
-			$ctx->zmore(${delete($ctx->{skel})});
+			$ctx->zadd(join('', thread_adj_level($ctx, 0)));
+			$ctx->zadd(${delete($ctx->{skel})});
 			return;
 		}
 	}
@@ -491,7 +491,7 @@ sub thread_html_i { # PublicInbox::WwwStream::getline callback
 		my $smsg = $ctx->{smsg};
 		if (exists $ctx->{-html_tip}) {
 			$ctx->{-title_html} = ascii_html($smsg->{subject});
-			$ctx->zmore($ctx->html_top);
+			$ctx->zadd($ctx->html_top);
 		}
 		return eml_entry($ctx, $eml);
 	} else {
@@ -499,7 +499,7 @@ sub thread_html_i { # PublicInbox::WwwStream::getline callback
 			return $smsg if exists($smsg->{blob});
 		}
 		my $skel = delete($ctx->{skel}) or return; # all done
-		$ctx->zmore($$skel);
+		$ctx->zadd($$skel);
 		undef;
 	}
 }
@@ -560,7 +560,7 @@ sub add_text_body { # callback for each_part
 	my $ct = $part->content_type || 'text/plain';
 	my $fn = $part->filename;
 	my ($s, $err) = msg_part_text($part, $ct);
-	$s // return $ctx->zmore(attach_link($ctx, $ct, $p, $fn) // '');
+	$s // return $ctx->zadd(attach_link($ctx, $ct, $p, $fn) // '');
 	my $buf = $part->{is_submsg} ? submsg_hdr($ctx, $part)."\n" : '';
 
 	# makes no difference to browsers, and don't screw up filename
@@ -612,18 +612,18 @@ sub add_text_body { # callback for each_part
 		$buf .= attach_link($ctx, $ct, $p, $fn, $err) . "\n";
 	}
 	delete $part->{bdy}; # save memory
-	$ctx->zmore($buf);
+	$ctx->zadd($buf);
 	undef $buf;
 	for my $cur (@sections) { # $cur may be huge
 		if ($cur =~ /\A>/) {
 			# we use a <span> here to allow users to specify
 			# their own color for quoted text
-			$ctx->zmore(qq(<span\nclass="q">),
+			$ctx->zadd(qq(<span\nclass="q">),
 					$l->to_html($cur), '</span>');
 		} elsif ($diff) {
 			flush_diff($ctx, \$cur);
 		} else { # regular lines, OK
-			$ctx->zmore($l->to_html($cur));
+			$ctx->zadd($l->to_html($cur));
 		}
 		undef $cur; # free memory
 	}
@@ -685,10 +685,10 @@ sub _msg_page_prepare {
 		$hbuf .= qq[Message-ID: &lt;$x&gt; (<a href="raw">raw</a>)\n];
 	}
 	if (!$nr) { # first (and only) message, common case
-		$ctx->zmore($ctx->html_top, $hbuf);
+		$ctx->zadd($ctx->html_top, $hbuf);
 	} else {
 		delete $ctx->{-title_html};
-		$ctx->zmore($ctx->{-html_tip}, $hbuf);
+		$ctx->zadd($ctx->{-html_tip}, $hbuf);
 	}
 	$ctx->{-linkify} //= PublicInbox::Linkify->new;
 	$hbuf = '';
@@ -699,7 +699,7 @@ sub _msg_page_prepare {
 			$hbuf .= "$h: $_\n" for ($eml->header_raw($h));
 		}
 		$ctx->{-linkify}->linkify_mids('..', \$hbuf, 1); # escapes HTML
-		$ctx->zmore($hbuf);
+		$ctx->zadd($hbuf);
 		$hbuf = '';
 	}
 	my @irt = $eml->header_raw('In-Reply-To');
@@ -717,7 +717,7 @@ sub _msg_page_prepare {
 		$hbuf .= 'References: <'.join(">\n\t<", @$refs).">\n" if @$refs;
 	}
 	$ctx->{-linkify}->linkify_mids('..', \$hbuf); # escapes HTML
-	$ctx->zmore($hbuf .= "\n");
+	$ctx->zadd($hbuf .= "\n");
 	1;
 }
 
@@ -837,7 +837,7 @@ EOF
 	$foot .= qq(<a\nhref="#R">reply</a>);
 	# $skel may be big for big threads, don't append it to $foot
 	$skel .= '</pre>' . ($related // '');
-	$ctx->zmore($foot, $skel .= msg_reply($ctx, $hdr));
+	$ctx->zadd($foot, $skel .= msg_reply($ctx, $hdr));
 }
 
 sub ghost_parent {
diff --git a/lib/PublicInbox/ViewDiff.pm b/lib/PublicInbox/ViewDiff.pm
index 36601910..95b615dc 100644
--- a/lib/PublicInbox/ViewDiff.pm
+++ b/lib/PublicInbox/ViewDiff.pm
@@ -156,7 +156,7 @@ sub diff_header ($$$) {
 		warn "BUG? <$$x> had no ^index line";
 	}
 	$$x =~ s!^diff --git!anchor1($ctx, $pb) // 'diff --git'!ems;
-	$ctx->zmore(qq(<span\nclass="head">$$x</span>));
+	$ctx->zadd(qq(<span\nclass="head">$$x</span>));
 	$dctx;
 }
 
@@ -180,9 +180,9 @@ sub diff_before_or_after ($$) {
 		$$x .= qq(<a href="$ch">changed</a>,);
 		$$x .= ascii_html(pop @x); # $4: insertions/deletions
 		# notes, commit message, etc
-		$ctx->zmore($$x .= $lnk->to_html(pop @x));
+		$ctx->zadd($$x .= $lnk->to_html(pop @x));
 	} else {
-		$ctx->zmore($ctx->{-linkify}->to_html($$x));
+		$ctx->zadd($ctx->{-linkify}->to_html($$x));
 	}
 }
 
@@ -220,23 +220,23 @@ sub flush_diff ($$) {
 				if (!defined($dctx)) {
 					$after .= $s;
 				} elsif ($s =~ s/\A@@ (\S+) (\S+) @@//) {
-					$ctx->zmore(qq(<span\nclass="hunk">) .
+					$ctx->zadd(qq(<span\nclass="hunk">) .
 						diff_hunk($dctx, $1, $2) .
 						$lnk->to_html($s) .
 						'</span>');
 				} elsif ($s =~ /\A\+/) { # $s may be huge
-					$ctx->zmore(qq(<span\nclass="add">),
+					$ctx->zadd(qq(<span\nclass="add">),
 							$lnk->to_html($s),
 							'</span>');
 				} elsif ($s =~ /\A-- $/sm) { # email sig starts
 					$dctx = undef;
 					$after .= $s;
 				} elsif ($s =~ /\A-/) { # $s may be huge
-					$ctx->zmore(qq(<span\nclass="del">),
+					$ctx->zadd(qq(<span\nclass="del">),
 						$lnk->to_html($s),
 						'</span>');
 				} else { # $s may be huge
-					$ctx->zmore($lnk->to_html($s));
+					$ctx->zadd($lnk->to_html($s));
 				}
 			}
 			diff_before_or_after($ctx, \$after) if !$dctx;
diff --git a/lib/PublicInbox/WwwAtomStream.pm b/lib/PublicInbox/WwwAtomStream.pm
index cdfbf393..1c7ae881 100644
--- a/lib/PublicInbox/WwwAtomStream.pm
+++ b/lib/PublicInbox/WwwAtomStream.pm
@@ -146,7 +146,7 @@ sub feed_entry {
 	my $name = ascii_html(join(', ', PublicInbox::Address::names($from)));
 	$email = ascii_html($email // $ctx->{ibx}->{-primary_address});
 
-	$ctx->zmore(
+	$ctx->zadd(
 		(delete($ctx->{emit_header}) ? atom_header($ctx, $title) : '').
 		"<entry><author><name>$name</name><email>$email</email>" .
 		"</author>$title$updated" .
diff --git a/lib/PublicInbox/WwwStream.pm b/lib/PublicInbox/WwwStream.pm
index c23668a4..2a318e5e 100644
--- a/lib/PublicInbox/WwwStream.pm
+++ b/lib/PublicInbox/WwwStream.pm
@@ -182,7 +182,7 @@ sub html_oneshot ($$;@) {
 	bless $ctx, __PACKAGE__;
 	$ctx->{gz} = PublicInbox::GzipFilter::gz_or_noop($res_hdr, $ctx->{env});
 	$ctx->{base_url} // do {
-		$ctx->zmore(html_top($ctx));
+		$ctx->zadd(html_top($ctx));
 		$ctx->{base_url} = base_url($ctx);
 	};
 	my $bdy = $ctx->zflush(@_[2..$#_], _html_end($ctx));
@@ -216,7 +216,7 @@ sub html_init {
 	my $h = $ctx->{-res_hdr} = ['Content-Type', 'text/html; charset=UTF-8'];
 	$ctx->{gz} = PublicInbox::GzipFilter::gz_or_noop($h, $ctx->{env});
 	bless $ctx, __PACKAGE__;
-	$ctx->zmore(html_top($ctx));
+	$ctx->zadd(html_top($ctx));
 }
 
 1;

  parent reply	other threads:[~2022-09-10  8:18 UTC|newest]

Thread overview: 39+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-09-10  8:16 [PATCH 00/38] www: reduce memory usage Eric Wong
2022-09-10  8:16 ` [PATCH 01/38] xt: fold perf-obfuscate into perf-msgview, future-proof Eric Wong
2022-09-10  8:16 ` [PATCH 02/38] www: gzip_filter: implicitly flush {obuf} on zmore/zflush Eric Wong
2022-09-10  8:16 ` [PATCH 03/38] view: rework single message page to compress earlier Eric Wong
2022-09-10  8:16 ` [PATCH 04/38] www_atom_stream: require 200 response Eric Wong
2022-09-10  8:16 ` [PATCH 05/38] www_stream: aresponse assumes 200, too Eric Wong
2022-09-10  8:16 ` [PATCH 06/38] www_text: reduce parameter passing for response header Eric Wong
2022-09-10  8:16 ` [PATCH 07/38] viewvcs: use shorter and simpler ctx->html_done Eric Wong
2022-09-10  8:16 ` [PATCH 08/38] www_listing: consolidate some ->zmore dispatches Eric Wong
2022-09-10  8:17 ` [PATCH 09/38] www_listing: avoid unnecessary work for common cases Eric Wong
2022-09-10  8:17 ` [PATCH 10/38] www: viewdiff: use return value for diff_hunk Eric Wong
2022-09-10  8:17 ` [PATCH 11/38] view: simplify _parent_headers Eric Wong
2022-09-10  8:17 ` [PATCH 12/38] view: eml_entry: reduce manipulation of ctx->{obuf} Eric Wong
2022-09-10  8:17 ` [PATCH 13/38] gzip_filter: ->translate can reuse zmore/zflush Eric Wong
2022-09-10  8:17 ` [PATCH 14/38] view: remove multipart_text_as_html Eric Wong
2022-09-10  8:17 ` [PATCH 15/38] view: reduce subroutine calls for submsg_hdr Eric Wong
2022-09-10  8:17 ` [PATCH 16/38] view: attach_link: reduce obuf manipulation Eric Wong
2022-09-10  8:17 ` [PATCH 17/38] viewdiff: reuse existing string in diff_before_or_after Eric Wong
2022-09-10  8:17 ` [PATCH 18/38] view: _th_index_lite: avoid one s///, improve symmetry Eric Wong
2022-09-10  8:17 ` [PATCH 19/38] view: _th_index_lite: use `//' defined-or op Eric Wong
2022-09-10  8:17 ` [PATCH 20/38] view: reduce ascii_html calls and {obuf} use Eric Wong
2022-09-10  8:17 ` [PATCH 21/38] view: html_footer: golf out a few lines Eric Wong
2022-09-10  8:17 ` [PATCH 22/38] view: html_footer: remove obuf dependency Eric Wong
2022-09-10  8:17 ` [PATCH 23/38] view: html_footer: avoid escaping " in a few places Eric Wong
2022-09-10  8:17 ` [PATCH 24/38] viewdiff: diff_hunk: shorten conditionals, slightly Eric Wong
2022-09-10  8:17 ` [PATCH 25/38] view: switch a few things to ctx->zmore Eric Wong
2022-09-10  8:17 ` [PATCH 26/38] www: drop {obuf} use entirely, for now Eric Wong
2022-09-10  8:17 ` Eric Wong [this message]
2022-09-10  8:17 ` [PATCH 28/38] www: use PerlIO::scalar (zfh) for buffering Eric Wong
2022-09-10  8:17 ` [PATCH 29/38] viewdiff: diff_before_or_after: avoid extra capture Eric Wong
2022-09-10  8:17 ` [PATCH 30/38] viewdiff: diff_header: shorten function, slightly Eric Wong
2022-09-10  8:17 ` [PATCH 31/38] www_static: switch to `print $zfh', and optimize Eric Wong
2022-09-10  8:17 ` [PATCH 32/38] httpd/async: describe which ->write subs it can call Eric Wong
2022-09-10  8:17 ` [PATCH 33/38] translate: support multiple buffer args Eric Wong
2022-09-10  8:17 ` [PATCH 34/38] gzip_filter: write: use multi-arg translate Eric Wong
2022-09-10  8:17 ` [PATCH 35/38] feed: new_html_i: switch from zmore to `print $zfh' Eric Wong
2022-09-10  8:17 ` [PATCH 36/38] mbox*: use multi-arg ->translate and ->write Eric Wong
2022-09-10  8:17 ` [PATCH 37/38] www_listing: switch to `print $zfh' Eric Wong
2022-09-10  8:17 ` [PATCH 38/38] viewvcs: " Eric Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220910081729.2011934-28-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).