* [PATCH 01/38] xt: fold perf-obfuscate into perf-msgview, future-proof
2022-09-10 8:16 [PATCH 00/38] www: reduce memory usage Eric Wong
@ 2022-09-10 8:16 ` Eric Wong
2022-09-10 8:16 ` [PATCH 02/38] www: gzip_filter: implicitly flush {obuf} on zmore/zflush Eric Wong
` (36 subsequent siblings)
37 siblings, 0 replies; 39+ messages in thread
From: Eric Wong @ 2022-09-10 8:16 UTC (permalink / raw)
To: meta
perf-obfuscate was close enough to perf-msgview that it only
required setting the `obfuscate' field of the inbox.
Then update perf-msgview to account for upcoming internal
changes. The current use of {obuf} and concat ops results in
excessive scratchpad space and I may be able to even get
speedups by avoiding concat ops.
---
MANIFEST | 1 -
xt/perf-msgview.t | 10 ++++---
xt/perf-obfuscate.t | 66 ---------------------------------------------
3 files changed, 7 insertions(+), 70 deletions(-)
delete mode 100644 xt/perf-obfuscate.t
diff --git a/MANIFEST b/MANIFEST
index ac21ddcc..8be912d0 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -602,7 +602,6 @@ xt/nntpd-validate.t
xt/over-fsck.perl
xt/perf-msgview.t
xt/perf-nntpd.t
-xt/perf-obfuscate.t
xt/perf-threading.t
xt/pop3d-mpop.t
xt/solver.t
diff --git a/xt/perf-msgview.t b/xt/perf-msgview.t
index 7f92ce85..ef261359 100644
--- a/xt/perf-msgview.t
+++ b/xt/perf-msgview.t
@@ -11,6 +11,8 @@ use PublicInbox::WwwStream;
my $inboxdir = $ENV{GIANT_INBOX_DIR} // $ENV{GIANT_PI_DIR};
my $blob = $ENV{TEST_BLOB};
+my $obfuscate = $ENV{PI_OBFUSCATE} ? 1 : 0;
+diag "PI_OBFUSCATE=$obfuscate";
plan skip_all => "GIANT_INBOX_DIR not defined for $0" unless $inboxdir;
my @cat = qw(cat-file --buffer --batch-check --batch-all-objects);
@@ -21,7 +23,8 @@ if (require_git(2.19, 1)) {
"git <2.19, cat-file lacks --unordered, locality suffers\n";
}
require_mods qw(Plack::Util);
-my $ibx = PublicInbox::Inbox->new({ inboxdir => $inboxdir, name => 'name' });
+my $ibx = PublicInbox::Inbox->new({ inboxdir => $inboxdir, name => 'name',
+ obfuscate => $obfuscate});
my $git = $ibx->git;
my $fh = $blob ? undef : $git->popen(@cat);
if ($fh) {
@@ -46,10 +49,11 @@ $ctx->{mhref} = '../';
my $cb = sub {
$eml = PublicInbox::Eml->new(shift);
$eml->each_part(\&PublicInbox::View::add_text_body, $ctx, 1);
- $ctx->zflush;
+ $ctx->zflush(grep defined, delete @$ctx{'obuf'}); # compat
++$m;
delete $ctx->{zbuf};
- ${$ctx->{obuf}} = '';
+ ${$ctx->{obuf}} = ''; # compat
+ $ctx->{gz} = PublicInbox::GzipFilter::gzip_or_die();
};
my $t = timeit(1, sub {
diff --git a/xt/perf-obfuscate.t b/xt/perf-obfuscate.t
deleted file mode 100644
index 4da36124..00000000
--- a/xt/perf-obfuscate.t
+++ /dev/null
@@ -1,66 +0,0 @@
-#!perl -w
-# Copyright (C) all contributors <meta@public-inbox.org>
-# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
-use strict;
-use v5.10.1;
-use PublicInbox::TestCommon;
-use Benchmark qw(:all);
-use PublicInbox::Inbox;
-use PublicInbox::View;
-use PublicInbox::WwwStream;
-
-my $inboxdir = $ENV{GIANT_INBOX_DIR};
-plan skip_all => "GIANT_INBOX_DIR not defined for $0" unless $inboxdir;
-
-my $obfuscate = $ENV{PI_OBFUSCATE} ? 1 : 0;
-diag "obfuscate=$obfuscate\n";
-
-my @cat = qw(cat-file --buffer --batch-check --batch-all-objects);
-if (require_git(2.19, 1)) {
- push @cat, '--unordered';
-} else {
- warn
-"git <2.19, cat-file lacks --unordered, locality suffers\n";
-}
-require_mods qw(Plack::Util);
-my $ibx = PublicInbox::Inbox->new({ inboxdir => $inboxdir, name => 'name' ,
- obfuscate => $obfuscate});
-my $git = $ibx->git;
-my $fh = $git->popen(@cat);
-my $vec = '';
-vec($vec, fileno($fh), 1) = 1;
-select($vec, undef, undef, 60) or die "timed out waiting for --batch-check";
-
-my $ctx = bless {
- env => { HTTP_HOST => 'example.com', 'psgi.url_scheme' => 'https' },
- ibx => $ibx,
- www => Plack::Util::inline_object(style => sub {''}),
- gz => PublicInbox::GzipFilter::gzip_or_die(),
-}, 'PublicInbox::WwwStream';
-my ($eml, $res, $oid, $type);
-my $n = 0;
-my $m = 0;
-${$ctx->{obuf}} = '';
-$ctx->{mhref} = '../';
-
-my $cb = sub {
- $eml = PublicInbox::Eml->new(shift);
- $eml->each_part(\&PublicInbox::View::add_text_body, $ctx, 1);
- $ctx->zflush;
- ++$m;
- delete $ctx->{zbuf};
- ${$ctx->{obuf}} = '';
-};
-
-my $t = timeit(1, sub {
- while (<$fh>) {
- ($oid, $type) = split / /;
- next if $type ne 'blob';
- ++$n;
- $git->cat_async($oid, $cb);
- }
- $git->async_wait_all;
-});
-diag 'add_text_body took '.timestr($t)." for $n <=> $m messages";
-is($m, $n, 'rendered all messages');
-done_testing();
^ permalink raw reply related [flat|nested] 39+ messages in thread
* [PATCH 02/38] www: gzip_filter: implicitly flush {obuf} on zmore/zflush
2022-09-10 8:16 [PATCH 00/38] www: reduce memory usage Eric Wong
2022-09-10 8:16 ` [PATCH 01/38] xt: fold perf-obfuscate into perf-msgview, future-proof Eric Wong
@ 2022-09-10 8:16 ` Eric Wong
2022-09-10 8:16 ` [PATCH 03/38] view: rework single message page to compress earlier Eric Wong
` (35 subsequent siblings)
37 siblings, 0 replies; 39+ messages in thread
From: Eric Wong @ 2022-09-10 8:16 UTC (permalink / raw)
To: meta
This seems like the least disruptive way to allow more use of
->zmore when streaming large messages to sockets.
---
lib/PublicInbox/CompressNoop.pm | 4 ++--
lib/PublicInbox/GzipFilter.pm | 9 +++++----
lib/PublicInbox/ViewVCS.pm | 2 --
3 files changed, 7 insertions(+), 8 deletions(-)
diff --git a/lib/PublicInbox/CompressNoop.pm b/lib/PublicInbox/CompressNoop.pm
index e3301473..5135299f 100644
--- a/lib/PublicInbox/CompressNoop.pm
+++ b/lib/PublicInbox/CompressNoop.pm
@@ -1,4 +1,4 @@
-# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# Provide the same methods as Compress::Raw::Zlib::Deflate but
@@ -10,7 +10,7 @@ use Compress::Raw::Zlib qw(Z_OK);
sub new { bless \(my $self), __PACKAGE__ }
sub deflate { # ($self, $input, $output)
- $_[2] .= $_[1];
+ $_[2] .= ref($_[1]) ? ${$_[1]} : $_[1];
Z_OK;
}
diff --git a/lib/PublicInbox/GzipFilter.pm b/lib/PublicInbox/GzipFilter.pm
index 86d34183..75ba625e 100644
--- a/lib/PublicInbox/GzipFilter.pm
+++ b/lib/PublicInbox/GzipFilter.pm
@@ -140,8 +140,8 @@ sub zmore {
my $self = $_[0]; # $_[1] => input
http_out($self);
my $err;
- for (1..$#_) {
- $err = $self->{gz}->deflate($_[$_], $self->{zbuf});
+ for (delete $self->{obuf}, @_[1..$#_]) {
+ $err = $self->{gz}->deflate($_ // next, $self->{zbuf});
die "gzip->deflate: $err" if $err != Z_OK;
}
undef;
@@ -153,8 +153,9 @@ sub zflush ($;@) {
my $zbuf = delete $self->{zbuf};
my $gz = delete $self->{gz};
my $err;
- for (1..$#_) { # it's a bug iff $gz is undef w/ $_[1..]
- $err = $gz->deflate($_[$_], $zbuf);
+ # it's a bug iff $gz is undef w/ $obuf or $_[1..]
+ for (delete $self->{obuf}, @_[1..$#_]) {
+ $err = $gz->deflate($_ // next, $zbuf);
die "gzip->deflate: $err" if $err != Z_OK;
}
$gz // return ''; # not a bug, recursing on DS->write failure
diff --git a/lib/PublicInbox/ViewVCS.pm b/lib/PublicInbox/ViewVCS.pm
index 3b4fa393..ed4a6454 100644
--- a/lib/PublicInbox/ViewVCS.pm
+++ b/lib/PublicInbox/ViewVCS.pm
@@ -210,8 +210,6 @@ EOM
$x =~ s/\r?\n/\n/gs;
$ctx->{-anchors} = {} if $x =~ /^diff --git /sm;
flush_diff($ctx, \$x); # undefs $x
- $ctx->zmore($bdy);
- undef $bdy;
# TODO: should there be another textarea which attempts to
# search for the exact email which was applied to make this
# commit?
^ permalink raw reply related [flat|nested] 39+ messages in thread
* [PATCH 03/38] view: rework single message page to compress earlier
2022-09-10 8:16 [PATCH 00/38] www: reduce memory usage Eric Wong
2022-09-10 8:16 ` [PATCH 01/38] xt: fold perf-obfuscate into perf-msgview, future-proof Eric Wong
2022-09-10 8:16 ` [PATCH 02/38] www: gzip_filter: implicitly flush {obuf} on zmore/zflush Eric Wong
@ 2022-09-10 8:16 ` Eric Wong
2022-09-10 8:16 ` [PATCH 04/38] www_atom_stream: require 200 response Eric Wong
` (34 subsequent siblings)
37 siblings, 0 replies; 39+ messages in thread
From: Eric Wong @ 2022-09-10 8:16 UTC (permalink / raw)
To: meta
We can rely on deflate to compress large thread skeletons on
single message pages. Subsequent commits will compress bodies,
as well.
---
lib/PublicInbox/View.pm | 42 ++++++++++++++++--------------------
lib/PublicInbox/WwwStream.pm | 14 ++++++++++--
2 files changed, 30 insertions(+), 26 deletions(-)
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 446e6bb8..033af283 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -38,14 +38,12 @@ sub msg_page_i {
: $ctx->gone('over');
$ctx->{mhref} = ($ctx->{nr} || $ctx->{smsg}) ?
"../${\mid_href($smsg->{mid})}/" : '';
- my $obuf = _msg_page_prepare_obuf($eml, $ctx);
- if (length($$obuf)) {
+ if (_msg_page_prepare_obuf($eml, $ctx)) {
multipart_text_as_html($eml, $ctx);
- $$obuf .= '</pre><hr>';
+ ${$ctx->{obuf}} .= '</pre><hr>';
}
- delete $ctx->{obuf};
- $$obuf .= html_footer($ctx, $ctx->{first_hdr}) if !$ctx->{smsg};
- $$obuf;
+ html_footer($ctx, $ctx->{first_hdr}) if !$ctx->{smsg};
+ delete($ctx->{obuf}) // \'';
} else { # called by WwwStream::async_next or getline
$ctx->{smsg}; # may be undef
}
@@ -58,14 +56,12 @@ sub no_over_html ($) {
my $eml = PublicInbox::Eml->new($bref);
$ctx->{mhref} = '';
PublicInbox::WwwStream::init($ctx);
- my $obuf = _msg_page_prepare_obuf($eml, $ctx);
- if (length($$obuf)) {
+ if (_msg_page_prepare_obuf($eml, $ctx)) { # sets {-title_html}
multipart_text_as_html($eml, $ctx);
- $$obuf .= '</pre><hr>';
+ ${$ctx->{obuf}} .= '</pre><hr>';
}
- delete $ctx->{obuf};
- eval { $$obuf .= html_footer($ctx, $eml) };
- html_oneshot($ctx, 200, $$obuf);
+ html_footer($ctx, $eml);
+ $ctx->html_done(200);
}
# public functions: (unstable)
@@ -669,7 +665,7 @@ sub _msg_page_prepare_obuf {
if ($nr) { # unlikely
if ($ctx->{chash} eq content_hash($eml)) {
warn "W: BUG? @$mids not deduplicated properly\n";
- return \$rv;
+ return;
}
$rv .=
"<pre>WARNING: multiple messages have this Message-ID\n</pre><pre>";
@@ -746,7 +742,7 @@ sub _msg_page_prepare_obuf {
}
_parent_headers($ctx, $eml);
$rv .= "\n";
- \$rv;
+ 1;
}
sub SKEL_EXPAND () {
@@ -827,13 +823,11 @@ EOM
}
}
-# returns a string buffer
+# appends to obuf
sub html_footer {
my ($ctx, $hdr) = @_;
my $upfx = '../';
- my $skel;
- my $rv = '<pre>';
- my $related;
+ my ($related, $skel);
my $qry = delete $ctx->{-qry};
if ($qry && $ctx->{ibx}->isrch) {
my $q = ''; # search for either ancestor or descendent patches
@@ -896,15 +890,15 @@ EOF
} elsif ($u) { # unlikely
$parent = " <a\nhref=\"$u\"\nrel=prev>parent</a>";
}
- $rv .= "$next $prev$parent ";
+ ${$ctx->{obuf}} .= "<pre>$next $prev$parent ";
} else { # unindexed inboxes w/o over
+ ${$ctx->{obuf}} .= '<pre>';
$skel = qq( <a\nhref="$upfx">latest</a>);
}
- $rv .= qq(<a\nhref="#R">reply</a>);
- $rv .= $skel;
- $rv .= '</pre>';
- $rv .= $related // '';
- $rv .= msg_reply($ctx, $hdr);
+ ${$ctx->{obuf}} .= qq(<a\nhref="#R">reply</a>);
+ # $skel may be big for big threads, don't append it to obuf
+ $skel .= '</pre>' . ($related // '');
+ $ctx->zmore($skel .= msg_reply($ctx, $hdr)); # flushes obuf
}
sub linkify_ref_no_over {
diff --git a/lib/PublicInbox/WwwStream.pm b/lib/PublicInbox/WwwStream.pm
index f2777fdc..115e0440 100644
--- a/lib/PublicInbox/WwwStream.pm
+++ b/lib/PublicInbox/WwwStream.pm
@@ -27,6 +27,9 @@ sub init {
my ($ctx, $cb) = @_;
$ctx->{cb} = $cb;
$ctx->{base_url} = base_url($ctx);
+ $ctx->{-res_hdr} = [ 'Content-Type' => 'text/html; charset=UTF-8' ];
+ $ctx->{gz} = PublicInbox::GzipFilter::gz_or_noop($ctx->{-res_hdr},
+ $ctx->{env});
bless $ctx, __PACKAGE__;
}
@@ -164,6 +167,14 @@ sub getline {
$ctx->zflush(_html_end($ctx));
}
+sub html_done ($$) {
+ my ($ctx, $code) = @_;
+ my $bdy = $ctx->zflush(_html_end($ctx));
+ my $res_hdr = delete $ctx->{-res_hdr};
+ push @$res_hdr, 'Content-Length', length($bdy);
+ [ $code, $res_hdr, [ $bdy ] ]
+}
+
sub html_oneshot ($$;@) {
my ($ctx, $code) = @_[0, 1];
my $res_hdr = [ 'Content-Type' => 'text/html; charset=UTF-8',
@@ -195,9 +206,8 @@ sub async_next ($) {
sub aresponse {
my ($ctx, $code, $cb) = @_;
- my $res_hdr = [ 'Content-Type' => 'text/html; charset=UTF-8' ];
init($ctx, $cb);
- $ctx->psgi_response($code, $res_hdr);
+ $ctx->psgi_response($code, delete $ctx->{-res_hdr});
}
sub html_init {
^ permalink raw reply related [flat|nested] 39+ messages in thread
* [PATCH 04/38] www_atom_stream: require 200 response
2022-09-10 8:16 [PATCH 00/38] www: reduce memory usage Eric Wong
` (2 preceding siblings ...)
2022-09-10 8:16 ` [PATCH 03/38] view: rework single message page to compress earlier Eric Wong
@ 2022-09-10 8:16 ` Eric Wong
2022-09-10 8:16 ` [PATCH 05/38] www_stream: aresponse assumes 200, too Eric Wong
` (33 subsequent siblings)
37 siblings, 0 replies; 39+ messages in thread
From: Eric Wong @ 2022-09-10 8:16 UTC (permalink / raw)
To: meta
This simplifies parameter passing at the moment. I can't
imagine an Atom feed reader would be parsing XML for 404s or
other error codes.
---
lib/PublicInbox/Feed.pm | 4 ++--
lib/PublicInbox/SearchView.pm | 2 +-
lib/PublicInbox/WwwAtomStream.pm | 6 +++---
3 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/lib/PublicInbox/Feed.pm b/lib/PublicInbox/Feed.pm
index e0810420..bdfa0d9d 100644
--- a/lib/PublicInbox/Feed.pm
+++ b/lib/PublicInbox/Feed.pm
@@ -19,14 +19,14 @@ sub generate {
my ($ctx) = @_;
my $msgs = $ctx->{msgs} = recent_msgs($ctx);
return _no_thread() unless @$msgs;
- PublicInbox::WwwAtomStream->response($ctx, 200, \&generate_i);
+ PublicInbox::WwwAtomStream->response($ctx, \&generate_i);
}
sub generate_thread_atom {
my ($ctx) = @_;
my $msgs = $ctx->{msgs} = $ctx->{ibx}->over->get_thread($ctx->{mid});
return _no_thread() unless @$msgs;
- PublicInbox::WwwAtomStream->response($ctx, 200, \&generate_i);
+ PublicInbox::WwwAtomStream->response($ctx, \&generate_i);
}
sub generate_html_index {
diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm
index b025ec96..3dce768f 100644
--- a/lib/PublicInbox/SearchView.pm
+++ b/lib/PublicInbox/SearchView.pm
@@ -359,7 +359,7 @@ sub adump {
my ($cb, $mset, $q, $ctx) = @_;
$ctx->{ids} = $ctx->{ibx}->isrch->mset_to_artnums($mset);
$ctx->{search_query} = $q; # used by WwwAtomStream::atom_header
- PublicInbox::WwwAtomStream->response($ctx, 200, \&adump_i);
+ PublicInbox::WwwAtomStream->response($ctx, \&adump_i);
}
# callback for PublicInbox::WwwAtomStream::getline
diff --git a/lib/PublicInbox/WwwAtomStream.pm b/lib/PublicInbox/WwwAtomStream.pm
index 09b6facb..906b292a 100644
--- a/lib/PublicInbox/WwwAtomStream.pm
+++ b/lib/PublicInbox/WwwAtomStream.pm
@@ -1,4 +1,4 @@
-# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
#
# Atom body stream for HTTP responses
@@ -43,10 +43,10 @@ sub async_eml { # for async_blob_cb
}
sub response {
- my ($class, $ctx, $code, $cb) = @_;
+ my ($class, $ctx, $cb) = @_;
my $res_hdr = [ 'Content-Type' => 'application/atom+xml' ];
$class->new($ctx, $cb);
- $ctx->psgi_response($code, $res_hdr);
+ $ctx->psgi_response(200, $res_hdr);
}
# called once for each message by PSGI server
^ permalink raw reply related [flat|nested] 39+ messages in thread
* [PATCH 05/38] www_stream: aresponse assumes 200, too
2022-09-10 8:16 [PATCH 00/38] www: reduce memory usage Eric Wong
` (3 preceding siblings ...)
2022-09-10 8:16 ` [PATCH 04/38] www_atom_stream: require 200 response Eric Wong
@ 2022-09-10 8:16 ` Eric Wong
2022-09-10 8:16 ` [PATCH 06/38] www_text: reduce parameter passing for response header Eric Wong
` (32 subsequent siblings)
37 siblings, 0 replies; 39+ messages in thread
From: Eric Wong @ 2022-09-10 8:16 UTC (permalink / raw)
To: meta
There's no reason to be streaming large amounts of HTML for
anything other than a 200 response.
---
lib/PublicInbox/Feed.pm | 2 +-
lib/PublicInbox/SearchView.pm | 2 +-
lib/PublicInbox/View.pm | 6 +++---
lib/PublicInbox/WwwStream.pm | 4 ++--
4 files changed, 7 insertions(+), 7 deletions(-)
diff --git a/lib/PublicInbox/Feed.pm b/lib/PublicInbox/Feed.pm
index bdfa0d9d..56ca9886 100644
--- a/lib/PublicInbox/Feed.pm
+++ b/lib/PublicInbox/Feed.pm
@@ -71,7 +71,7 @@ sub new_html {
$ctx->{-html_tip} = '<pre>';
$ctx->{-upfx} = '';
$ctx->{-hr} = 1;
- PublicInbox::WwwStream::aresponse($ctx, 200, \&new_html_i);
+ PublicInbox::WwwStream::aresponse($ctx, \&new_html_i);
}
# private subs
diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm
index 3dce768f..e0404e5f 100644
--- a/lib/PublicInbox/SearchView.pm
+++ b/lib/PublicInbox/SearchView.pm
@@ -325,7 +325,7 @@ sub mset_thread {
@$msgs = reverse @$msgs if $r;
$ctx->{msgs} = $msgs;
- PublicInbox::WwwStream::aresponse($ctx, 200, \&mset_thread_i);
+ PublicInbox::WwwStream::aresponse($ctx, \&mset_thread_i);
}
# callback for PublicInbox::WwwStream::getline
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 033af283..1cbc62be 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -80,7 +80,7 @@ sub msg_page {
# allow user to easily browse the range around this message if
# they have ->over
$ctx->{-t_max} = $smsg->{ts};
- PublicInbox::WwwStream::aresponse($ctx, 200, \&msg_page_i);
+ PublicInbox::WwwStream::aresponse($ctx, \&msg_page_i);
}
# /$INBOX/$MESSAGE_ID/#R
@@ -432,7 +432,7 @@ sub stream_thread_i { # PublicInbox::WwwStream::getline callback
sub stream_thread ($$) {
my ($rootset, $ctx) = @_;
@{$ctx->{-queue}} = map { (0, $_) } @$rootset;
- PublicInbox::WwwStream::aresponse($ctx, 200, \&stream_thread_i);
+ PublicInbox::WwwStream::aresponse($ctx, \&stream_thread_i);
}
# /$INBOX/$MSGID/t/ and /$INBOX/$MSGID/T/
@@ -483,7 +483,7 @@ EOF
# flat display: lazy load the full message from smsg
$ctx->{msgs} = $msgs;
$ctx->{-html_tip} = '<pre>';
- PublicInbox::WwwStream::aresponse($ctx, 200, \&thread_html_i);
+ PublicInbox::WwwStream::aresponse($ctx, \&thread_html_i);
}
sub thread_html_i { # PublicInbox::WwwStream::getline callback
diff --git a/lib/PublicInbox/WwwStream.pm b/lib/PublicInbox/WwwStream.pm
index 115e0440..1fc213d4 100644
--- a/lib/PublicInbox/WwwStream.pm
+++ b/lib/PublicInbox/WwwStream.pm
@@ -205,9 +205,9 @@ sub async_next ($) {
}
sub aresponse {
- my ($ctx, $code, $cb) = @_;
+ my ($ctx, $cb) = @_;
init($ctx, $cb);
- $ctx->psgi_response($code, delete $ctx->{-res_hdr});
+ $ctx->psgi_response(200, delete $ctx->{-res_hdr});
}
sub html_init {
^ permalink raw reply related [flat|nested] 39+ messages in thread
* [PATCH 06/38] www_text: reduce parameter passing for response header
2022-09-10 8:16 [PATCH 00/38] www: reduce memory usage Eric Wong
` (4 preceding siblings ...)
2022-09-10 8:16 ` [PATCH 05/38] www_stream: aresponse assumes 200, too Eric Wong
@ 2022-09-10 8:16 ` Eric Wong
2022-09-10 8:16 ` [PATCH 07/38] viewvcs: use shorter and simpler ctx->html_done Eric Wong
` (31 subsequent siblings)
37 siblings, 0 replies; 39+ messages in thread
From: Eric Wong @ 2022-09-10 8:16 UTC (permalink / raw)
To: meta
This is a tiny step in making the code slightly less confusing
by reusing common field names and reducing dependencies on
argument ordering.
---
lib/PublicInbox/WwwText.pm | 35 +++++++++++++++++++----------------
1 file changed, 19 insertions(+), 16 deletions(-)
diff --git a/lib/PublicInbox/WwwText.pm b/lib/PublicInbox/WwwText.pm
index 1a0bb07a..224fed5c 100644
--- a/lib/PublicInbox/WwwText.pm
+++ b/lib/PublicInbox/WwwText.pm
@@ -31,16 +31,17 @@ sub get_text {
my $have_tslash = ($key =~ s!/\z!!) if !$raw;
my $txt = '';
- my $hdr = [ 'Content-Type', 'text/plain', 'Content-Length', undef ];
- if (!_default_text($ctx, $key, $hdr, \$txt)) {
+ if (!_default_text($ctx, $key, \$txt)) {
$code = 404;
$txt = "404 Not Found ($key)\n";
}
my $env = $ctx->{env};
if ($raw) {
- $txt = gzf_maybe($hdr, $env)->zflush($txt) if $code == 200;
- $hdr->[3] = length($txt);
- return [ $code, $hdr, [ $txt ] ]
+ my $h = delete $ctx->{-res_hdr};
+ $txt = gzf_maybe($h, $env)->zflush($txt) if $code == 200;
+ push @$h, 'Content-Type', 'text/plain',
+ 'Content-Length', length($txt);
+ return [ $code, $h, [ $txt ] ]
}
# enforce trailing slash for "wget -r" compatibility
@@ -167,12 +168,13 @@ EOF
}
# n.b. this is a perfect candidate for memoization
-sub inbox_config ($$$) {
- my ($ctx, $hdr, $txt) = @_;
+sub inbox_config ($$) {
+ my ($ctx, $txt) = @_;
my $ibx = $ctx->{ibx};
- push @$hdr, 'Content-Disposition', 'inline; filename=inbox.config';
+ push @{$ctx->{-res_hdr}},
+ 'Content-Disposition', 'inline; filename=inbox.config';
my $t = eval { $ibx->mm->created_at };
- push(@$hdr, 'Last-Modified', time2str($t)) if $t;
+ push(@{$ctx->{-res_hdr}}, 'Last-Modified', time2str($t)) if $t;
my $name = dq_escape($ibx->{name});
my $inboxdir = '/path/to/top-level-inbox';
my $base_url = $ibx->base_url($ctx->{env});
@@ -219,10 +221,11 @@ EOF
}
# n.b. this is a perfect candidate for memoization
-sub extindex_config ($$$) {
- my ($ctx, $hdr, $txt) = @_;
+sub extindex_config ($$) {
+ my ($ctx, $txt) = @_;
my $ibx = $ctx->{ibx};
- push @$hdr, 'Content-Disposition', 'inline; filename=extindex.config';
+ push @{$ctx->{-res_hdr}},
+ 'Content-Disposition', 'inline; filename=extindex.config';
my $name = dq_escape($ibx->{name});
my $base_url = $ibx->base_url($ctx->{env});
$$txt .= <<EOS;
@@ -397,16 +400,16 @@ EOF
1;
}
-sub _default_text ($$$$) {
- my ($ctx, $key, $hdr, $txt) = @_;
+sub _default_text ($$$) {
+ my ($ctx, $key, $txt) = @_;
if ($key eq 'mirror') {
return _mirror_help($ctx, $txt);
} elsif ($key eq 'color') {
return _colors_help($ctx, $txt);
} elsif ($key eq 'config') {
return $ctx->{ibx}->can('cloneurl') ?
- inbox_config($ctx, $hdr, $txt) :
- extindex_config($ctx, $hdr, $txt);
+ inbox_config($ctx, $txt) :
+ extindex_config($ctx, $txt);
}
return if $key ne 'help'; # TODO more keys?
^ permalink raw reply related [flat|nested] 39+ messages in thread
* [PATCH 07/38] viewvcs: use shorter and simpler ctx->html_done
2022-09-10 8:16 [PATCH 00/38] www: reduce memory usage Eric Wong
` (5 preceding siblings ...)
2022-09-10 8:16 ` [PATCH 06/38] www_text: reduce parameter passing for response header Eric Wong
@ 2022-09-10 8:16 ` Eric Wong
2022-09-10 8:16 ` [PATCH 08/38] www_listing: consolidate some ->zmore dispatches Eric Wong
` (30 subsequent siblings)
37 siblings, 0 replies; 39+ messages in thread
From: Eric Wong @ 2022-09-10 8:16 UTC (permalink / raw)
To: meta
We only return 200s for any response large enough to warrant
->html_done, so we can just assume it. ViewVCS can also take
advantage of it with some tweaking to avoid an extra method
dispatch.
---
lib/PublicInbox/View.pm | 2 +-
lib/PublicInbox/ViewVCS.pm | 5 +----
lib/PublicInbox/WwwStream.pm | 8 ++++----
3 files changed, 6 insertions(+), 9 deletions(-)
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 1cbc62be..c6fd05cc 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -61,7 +61,7 @@ sub no_over_html ($) {
${$ctx->{obuf}} .= '</pre><hr>';
}
html_footer($ctx, $eml);
- $ctx->html_done(200);
+ $ctx->html_done;
}
# public functions: (unstable)
diff --git a/lib/PublicInbox/ViewVCS.pm b/lib/PublicInbox/ViewVCS.pm
index ed4a6454..d3ac1a7d 100644
--- a/lib/PublicInbox/ViewVCS.pm
+++ b/lib/PublicInbox/ViewVCS.pm
@@ -253,10 +253,7 @@ possible to have multiple root commits when merging independent histories.
Every commit references one top-level <dfn id=tree>tree</dfn> object.</pre>
EOM
- $x = $ctx->zflush($x, $ctx->_html_end);
- my $res_hdr = delete $ctx->{-res_hdr};
- push @$res_hdr, 'Content-Length', length($x);
- delete($ctx->{env}->{'qspawn.wcb'})->([200, $res_hdr, [$x]]);
+ delete($ctx->{env}->{'qspawn.wcb'})->($ctx->html_done($x));
}
sub stream_patch_parse_hdr { # {parse_hdr} for Qspawn
diff --git a/lib/PublicInbox/WwwStream.pm b/lib/PublicInbox/WwwStream.pm
index 1fc213d4..c23668a4 100644
--- a/lib/PublicInbox/WwwStream.pm
+++ b/lib/PublicInbox/WwwStream.pm
@@ -167,12 +167,12 @@ sub getline {
$ctx->zflush(_html_end($ctx));
}
-sub html_done ($$) {
- my ($ctx, $code) = @_;
- my $bdy = $ctx->zflush(_html_end($ctx));
+sub html_done ($;@) {
+ my $ctx = $_[0];
+ my $bdy = $ctx->zflush(@_[1..$#_], _html_end($ctx));
my $res_hdr = delete $ctx->{-res_hdr};
push @$res_hdr, 'Content-Length', length($bdy);
- [ $code, $res_hdr, [ $bdy ] ]
+ [ 200, $res_hdr, [ $bdy ] ]
}
sub html_oneshot ($$;@) {
^ permalink raw reply related [flat|nested] 39+ messages in thread
* [PATCH 08/38] www_listing: consolidate some ->zmore dispatches
2022-09-10 8:16 [PATCH 00/38] www: reduce memory usage Eric Wong
` (6 preceding siblings ...)
2022-09-10 8:16 ` [PATCH 07/38] viewvcs: use shorter and simpler ctx->html_done Eric Wong
@ 2022-09-10 8:16 ` Eric Wong
2022-09-10 8:17 ` [PATCH 09/38] www_listing: avoid unnecessary work for common cases Eric Wong
` (29 subsequent siblings)
37 siblings, 0 replies; 39+ messages in thread
From: Eric Wong @ 2022-09-10 8:16 UTC (permalink / raw)
To: meta
`.' concatenation is still faster for small strings, but
passing an array to ->zmore is more efficient for large
search results and full listings.
---
lib/PublicInbox/WwwListing.pm | 9 ++++-----
1 file changed, 4 insertions(+), 5 deletions(-)
diff --git a/lib/PublicInbox/WwwListing.pm b/lib/PublicInbox/WwwListing.pm
index 79c0a8ec..0ab41452 100644
--- a/lib/PublicInbox/WwwListing.pm
+++ b/lib/PublicInbox/WwwListing.pm
@@ -223,13 +223,12 @@ sub psgi_triple {
@$list = map { $_->[1] }
sort { $b->[0] <=> $a->[0] } @$list;
}
- $gzf->zmore('<pre>');
- $gzf->zmore(join("\n", @$list));
+ $gzf->zmore('<pre>', join("\n", @$list)); # big
$gzf->zmore(mset_footer($ctx, $mset)) if $mset;
} elsif (my $mset = delete $ctx->{-mset}) {
- $gzf->zmore(mset_nav_top($ctx, $mset));
- $gzf->zmore('<pre>no matching inboxes');
- $gzf->zmore(mset_footer($ctx, $mset));
+ $gzf->zmore(mset_nav_top($ctx, $mset) .
+ '<pre>no matching inboxes' .
+ mset_footer($ctx, $mset));
} else {
$gzf->zmore('<pre>no inboxes, yet');
}
^ permalink raw reply related [flat|nested] 39+ messages in thread
* [PATCH 09/38] www_listing: avoid unnecessary work for common cases
2022-09-10 8:16 [PATCH 00/38] www: reduce memory usage Eric Wong
` (7 preceding siblings ...)
2022-09-10 8:16 ` [PATCH 08/38] www_listing: consolidate some ->zmore dispatches Eric Wong
@ 2022-09-10 8:17 ` Eric Wong
2022-09-10 8:17 ` [PATCH 10/38] www: viewdiff: use return value for diff_hunk Eric Wong
` (28 subsequent siblings)
37 siblings, 0 replies; 39+ messages in thread
From: Eric Wong @ 2022-09-10 8:17 UTC (permalink / raw)
To: meta
We need to branch for non-empty `q=' parameters anyways, but
`q=' is usually empty/unset. While we're in the area, `chomp'
reads `$/' while `chop' is simpler. Furthermore, we can shave
a few bytes off the form HTML by omitting spaces before `/>'
and placing `\n' to wrap long lines before attribute names.
---
lib/PublicInbox/WwwListing.pm | 18 ++++++++----------
1 file changed, 8 insertions(+), 10 deletions(-)
diff --git a/lib/PublicInbox/WwwListing.pm b/lib/PublicInbox/WwwListing.pm
index 0ab41452..35abf050 100644
--- a/lib/PublicInbox/WwwListing.pm
+++ b/lib/PublicInbox/WwwListing.pm
@@ -169,17 +169,15 @@ sub mset_nav_top {
my ($ctx, $mset) = @_;
my $q = $ctx->{-sq};
my $qh = $q->{'q'} // '';
- utf8::decode($qh);
- $qh = ascii_html($qh);
- $qh = qq[\nvalue="$qh"] if $qh ne '';
- my $rv = <<EOM;
-<form
-action="./"><pre><input name=q type=text$qh
-/><input type=submit value="locate inbox"
-/><input type=submit name=a value="search all inboxes"
-/></pre></form><pre>
+ if ($qh ne '') {
+ utf8::decode($qh);
+ $qh = qq[\nvalue="].ascii_html($qh).'"';
+ }
+ chop(my $rv = <<EOM);
+<form action="./"><pre><input name=q type=text$qh/><input
+type=submit value="locate inbox"/><input type=submit name=a
+value="search all inboxes"/></pre></form><pre>
EOM
- chomp $rv;
if (defined($q->{'q'})) {
my $initial_q = $ctx->{-uxs_retried};
if (defined $initial_q) {
^ permalink raw reply related [flat|nested] 39+ messages in thread
* [PATCH 10/38] www: viewdiff: use return value for diff_hunk
2022-09-10 8:16 [PATCH 00/38] www: reduce memory usage Eric Wong
` (8 preceding siblings ...)
2022-09-10 8:17 ` [PATCH 09/38] www_listing: avoid unnecessary work for common cases Eric Wong
@ 2022-09-10 8:17 ` Eric Wong
2022-09-10 8:17 ` [PATCH 11/38] view: simplify _parent_headers Eric Wong
` (27 subsequent siblings)
37 siblings, 0 replies; 39+ messages in thread
From: Eric Wong @ 2022-09-10 8:17 UTC (permalink / raw)
To: meta
It's only a short string, so there's not much copy overhead,
and it'll make future changes easier to reason about.
---
lib/PublicInbox/ViewDiff.pm | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/lib/PublicInbox/ViewDiff.pm b/lib/PublicInbox/ViewDiff.pm
index f16c7229..349ffec8 100644
--- a/lib/PublicInbox/ViewDiff.pm
+++ b/lib/PublicInbox/ViewDiff.pm
@@ -46,21 +46,21 @@ sub uri_escape_path {
}
# link to line numbers in blobs
-sub diff_hunk ($$$$) {
- my ($dst, $dctx, $ca, $cb) = @_;
+sub diff_hunk ($$$) {
+ my ($dctx, $ca, $cb) = @_;
my ($oid_a, $oid_b, $spfx) = @$dctx{qw(oid_a oid_b spfx)};
if (defined($spfx) && defined($oid_a) && defined($oid_b)) {
my ($n) = ($ca =~ /^-([0-9]+)/);
$n = defined($n) ? "#n$n" : '';
- $$dst .= qq(@@ <a\nhref="$spfx$oid_a/s/$dctx->{Q}$n">$ca</a>);
+ my $x = qq(@@ <a\nhref="$spfx$oid_a/s/$dctx->{Q}$n">$ca</a>);
($n) = ($cb =~ /^\+([0-9]+)/);
$n = defined($n) ? "#n$n" : '';
- $$dst .= qq( <a\nhref="$spfx$oid_b/s/$dctx->{Q}$n">$cb</a> @@);
+ $x .= qq( <a\nhref="$spfx$oid_b/s/$dctx->{Q}$n">$cb</a> @@);
} else {
- $$dst .= "@@ $ca $cb @@";
+ "@@ $ca $cb @@";
}
}
@@ -229,7 +229,7 @@ sub flush_diff ($$) {
$after .= $s;
} elsif ($s =~ s/\A@@ (\S+) (\S+) @@//) {
$$dst .= qq(<span\nclass="hunk">);
- diff_hunk($dst, $dctx, $1, $2);
+ $$dst .= diff_hunk($dctx, $1, $2);
$$dst .= $linkify->to_html($s);
$$dst .= '</span>';
} elsif ($s =~ /\A\+/) {
^ permalink raw reply related [flat|nested] 39+ messages in thread
* [PATCH 11/38] view: simplify _parent_headers
2022-09-10 8:16 [PATCH 00/38] www: reduce memory usage Eric Wong
` (9 preceding siblings ...)
2022-09-10 8:17 ` [PATCH 10/38] www: viewdiff: use return value for diff_hunk Eric Wong
@ 2022-09-10 8:17 ` Eric Wong
2022-09-10 8:17 ` [PATCH 12/38] view: eml_entry: reduce manipulation of ctx->{obuf} Eric Wong
` (26 subsequent siblings)
37 siblings, 0 replies; 39+ messages in thread
From: Eric Wong @ 2022-09-10 8:17 UTC (permalink / raw)
To: meta
Having References but lacking In-Reply-To is an uncommon case
with email, nowadays. So just rely on ->linkify_mids to handle
linkification and HTML escaping Furthermore, headers are short
enough to return as-is (and rely on CoW improvements in Perl
5.1x) since linkify_mids needs to operate on an independent
string, anyways.
---
lib/PublicInbox/View.pm | 39 ++++++++++-----------------------------
1 file changed, 10 insertions(+), 29 deletions(-)
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index c6fd05cc..3980ed91 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -740,8 +740,7 @@ sub _msg_page_prepare_obuf {
$ctx->{-linkify}->linkify_mids('..', \$s, 1);
$rv .= $s;
}
- _parent_headers($ctx, $eml);
- $rv .= "\n";
+ $rv .= _parent_headers($ctx, $eml);
1;
}
@@ -795,32 +794,21 @@ sub _parent_headers {
my ($ctx, $hdr) = @_;
my @irt = $hdr->header_raw('In-Reply-To');
my $refs;
- if (@irt) {
- my $s = '';
- $s .= "In-Reply-To: $_\n" for @irt;
- $ctx->{-linkify}->linkify_mids('..', \$s);
- ${$ctx->{obuf}} .= $s;
- } else {
+ my $s = '';
+ if (!@irt) {
$refs = references($hdr);
- my $irt = pop @$refs;
- if (defined $irt) {
- my $html = ascii_html($irt);
- my $href = mid_href($irt);
- ${$ctx->{obuf}} .= <<EOM;
-In-Reply-To: <<a\nhref="../$href/">$html</a>>
-EOM
- }
+ $irt[0] = pop(@$refs) if scalar @$refs;
}
+ $s .= "In-Reply-To: $_\n" for @irt;
# do not display References: if search is present,
# we show the thread skeleton at the bottom, instead.
- return if $ctx->{ibx}->over;
-
- $refs //= references($hdr);
- if (@$refs) {
- $_ = linkify_ref_no_over($_) for @$refs;
- ${$ctx->{obuf}} .= 'References: '. join("\n\t", @$refs) . "\n";
+ if (!$ctx->{ibx}->over) {
+ $refs //= references($hdr);
+ $s .= 'References: <'.join(">\n\t<", @$refs).">\n" if @$refs;
}
+ $ctx->{-linkify}->linkify_mids('..', \$s); # escapes HTML
+ $s .= "\n";
}
# appends to obuf
@@ -901,13 +889,6 @@ EOF
$ctx->zmore($skel .= msg_reply($ctx, $hdr)); # flushes obuf
}
-sub linkify_ref_no_over {
- my ($mid) = @_;
- my $href = mid_href($mid);
- my $html = ascii_html($mid);
- "<<a\nhref=\"../$href/\">$html</a>>";
-}
-
sub ghost_parent {
my ($upfx, $mid) = @_;
^ permalink raw reply related [flat|nested] 39+ messages in thread
* [PATCH 12/38] view: eml_entry: reduce manipulation of ctx->{obuf}
2022-09-10 8:16 [PATCH 00/38] www: reduce memory usage Eric Wong
` (10 preceding siblings ...)
2022-09-10 8:17 ` [PATCH 11/38] view: simplify _parent_headers Eric Wong
@ 2022-09-10 8:17 ` Eric Wong
2022-09-10 8:17 ` [PATCH 13/38] gzip_filter: ->translate can reuse zmore/zflush Eric Wong
` (25 subsequent siblings)
37 siblings, 0 replies; 39+ messages in thread
From: Eric Wong @ 2022-09-10 8:17 UTC (permalink / raw)
To: meta
This is another step towards avoid unnecessary copies
and pad space waste.
---
lib/PublicInbox/View.pm | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 3980ed91..37b484ae 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -246,11 +246,11 @@ sub eml_entry {
$ctx->{mhref} = $mhref;
$ctx->{changed_href} = "#e$id"; # for diffstat "files? changed,"
$ctx->{obuf} = \$rv;
- $eml->each_part(\&add_text_body, $ctx, 1);
- delete $ctx->{obuf};
+ $eml->each_part(\&add_text_body, $ctx, 1); # expensive
+ $ctx->zmore; # TODO: remove once add_text_body is updated
# add the footer
- $rv .= "\n<a\nhref=#$id_m\nid=e$id>^</a> ".
+ $rv = "\n<a\nhref=#$id_m\nid=e$id>^</a> ".
"<a\nhref=\"$mhref\">permalink</a>" .
" <a\nhref=\"${mhref}raw\">raw</a>" .
" <a\nhref=\"${mhref}#R\">reply</a>";
@@ -388,7 +388,8 @@ sub pre_thread { # walk_thread callback
sub thread_eml_entry {
my ($ctx, $eml) = @_;
my ($beg, $end) = thread_adj_level($ctx, $ctx->{level});
- $beg . '<pre>' . eml_entry($ctx, $eml) . '</pre>' . $end;
+ $ctx->zmore($beg.'<pre>');
+ eml_entry($ctx, $eml) . '</pre>' . $end;
}
sub next_in_queue ($$) {
^ permalink raw reply related [flat|nested] 39+ messages in thread
* [PATCH 13/38] gzip_filter: ->translate can reuse zmore/zflush
2022-09-10 8:16 [PATCH 00/38] www: reduce memory usage Eric Wong
` (11 preceding siblings ...)
2022-09-10 8:17 ` [PATCH 12/38] view: eml_entry: reduce manipulation of ctx->{obuf} Eric Wong
@ 2022-09-10 8:17 ` Eric Wong
2022-09-10 8:17 ` [PATCH 14/38] view: remove multipart_text_as_html Eric Wong
` (24 subsequent siblings)
37 siblings, 0 replies; 39+ messages in thread
From: Eric Wong @ 2022-09-10 8:17 UTC (permalink / raw)
To: meta
We can work towards delaying zlib context allocations in future
commits, too.
---
lib/PublicInbox/GzipFilter.pm | 15 ++++-----------
1 file changed, 4 insertions(+), 11 deletions(-)
diff --git a/lib/PublicInbox/GzipFilter.pm b/lib/PublicInbox/GzipFilter.pm
index 75ba625e..77d570b6 100644
--- a/lib/PublicInbox/GzipFilter.pm
+++ b/lib/PublicInbox/GzipFilter.pm
@@ -100,19 +100,12 @@ sub translate ($$) {
# allocate the zlib context lazily here, instead of in ->new.
# Deflate contexts are memory-intensive and this object may
# be sitting in the Qspawn limiter queue for a while.
- my $gz = $self->{gz} //= gzip_or_die();
- my $zbuf = delete($self->{zbuf});
+ $self->{gz} //= gzip_or_die();
if (defined $_[1]) { # my $buf = $_[1];
- my $err = $gz->deflate($_[1], $zbuf);
- die "gzip->deflate: $err" if $err != Z_OK;
- return $zbuf if length($zbuf) >= 8192;
-
- $self->{zbuf} = $zbuf;
- '';
+ zmore($self, $_[1]);
+ length($self->{zbuf}) >= 8192 ? delete($self->{zbuf}) : '';
} else { # undef == EOF
- my $err = $gz->flush($zbuf);
- die "gzip->flush: $err" if $err != Z_OK;
- $zbuf;
+ zflush($self);
}
}
^ permalink raw reply related [flat|nested] 39+ messages in thread
* [PATCH 14/38] view: remove multipart_text_as_html
2022-09-10 8:16 [PATCH 00/38] www: reduce memory usage Eric Wong
` (12 preceding siblings ...)
2022-09-10 8:17 ` [PATCH 13/38] gzip_filter: ->translate can reuse zmore/zflush Eric Wong
@ 2022-09-10 8:17 ` Eric Wong
2022-09-10 8:17 ` [PATCH 15/38] view: reduce subroutine calls for submsg_hdr Eric Wong
` (23 subsequent siblings)
37 siblings, 0 replies; 39+ messages in thread
From: Eric Wong @ 2022-09-10 8:17 UTC (permalink / raw)
To: meta
It seems like a pointless wrapper function that's not saving us
a whole lot. Drop some direct {obuf} manipulation while we're
at it.
---
lib/PublicInbox/View.pm | 15 ++++-----------
lib/PublicInbox/WwwAtomStream.pm | 5 ++---
2 files changed, 6 insertions(+), 14 deletions(-)
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 37b484ae..0b67d92f 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -39,8 +39,8 @@ sub msg_page_i {
$ctx->{mhref} = ($ctx->{nr} || $ctx->{smsg}) ?
"../${\mid_href($smsg->{mid})}/" : '';
if (_msg_page_prepare_obuf($eml, $ctx)) {
- multipart_text_as_html($eml, $ctx);
- ${$ctx->{obuf}} .= '</pre><hr>';
+ $eml->each_part(\&add_text_body, $ctx, 1);
+ $ctx->zmore('</pre><hr>');
}
html_footer($ctx, $ctx->{first_hdr}) if !$ctx->{smsg};
delete($ctx->{obuf}) // \'';
@@ -57,8 +57,8 @@ sub no_over_html ($) {
$ctx->{mhref} = '';
PublicInbox::WwwStream::init($ctx);
if (_msg_page_prepare_obuf($eml, $ctx)) { # sets {-title_html}
- multipart_text_as_html($eml, $ctx);
- ${$ctx->{obuf}} .= '</pre><hr>';
+ $eml->each_part(\&add_text_body, $ctx, 1);
+ $ctx->zmore('</pre><hr>');
}
html_footer($ctx, $eml);
$ctx->html_done;
@@ -506,13 +506,6 @@ sub thread_html_i { # PublicInbox::WwwStream::getline callback
}
}
-sub multipart_text_as_html {
- # ($mime, $ctx) = @_; # each_part may do "$_[0] = undef"
-
- # scan through all parts, looking for displayable text
- $_[0]->each_part(\&add_text_body, $_[1], 1);
-}
-
sub submsg_hdr ($$) {
my ($ctx, $eml) = @_;
my $obfs_ibx = $ctx->{-obfs_ibx};
diff --git a/lib/PublicInbox/WwwAtomStream.pm b/lib/PublicInbox/WwwAtomStream.pm
index 906b292a..09c79a8a 100644
--- a/lib/PublicInbox/WwwAtomStream.pm
+++ b/lib/PublicInbox/WwwAtomStream.pm
@@ -157,9 +157,8 @@ sub feed_entry {
$ctx->{obuf} = \$s;
$ctx->{mhref} = $href;
$ctx->{changed_href} = "${href}#related";
- PublicInbox::View::multipart_text_as_html($eml, $ctx);
- delete $ctx->{obuf};
- $s .= '</pre></div></content></entry>';
+ $eml->each_part(\&PublicInbox::View::add_text_body, $ctx, 1);
+ '</pre></div></content></entry>';
}
sub feed_updated {
^ permalink raw reply related [flat|nested] 39+ messages in thread
* [PATCH 15/38] view: reduce subroutine calls for submsg_hdr
2022-09-10 8:16 [PATCH 00/38] www: reduce memory usage Eric Wong
` (13 preceding siblings ...)
2022-09-10 8:17 ` [PATCH 14/38] view: remove multipart_text_as_html Eric Wong
@ 2022-09-10 8:17 ` Eric Wong
2022-09-10 8:17 ` [PATCH 16/38] view: attach_link: reduce obuf manipulation Eric Wong
` (22 subsequent siblings)
37 siblings, 0 replies; 39+ messages in thread
From: Eric Wong @ 2022-09-10 8:17 UTC (permalink / raw)
To: meta
Favor fewer, yet more expensive operations than many smaller
ones. While we're still directly manipulating ctx->{obuf} after
this, this change makes it easier for us to avoid doing so in
the future.
---
lib/PublicInbox/View.pm | 17 ++++++-----------
1 file changed, 6 insertions(+), 11 deletions(-)
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 0b67d92f..1b55fe77 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -508,17 +508,12 @@ sub thread_html_i { # PublicInbox::WwwStream::getline callback
sub submsg_hdr ($$) {
my ($ctx, $eml) = @_;
- my $obfs_ibx = $ctx->{-obfs_ibx};
- my $rv = $ctx->{obuf};
- $$rv .= "\n";
+ my $s = "\n";
for my $h (qw(From To Cc Subject Date Message-ID X-Alt-Message-ID)) {
- my @v = $eml->header($h);
- for my $v (@v) {
- obfuscate_addrs($obfs_ibx, $v) if $obfs_ibx;
- $v = ascii_html($v);
- $$rv .= "$h: $v\n";
- }
+ $s .= "$h: $_\n" for $eml->header($h);
}
+ obfuscate_addrs($ctx->{-obfs_ibx}, $s) if $ctx->{-obfs_ibx};
+ ascii_html($s);
}
sub attach_link ($$$$;$) {
@@ -559,7 +554,7 @@ EOF
$$rv .= ($desc eq '') ? "$ts --]" : "$desc --]\n[-- $ts --]";
$$rv .= "</a>\n";
- submsg_hdr($ctx, $part) if $part->{is_submsg};
+ $$rv .= submsg_hdr($ctx, $part) if $part->{is_submsg};
undef;
}
@@ -578,7 +573,7 @@ sub add_text_body { # callback for each_part
my $rv = $ctx->{obuf};
if ($part->{is_submsg}) {
- submsg_hdr($ctx, $part);
+ $$rv .= submsg_hdr($ctx, $part);
$$rv .= "\n";
}
^ permalink raw reply related [flat|nested] 39+ messages in thread
* [PATCH 16/38] view: attach_link: reduce obuf manipulation
2022-09-10 8:16 [PATCH 00/38] www: reduce memory usage Eric Wong
` (14 preceding siblings ...)
2022-09-10 8:17 ` [PATCH 15/38] view: reduce subroutine calls for submsg_hdr Eric Wong
@ 2022-09-10 8:17 ` Eric Wong
2022-09-10 8:17 ` [PATCH 17/38] viewdiff: reuse existing string in diff_before_or_after Eric Wong
` (21 subsequent siblings)
37 siblings, 0 replies; 39+ messages in thread
From: Eric Wong @ 2022-09-10 8:17 UTC (permalink / raw)
To: meta
This is another steep towards reducing the maximum size of
an obuf by eventually doing compression earlier while we
render messages as HTML.
And do some golfing while we're at it...
---
lib/PublicInbox/View.pm | 30 +++++++++++-------------------
1 file changed, 11 insertions(+), 19 deletions(-)
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 1b55fe77..0fe645ca 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -524,7 +524,6 @@ sub attach_link ($$$$;$) {
# downloads for 0-byte multipart attachments
return unless $part->{bdy};
- my $nl = $idx eq '1' ? '' : "\n"; # like join("\n", ...)
my $size = length($part->body);
delete $part->{bdy}; # save memory
@@ -540,23 +539,17 @@ sub attach_link ($$$$;$) {
} else {
$sfn = 'a.bin';
}
- my $rv = $ctx->{obuf};
- $$rv .= qq($nl<a\nhref="$ctx->{mhref}$idx-$sfn">);
- if ($err) {
- $$rv .= <<EOF;
+ my $rv = $idx eq '1' ? '' : "\n"; # like join("\n", ...)
+ $rv .= qq(<a\nhref="$ctx->{mhref}$idx-$sfn">);
+ $rv .= <<EOF if $err;
[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
EOF
- }
- $$rv .= "[-- Attachment #$idx: ";
- my $ts = "Type: $ct, Size: $size bytes";
+ $rv .= "[-- Attachment #$idx: ";
my $desc = $part->header('Content-Description') // $fn // '';
- $desc = ascii_html($desc);
- $$rv .= ($desc eq '') ? "$ts --]" : "$desc --]\n[-- $ts --]";
- $$rv .= "</a>\n";
-
- $$rv .= submsg_hdr($ctx, $part) if $part->{is_submsg};
-
- undef;
+ $rv .= ascii_html($desc)." --]\n[-- " if $desc ne '';
+ $rv .= "Type: $ct, Size: $size bytes --]</a>\n";
+ $rv .= submsg_hdr($ctx, $part) if $part->{is_submsg};
+ $rv;
}
sub add_text_body { # callback for each_part
@@ -568,10 +561,9 @@ sub add_text_body { # callback for each_part
my ($part, $depth, $idx) = @$p;
my $ct = $part->content_type || 'text/plain';
my $fn = $part->filename;
- my ($s, $err) = msg_part_text($part, $ct);
- return attach_link($ctx, $ct, $p, $fn) unless defined $s;
-
my $rv = $ctx->{obuf};
+ my ($s, $err) = msg_part_text($part, $ct);
+ $s // return $$rv .= (attach_link($ctx, $ct, $p, $fn) // '');
if ($part->{is_submsg}) {
$$rv .= submsg_hdr($ctx, $part);
$$rv .= "\n";
@@ -623,7 +615,7 @@ sub add_text_body { # callback for each_part
undef $s; # free memory
if (defined($fn) || ($depth > 0 && !$part->{is_submsg}) || $err) {
# badly-encoded message with $err? tell the world about it!
- attach_link($ctx, $ct, $p, $fn, $err);
+ $$rv .= attach_link($ctx, $ct, $p, $fn, $err);
$$rv .= "\n";
}
delete $part->{bdy}; # save memory
^ permalink raw reply related [flat|nested] 39+ messages in thread
* [PATCH 17/38] viewdiff: reuse existing string in diff_before_or_after
2022-09-10 8:16 [PATCH 00/38] www: reduce memory usage Eric Wong
` (15 preceding siblings ...)
2022-09-10 8:17 ` [PATCH 16/38] view: attach_link: reduce obuf manipulation Eric Wong
@ 2022-09-10 8:17 ` Eric Wong
2022-09-10 8:17 ` [PATCH 18/38] view: _th_index_lite: avoid one s///, improve symmetry Eric Wong
` (20 subsequent siblings)
37 siblings, 0 replies; 39+ messages in thread
From: Eric Wong @ 2022-09-10 8:17 UTC (permalink / raw)
To: meta
Instead of appending to an ever-growing {obuf}, we'll reuse
the existing string (which already has pre-allocated memory).
---
lib/PublicInbox/ViewDiff.pm | 24 +++++++++++-------------
1 file changed, 11 insertions(+), 13 deletions(-)
diff --git a/lib/PublicInbox/ViewDiff.pm b/lib/PublicInbox/ViewDiff.pm
index 349ffec8..b9194ed4 100644
--- a/lib/PublicInbox/ViewDiff.pm
+++ b/lib/PublicInbox/ViewDiff.pm
@@ -168,28 +168,26 @@ sub diff_header ($$$) {
sub diff_before_or_after ($$) {
my ($ctx, $x) = @_;
- my $linkify = $ctx->{-linkify};
- my $dst = $ctx->{obuf};
if (exists $ctx->{-anchors} && $$x =~ /\A(.*?) # likely "---\n"
# diffstat lines:
((?:^\x20(?:[^\n]+?)(?:\x20+\|\x20[^\n]*\n))+)
(\x20[0-9]+\x20files?\x20)changed,([^\n]+\n)
(.*?)\z/msx) { # notes, commit message, etc
- undef $$x;
my @x = ($5, $4, $3, $2, $1);
- $$dst .= $linkify->to_html(pop @x); # uninteresting prefix
+ my $lnk = $ctx->{-linkify};
+ $$x = $lnk->to_html(pop @x); # uninteresting prefix
for my $l (split(/^/m, pop(@x))) { # per-file diffstat lines
$l =~ /^ (.+)( +\| .*\z)/s and
- anchor0($dst, $ctx, $1, $2) and next;
- $$dst .= $linkify->to_html($l);
+ anchor0($x, $ctx, $1, $2) and next;
+ $$x .= $lnk->to_html($l);
}
- $$dst .= $x[2]; # $3 /^ \d+ files? /
+ $$x .= pop @x; # $3 /^ \d+ files? /
my $ch = $ctx->{changed_href} // '#related';
- $$dst .= qq(<a href="$ch">changed</a>,);
- $$dst .= ascii_html($x[1]); # $4: insertions/deletions
- $$dst .= $linkify->to_html($x[0]); # notes, commit message, etc
+ $$x .= qq(<a href="$ch">changed</a>,);
+ $$x .= ascii_html(pop @x); # $4: insertions/deletions
+ $$x .= $lnk->to_html(pop @x); # notes, commit message, etc
} else {
- $$dst .= $linkify->to_html($$x);
+ $ctx->{-linkify}->to_html($$x);
}
}
@@ -247,9 +245,9 @@ sub flush_diff ($$) {
$$dst .= $linkify->to_html($s);
}
}
- diff_before_or_after($ctx, \$after) unless $dctx;
+ $$dst .= diff_before_or_after($ctx, \$after) if !$dctx;
} else {
- diff_before_or_after($ctx, \$x);
+ $$dst .= diff_before_or_after($ctx, \$x);
}
}
}
^ permalink raw reply related [flat|nested] 39+ messages in thread
* [PATCH 18/38] view: _th_index_lite: avoid one s///, improve symmetry
2022-09-10 8:16 [PATCH 00/38] www: reduce memory usage Eric Wong
` (16 preceding siblings ...)
2022-09-10 8:17 ` [PATCH 17/38] viewdiff: reuse existing string in diff_before_or_after Eric Wong
@ 2022-09-10 8:17 ` Eric Wong
2022-09-10 8:17 ` [PATCH 19/38] view: _th_index_lite: use `//' defined-or op Eric Wong
` (19 subsequent siblings)
37 siblings, 0 replies; 39+ messages in thread
From: Eric Wong @ 2022-09-10 8:17 UTC (permalink / raw)
To: meta
We can replace an expensive `s///' substitution with a simpler
`chop'. Furthermore, we can delay the "</b>\n" replacement
to ensure it's on the same line of Perl code as the `<b>'
opening tag for readability.
---
lib/PublicInbox/View.pm | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 0fe645ca..3282d4f9 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -335,10 +335,10 @@ sub _th_index_lite {
}
my $s_s = nr_to_s($nr_s, 'sibling', 'siblings');
my $s_c = nr_to_s($nr_c, 'reply', 'replies');
- $attr =~ s!\n\z!</b>\n!s;
+ chop $attr; # remove "\n"
$attr =~ s!<a\nhref.*</a> (?:" )?!!s; # no point in dup subject
$attr =~ s!<a\nhref=[^>]+>([^<]+)</a>!$1!s; # no point linking to self
- $rv .= "<b>@ $attr";
+ $rv .= "<b>@ $attr</b>\n";
if ($nr_c) {
my $cmid = $children->[0] ? $children->[0]->{mid} : undef;
$rv .= $pad . _skel_hdr($mapping, $cmid);
^ permalink raw reply related [flat|nested] 39+ messages in thread
* [PATCH 19/38] view: _th_index_lite: use `//' defined-or op
2022-09-10 8:16 [PATCH 00/38] www: reduce memory usage Eric Wong
` (17 preceding siblings ...)
2022-09-10 8:17 ` [PATCH 18/38] view: _th_index_lite: avoid one s///, improve symmetry Eric Wong
@ 2022-09-10 8:17 ` Eric Wong
2022-09-10 8:17 ` [PATCH 20/38] view: reduce ascii_html calls and {obuf} use Eric Wong
` (18 subsequent siblings)
37 siblings, 0 replies; 39+ messages in thread
From: Eric Wong @ 2022-09-10 8:17 UTC (permalink / raw)
To: meta
Just something I noticed while evaluating this subroutine
for the buffering overhaul.
---
lib/PublicInbox/View.pm | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 3282d4f9..08ba54bb 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -302,8 +302,7 @@ sub _th_index_lite {
my $rv = '';
my $mapping = $ctx->{mapping} or return $rv;
my $pad = ' ';
- my $mid_map = $mapping->{$mid_raw};
- defined $mid_map or
+ my $mid_map = $mapping->{$mid_raw} //
return 'public-inbox BUG: '.ascii_html($mid_raw).' not mapped';
my ($attr, $node, $idx, $level) = @$mid_map;
my $children = $node->{children};
^ permalink raw reply related [flat|nested] 39+ messages in thread
* [PATCH 20/38] view: reduce ascii_html calls and {obuf} use
2022-09-10 8:16 [PATCH 00/38] www: reduce memory usage Eric Wong
` (18 preceding siblings ...)
2022-09-10 8:17 ` [PATCH 19/38] view: _th_index_lite: use `//' defined-or op Eric Wong
@ 2022-09-10 8:17 ` Eric Wong
2022-09-10 8:17 ` [PATCH 21/38] view: html_footer: golf out a few lines Eric Wong
` (17 subsequent siblings)
37 siblings, 0 replies; 39+ messages in thread
From: Eric Wong @ 2022-09-10 8:17 UTC (permalink / raw)
To: meta
We can rely on {-html_tip} for some things at the top of the
page, and reduce ascii_html and obfuscate_addrs calls by
working on the whole buffer at once.
---
lib/PublicInbox/View.pm | 127 +++++++++++++++++-----------------------
t/psgi_v2.t | 4 +-
2 files changed, 58 insertions(+), 73 deletions(-)
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 08ba54bb..52d37a9f 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -38,7 +38,7 @@ sub msg_page_i {
: $ctx->gone('over');
$ctx->{mhref} = ($ctx->{nr} || $ctx->{smsg}) ?
"../${\mid_href($smsg->{mid})}/" : '';
- if (_msg_page_prepare_obuf($eml, $ctx)) {
+ if (_msg_page_prepare($eml, $ctx)) {
$eml->each_part(\&add_text_body, $ctx, 1);
$ctx->zmore('</pre><hr>');
}
@@ -56,7 +56,7 @@ sub no_over_html ($) {
my $eml = PublicInbox::Eml->new($bref);
$ctx->{mhref} = '';
PublicInbox::WwwStream::init($ctx);
- if (_msg_page_prepare_obuf($eml, $ctx)) { # sets {-title_html}
+ if (_msg_page_prepare($eml, $ctx)) { # sets {-title_html}
$eml->each_part(\&add_text_body, $ctx, 1);
$ctx->zmore('</pre><hr>');
}
@@ -635,11 +635,9 @@ sub add_text_body { # callback for each_part
}
}
-sub _msg_page_prepare_obuf {
+sub _msg_page_prepare {
my ($eml, $ctx) = @_;
my $have_over = !!$ctx->{ibx}->over;
- my $obfs_ibx = $ctx->{-obfs_ibx};
- $ctx->{obuf} = \(my $rv = '');
my $mids = mids_for_index($eml);
my $nr = $ctx->{nr}++;
if ($nr) { # unlikely
@@ -647,80 +645,86 @@ sub _msg_page_prepare_obuf {
warn "W: BUG? @$mids not deduplicated properly\n";
return;
}
- $rv .=
+ $ctx->{-html_tip} =
"<pre>WARNING: multiple messages have this Message-ID\n</pre><pre>";
} else {
$ctx->{first_hdr} = $eml->header_obj;
$ctx->{chash} = content_hash($eml) if $ctx->{smsg}; # reused MID
- $rv .= "<pre\nid=b>"; # anchor for body start
+ $ctx->{-html_tip} = "<pre\nid=b>"; # anchor for body start
}
$ctx->{-upfx} = '../';
my @title; # (Subject[0], From[0])
+ my $hbuf = '';
for my $v ($eml->header('From')) {
my @n = PublicInbox::Address::names($v);
- $v = ascii_html($v);
- $title[1] //= ascii_html(join(', ', @n));
- if ($obfs_ibx) {
- obfuscate_addrs($obfs_ibx, $v);
- obfuscate_addrs($obfs_ibx, $title[1]);
- }
- $rv .= "From: $v\n" if $v ne '';
+ $title[1] //= join(', ', @n);
+ $hbuf .= "From: $v\n" if $v ne '';
}
- foreach my $h (qw(To Cc)) {
+ for my $h (qw(To Cc)) {
for my $v ($eml->header($h)) {
fold_addresses($v);
- $v = ascii_html($v);
- obfuscate_addrs($obfs_ibx, $v) if $obfs_ibx;
- $rv .= "$h: $v\n" if $v ne '';
+ $hbuf .= "$h: $v\n" if $v ne '';
}
}
my @subj = $eml->header('Subject');
- if (@subj) {
- my $v = ascii_html(shift @subj);
- obfuscate_addrs($obfs_ibx, $v) if $obfs_ibx;
- $rv .= 'Subject: ';
- $rv .= $have_over ? qq(<a\nhref="#r"\nid=t>$v</a>\n) : "$v\n";
- $title[0] = $v;
- for $v (@subj) { # multi-Subject message :<
- $v = ascii_html($v);
- obfuscate_addrs($obfs_ibx, $v) if $obfs_ibx;
- $rv .= "Subject: $v\n";
- }
- } else { # dummy anchor for thread skeleton at bottom of page
- $rv .= qq(<a\nhref="#r"\nid=t></a>) if $have_over;
- $title[0] = '(no subject)';
- }
- for my $v ($eml->header('Date')) {
- $v = ascii_html($v);
- obfuscate_addrs($obfs_ibx, $v) if $obfs_ibx; # possible :P
- $rv .= qq{Date: $v\n};
+ $hbuf .= "Subject: $_\n" for @subj;
+ $title[0] = $subj[0] // '(no subject)';
+ $hbuf .= "Date: $_\n" for $eml->header('Date');
+ $hbuf = ascii_html($hbuf);
+ $ctx->{-title_html} = ascii_html(join(' - ', @title));
+ if (my $obfs_ibx = $ctx->{-obfs_ibx}) {
+ obfuscate_addrs($obfs_ibx, $hbuf);
+ obfuscate_addrs($obfs_ibx, $ctx->{-title_html});
}
+
# [thread overview] link is typically added after Date,
# but added after Subject, or even nothing.
if ($have_over) {
- chop $rv; # drop "\n", or noop if $rv eq ''
- $rv .= qq{\t<a\nhref="#r">[thread overview]</a>\n};
+ chop $hbuf; # drop "\n", or noop if $rv eq ''
+ $hbuf .= qq{\t<a\nhref="#r">[thread overview]</a>\n};
+ $hbuf =~ s!^Subject:\x20(.*?)(\n[A-Z]|\z)
+ !Subject: <a\nhref="#r"\nid=t>$1</a>$2!msx or
+ $hbuf .= qq(<a\nhref="#r\nid=t></a>);
+ }
+ if (scalar(@$mids) == 1) { # common case
+ my $x = ascii_html($mids->[0]);
+ $hbuf .= qq[Message-ID: <$x> (<a href="raw">raw</a>)\n];
}
if (!$nr) { # first (and only) message, common case
- $ctx->{-title_html} = join(' - ', @title);
- $rv = $ctx->html_top . $rv;
+ $ctx->zmore($ctx->html_top, $hbuf);
+ } else {
+ delete $ctx->{-title_html};
+ $ctx->zmore($ctx->{-html_tip}, $hbuf);
}
-
$ctx->{-linkify} //= PublicInbox::Linkify->new;
- if (scalar(@$mids) == 1) { # common case
- my $mhtml = ascii_html($mids->[0]);
- $rv .= qq[Message-ID: <$mhtml> (<a href="raw">raw</a>)\n];
- } else {
+ $hbuf = '';
+ if (scalar(@$mids) != 1) { # unlikely, but it happens :<
# X-Alt-Message-ID can happen if a message is injected from
# public-inbox-nntpd because of multiple Message-ID headers.
- my $s = '';
for my $h (qw(Message-ID X-Alt-Message-ID)) {
- $s .= "$h: $_\n" for ($eml->header_raw($h));
+ $hbuf .= "$h: $_\n" for ($eml->header_raw($h));
}
- $ctx->{-linkify}->linkify_mids('..', \$s, 1);
- $rv .= $s;
+ $ctx->{-linkify}->linkify_mids('..', \$hbuf, 1); # escapes HTML
+ $ctx->zmore($hbuf);
+ $hbuf = '';
+ }
+ my @irt = $eml->header_raw('In-Reply-To');
+ my $refs;
+ if (!@irt) {
+ $refs = references($eml);
+ $irt[0] = pop(@$refs) if scalar @$refs;
}
- $rv .= _parent_headers($ctx, $eml);
+ $hbuf .= "In-Reply-To: $_\n" for @irt;
+
+ # do not display References: if search is present,
+ # we show the thread skeleton at the bottom, instead.
+ if (!$have_over) {
+ $refs //= references($eml);
+ $hbuf .= 'References: <'.join(">\n\t<", @$refs).">\n" if @$refs;
+ }
+ $ctx->{-linkify}->linkify_mids('..', \$hbuf); # escapes HTML
+ $ctx->zmore($hbuf .= "\n");
+ ${$ctx->{obuf}} = ''; # TODO remove
1;
}
@@ -770,27 +774,6 @@ sub thread_skel ($$$) {
$ctx->{parent_msg} = $parent;
}
-sub _parent_headers {
- my ($ctx, $hdr) = @_;
- my @irt = $hdr->header_raw('In-Reply-To');
- my $refs;
- my $s = '';
- if (!@irt) {
- $refs = references($hdr);
- $irt[0] = pop(@$refs) if scalar @$refs;
- }
- $s .= "In-Reply-To: $_\n" for @irt;
-
- # do not display References: if search is present,
- # we show the thread skeleton at the bottom, instead.
- if (!$ctx->{ibx}->over) {
- $refs //= references($hdr);
- $s .= 'References: <'.join(">\n\t<", @$refs).">\n" if @$refs;
- }
- $ctx->{-linkify}->linkify_mids('..', \$s); # escapes HTML
- $s .= "\n";
-}
-
# appends to obuf
sub html_footer {
my ($ctx, $hdr) = @_;
diff --git a/t/psgi_v2.t b/t/psgi_v2.t
index 7d73b606..6b1b3a39 100644
--- a/t/psgi_v2.t
+++ b/t/psgi_v2.t
@@ -1,5 +1,5 @@
#!perl -w
-# Copyright (C) 2018-2021 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use v5.10.1;
@@ -209,6 +209,8 @@ my $client1 = sub {
local $SIG{__WARN__} = 'DEFAULT';
$res = $cb->(GET('/v2test/a-mid@b/'));
$raw = $res->content;
+ like($raw, qr/WARNING: multiple messages have this Message-ID/,
+ 'warned about duplicate Message-IDs');
like($raw, qr/^hello world$/m, 'got first message');
like($raw, qr/^hello world!$/m, 'got second message');
like($raw, qr/^hello ghosts$/m, 'got third message');
^ permalink raw reply related [flat|nested] 39+ messages in thread
* [PATCH 21/38] view: html_footer: golf out a few lines
2022-09-10 8:16 [PATCH 00/38] www: reduce memory usage Eric Wong
` (19 preceding siblings ...)
2022-09-10 8:17 ` [PATCH 20/38] view: reduce ascii_html calls and {obuf} use Eric Wong
@ 2022-09-10 8:17 ` Eric Wong
2022-09-10 8:17 ` [PATCH 22/38] view: html_footer: remove obuf dependency Eric Wong
` (16 subsequent siblings)
37 siblings, 0 replies; 39+ messages in thread
From: Eric Wong @ 2022-09-10 8:17 UTC (permalink / raw)
To: meta
We can build `$u' in one line, and drop an unnecessary empty
line to reduce the amount of scrolling required to read this
sub.
---
lib/PublicInbox/View.pm | 7 +------
1 file changed, 1 insertion(+), 6 deletions(-)
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 52d37a9f..b27523b2 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -811,7 +811,6 @@ ${fallback}other threads:[<a
href="$upfx?t=$t">~$t_fmt UTC</a>|<a
href="$upfx">newest</a>]
EOF
-
thread_skel(\$skel, $ctx, $hdr);
my ($next, $prev);
my $parent = ' ';
@@ -821,12 +820,8 @@ EOF
$n = mid_href($n);
$next = "<a\nhref=\"$upfx$n/\"\nrel=next>next</a>";
}
- my $u;
my $par = $ctx->{parent_msg};
- if ($par) {
- $u = mid_href($par);
- $u = "$upfx$u/";
- }
+ my $u = $par ? $upfx.mid_href($par).'/' : undef;
if (my $p = $ctx->{prev_msg}) {
$prev = mid_href($p);
if ($p && $par && $p eq $par) {
^ permalink raw reply related [flat|nested] 39+ messages in thread
* [PATCH 22/38] view: html_footer: remove obuf dependency
2022-09-10 8:16 [PATCH 00/38] www: reduce memory usage Eric Wong
` (20 preceding siblings ...)
2022-09-10 8:17 ` [PATCH 21/38] view: html_footer: golf out a few lines Eric Wong
@ 2022-09-10 8:17 ` Eric Wong
2022-09-10 8:17 ` [PATCH 23/38] view: html_footer: avoid escaping " in a few places Eric Wong
` (15 subsequent siblings)
37 siblings, 0 replies; 39+ messages in thread
From: Eric Wong @ 2022-09-10 8:17 UTC (permalink / raw)
To: meta
Another step towards giving us more options for speedups and
memory reductions.
---
lib/PublicInbox/View.pm | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index b27523b2..40b4bf36 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -774,11 +774,12 @@ sub thread_skel ($$$) {
$ctx->{parent_msg} = $parent;
}
-# appends to obuf
+# writes to zbuf
sub html_footer {
my ($ctx, $hdr) = @_;
my $upfx = '../';
my ($related, $skel);
+ my $foot = '<pre>';
my $qry = delete $ctx->{-qry};
if ($qry && $ctx->{ibx}->isrch) {
my $q = ''; # search for either ancestor or descendent patches
@@ -836,15 +837,14 @@ EOF
} elsif ($u) { # unlikely
$parent = " <a\nhref=\"$u\"\nrel=prev>parent</a>";
}
- ${$ctx->{obuf}} .= "<pre>$next $prev$parent ";
+ $foot .= "$next $prev$parent ";
} else { # unindexed inboxes w/o over
- ${$ctx->{obuf}} .= '<pre>';
$skel = qq( <a\nhref="$upfx">latest</a>);
}
- ${$ctx->{obuf}} .= qq(<a\nhref="#R">reply</a>);
- # $skel may be big for big threads, don't append it to obuf
+ $foot .= qq(<a\nhref="#R">reply</a>);
+ # $skel may be big for big threads, don't append it to $foot
$skel .= '</pre>' . ($related // '');
- $ctx->zmore($skel .= msg_reply($ctx, $hdr)); # flushes obuf
+ $ctx->zmore($foot, $skel .= msg_reply($ctx, $hdr)); # flushes obuf
}
sub ghost_parent {
^ permalink raw reply related [flat|nested] 39+ messages in thread
* [PATCH 23/38] view: html_footer: avoid escaping " in a few places
2022-09-10 8:16 [PATCH 00/38] www: reduce memory usage Eric Wong
` (21 preceding siblings ...)
2022-09-10 8:17 ` [PATCH 22/38] view: html_footer: remove obuf dependency Eric Wong
@ 2022-09-10 8:17 ` Eric Wong
2022-09-10 8:17 ` [PATCH 24/38] viewdiff: diff_hunk: shorten conditionals, slightly Eric Wong
` (14 subsequent siblings)
37 siblings, 0 replies; 39+ messages in thread
From: Eric Wong @ 2022-09-10 8:17 UTC (permalink / raw)
To: meta
qq() is a nice alternative to "" when there's embedded "
characters in HTML entities.
---
lib/PublicInbox/View.pm | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 40b4bf36..2f99179e 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -819,23 +819,23 @@ EOF
if (my $n = $ctx->{next_msg}) {
$n = mid_href($n);
- $next = "<a\nhref=\"$upfx$n/\"\nrel=next>next</a>";
+ $next = qq(<a\nhref="$upfx$n/"\nrel=next>next</a>);
}
my $par = $ctx->{parent_msg};
my $u = $par ? $upfx.mid_href($par).'/' : undef;
if (my $p = $ctx->{prev_msg}) {
$prev = mid_href($p);
if ($p && $par && $p eq $par) {
- $prev = "<a\nhref=\"$upfx$prev/\"\n" .
+ $prev = qq(<a\nhref="$upfx$prev/"\n) .
'rel=prev>prev parent</a>';
$parent = '';
} else {
- $prev = "<a\nhref=\"$upfx$prev/\"\n" .
+ $prev = qq(<a\nhref="$upfx$prev/"\n) .
'rel=prev>prev</a>';
- $parent = " <a\nhref=\"$u\">parent</a>" if $u;
+ $parent = qq( <a\nhref="$u">parent</a>) if $u;
}
} elsif ($u) { # unlikely
- $parent = " <a\nhref=\"$u\"\nrel=prev>parent</a>";
+ $parent = qq( <a\nhref="$u"\nrel=prev>parent</a>);
}
$foot .= "$next $prev$parent ";
} else { # unindexed inboxes w/o over
^ permalink raw reply related [flat|nested] 39+ messages in thread
* [PATCH 24/38] viewdiff: diff_hunk: shorten conditionals, slightly
2022-09-10 8:16 [PATCH 00/38] www: reduce memory usage Eric Wong
` (22 preceding siblings ...)
2022-09-10 8:17 ` [PATCH 23/38] view: html_footer: avoid escaping " in a few places Eric Wong
@ 2022-09-10 8:17 ` Eric Wong
2022-09-10 8:17 ` [PATCH 25/38] view: switch a few things to ctx->zmore Eric Wong
` (13 subsequent siblings)
37 siblings, 0 replies; 39+ messages in thread
From: Eric Wong @ 2022-09-10 8:17 UTC (permalink / raw)
To: meta
I'm not sure if Devel::Size::total_size can be trusted due
to the regexps and crashes[1], but when it works, it's showing
around a 900 byte size reduction, too.
[1] https://rt.cpan.org/Public/Bug/Display.html?id=96421
---
lib/PublicInbox/ViewDiff.pm | 7 ++-----
1 file changed, 2 insertions(+), 5 deletions(-)
diff --git a/lib/PublicInbox/ViewDiff.pm b/lib/PublicInbox/ViewDiff.pm
index b9194ed4..076aa1af 100644
--- a/lib/PublicInbox/ViewDiff.pm
+++ b/lib/PublicInbox/ViewDiff.pm
@@ -51,13 +51,10 @@ sub diff_hunk ($$$) {
my ($oid_a, $oid_b, $spfx) = @$dctx{qw(oid_a oid_b spfx)};
if (defined($spfx) && defined($oid_a) && defined($oid_b)) {
- my ($n) = ($ca =~ /^-([0-9]+)/);
- $n = defined($n) ? "#n$n" : '';
-
+ my $n = ($ca =~ /^-([0-9]+)/) ? "#n$1" : '';
my $x = qq(@@ <a\nhref="$spfx$oid_a/s/$dctx->{Q}$n">$ca</a>);
- ($n) = ($cb =~ /^\+([0-9]+)/);
- $n = defined($n) ? "#n$n" : '';
+ $n = ($cb =~ /^\+([0-9]+)/) ? "#n$1" : '';
$x .= qq( <a\nhref="$spfx$oid_b/s/$dctx->{Q}$n">$cb</a> @@);
} else {
"@@ $ca $cb @@";
^ permalink raw reply related [flat|nested] 39+ messages in thread
* [PATCH 25/38] view: switch a few things to ctx->zmore
2022-09-10 8:16 [PATCH 00/38] www: reduce memory usage Eric Wong
` (23 preceding siblings ...)
2022-09-10 8:17 ` [PATCH 24/38] viewdiff: diff_hunk: shorten conditionals, slightly Eric Wong
@ 2022-09-10 8:17 ` Eric Wong
2022-09-10 8:17 ` [PATCH 26/38] www: drop {obuf} use entirely, for now Eric Wong
` (12 subsequent siblings)
37 siblings, 0 replies; 39+ messages in thread
From: Eric Wong @ 2022-09-10 8:17 UTC (permalink / raw)
To: meta
Unfortunately, this is actually slower. However, this
hopefully makes it easier to improve the internals and
make performance improvements down the line.
---
lib/PublicInbox/View.pm | 14 +++++-------
lib/PublicInbox/ViewDiff.pm | 45 +++++++++++++++++--------------------
2 files changed, 27 insertions(+), 32 deletions(-)
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 2f99179e..3dbf8bac 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -560,7 +560,7 @@ sub add_text_body { # callback for each_part
my ($part, $depth, $idx) = @$p;
my $ct = $part->content_type || 'text/plain';
my $fn = $part->filename;
- my $rv = $ctx->{obuf};
+ my $rv = $ctx->{obuf} //= \(my $obuf = '');
my ($s, $err) = msg_part_text($part, $ct);
$s // return $$rv .= (attach_link($ctx, $ct, $p, $fn) // '');
if ($part->{is_submsg}) {
@@ -618,18 +618,16 @@ sub add_text_body { # callback for each_part
$$rv .= "\n";
}
delete $part->{bdy}; # save memory
- foreach my $cur (@sections) {
+ for my $cur (@sections) { # $cur may be huge
if ($cur =~ /\A>/) {
# we use a <span> here to allow users to specify
# their own color for quoted text
- $$rv .= qq(<span\nclass="q">);
- $$rv .= $l->to_html($cur);
- $$rv .= '</span>';
+ $ctx->zmore(qq(<span\nclass="q">),
+ $l->to_html($cur), '</span>');
} elsif ($diff) {
flush_diff($ctx, \$cur);
- } else {
- # regular lines, OK
- $$rv .= $l->to_html($cur);
+ } else { # regular lines, OK
+ $ctx->zmore($l->to_html($cur));
}
undef $cur; # free memory
}
diff --git a/lib/PublicInbox/ViewDiff.pm b/lib/PublicInbox/ViewDiff.pm
index 076aa1af..36601910 100644
--- a/lib/PublicInbox/ViewDiff.pm
+++ b/lib/PublicInbox/ViewDiff.pm
@@ -156,10 +156,7 @@ sub diff_header ($$$) {
warn "BUG? <$$x> had no ^index line";
}
$$x =~ s!^diff --git!anchor1($ctx, $pb) // 'diff --git'!ems;
- my $dst = $ctx->{obuf};
- $$dst .= qq(<span\nclass="head">);
- $$dst .= $$x;
- $$dst .= '</span>';
+ $ctx->zmore(qq(<span\nclass="head">$$x</span>));
$dctx;
}
@@ -182,9 +179,10 @@ sub diff_before_or_after ($$) {
my $ch = $ctx->{changed_href} // '#related';
$$x .= qq(<a href="$ch">changed</a>,);
$$x .= ascii_html(pop @x); # $4: insertions/deletions
- $$x .= $lnk->to_html(pop @x); # notes, commit message, etc
+ # notes, commit message, etc
+ $ctx->zmore($$x .= $lnk->to_html(pop @x));
} else {
- $ctx->{-linkify}->to_html($$x);
+ $ctx->zmore($ctx->{-linkify}->to_html($$x));
}
}
@@ -195,8 +193,7 @@ sub flush_diff ($$) {
my @top = split($EXTRACT_DIFFS, $$cur);
undef $$cur; # free memory
- my $linkify = $ctx->{-linkify};
- my $dst = $ctx->{obuf};
+ my $lnk = $ctx->{-linkify};
my $dctx; # {}, keys: Q, oid_a, oid_b
while (defined(my $x = shift @top)) {
@@ -223,28 +220,28 @@ sub flush_diff ($$) {
if (!defined($dctx)) {
$after .= $s;
} elsif ($s =~ s/\A@@ (\S+) (\S+) @@//) {
- $$dst .= qq(<span\nclass="hunk">);
- $$dst .= diff_hunk($dctx, $1, $2);
- $$dst .= $linkify->to_html($s);
- $$dst .= '</span>';
- } elsif ($s =~ /\A\+/) {
- $$dst .= qq(<span\nclass="add">);
- $$dst .= $linkify->to_html($s);
- $$dst .= '</span>';
+ $ctx->zmore(qq(<span\nclass="hunk">) .
+ diff_hunk($dctx, $1, $2) .
+ $lnk->to_html($s) .
+ '</span>');
+ } elsif ($s =~ /\A\+/) { # $s may be huge
+ $ctx->zmore(qq(<span\nclass="add">),
+ $lnk->to_html($s),
+ '</span>');
} elsif ($s =~ /\A-- $/sm) { # email sig starts
$dctx = undef;
$after .= $s;
- } elsif ($s =~ /\A-/) {
- $$dst .= qq(<span\nclass="del">);
- $$dst .= $linkify->to_html($s);
- $$dst .= '</span>';
- } else {
- $$dst .= $linkify->to_html($s);
+ } elsif ($s =~ /\A-/) { # $s may be huge
+ $ctx->zmore(qq(<span\nclass="del">),
+ $lnk->to_html($s),
+ '</span>');
+ } else { # $s may be huge
+ $ctx->zmore($lnk->to_html($s));
}
}
- $$dst .= diff_before_or_after($ctx, \$after) if !$dctx;
+ diff_before_or_after($ctx, \$after) if !$dctx;
} else {
- $$dst .= diff_before_or_after($ctx, \$x);
+ diff_before_or_after($ctx, \$x);
}
}
}
^ permalink raw reply related [flat|nested] 39+ messages in thread
* [PATCH 26/38] www: drop {obuf} use entirely, for now
2022-09-10 8:16 [PATCH 00/38] www: reduce memory usage Eric Wong
` (24 preceding siblings ...)
2022-09-10 8:17 ` [PATCH 25/38] view: switch a few things to ctx->zmore Eric Wong
@ 2022-09-10 8:17 ` Eric Wong
2022-09-10 8:17 ` [PATCH 27/38] www: switch to zadd for the majority of buffering Eric Wong
` (11 subsequent siblings)
37 siblings, 0 replies; 39+ messages in thread
From: Eric Wong @ 2022-09-10 8:17 UTC (permalink / raw)
To: meta
This may help us identify hot spots and reduce pad space
as needed.
---
lib/PublicInbox/GzipFilter.pm | 13 ++++++-------
lib/PublicInbox/View.pm | 21 ++++++++-------------
lib/PublicInbox/ViewVCS.pm | 3 ++-
lib/PublicInbox/WwwAtomStream.pm | 8 ++++----
4 files changed, 20 insertions(+), 25 deletions(-)
diff --git a/lib/PublicInbox/GzipFilter.pm b/lib/PublicInbox/GzipFilter.pm
index 77d570b6..eb0046ce 100644
--- a/lib/PublicInbox/GzipFilter.pm
+++ b/lib/PublicInbox/GzipFilter.pm
@@ -130,11 +130,11 @@ sub write {
# similar to ->translate; use this when we're sure we know we have
# more data to buffer after this
sub zmore {
- my $self = $_[0]; # $_[1] => input
+ my $self = shift; # $_[1] => input
http_out($self);
my $err;
- for (delete $self->{obuf}, @_[1..$#_]) {
- $err = $self->{gz}->deflate($_ // next, $self->{zbuf});
+ for (@_) {
+ $err = $self->{gz}->deflate($_, $self->{zbuf});
die "gzip->deflate: $err" if $err != Z_OK;
}
undef;
@@ -142,13 +142,12 @@ sub zmore {
# flushes and returns the final bit of gzipped data
sub zflush ($;@) {
- my $self = $_[0]; # $_[1..Inf] => final input (optional)
+ my $self = shift; # $_[1..Inf] => final input (optional)
my $zbuf = delete $self->{zbuf};
my $gz = delete $self->{gz};
my $err;
- # it's a bug iff $gz is undef w/ $obuf or $_[1..]
- for (delete $self->{obuf}, @_[1..$#_]) {
- $err = $gz->deflate($_ // next, $zbuf);
+ for (@_) { # it's a bug iff $gz is undef if @_ isn't empty, here:
+ $err = $gz->deflate($_, $zbuf);
die "gzip->deflate: $err" if $err != Z_OK;
}
$gz // return ''; # not a bug, recursing on DS->write failure
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 3dbf8bac..630f1e42 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -43,7 +43,7 @@ sub msg_page_i {
$ctx->zmore('</pre><hr>');
}
html_footer($ctx, $ctx->{first_hdr}) if !$ctx->{smsg};
- delete($ctx->{obuf}) // \'';
+ \''; # XXX TODO cleanup
} else { # called by WwwStream::async_next or getline
$ctx->{smsg}; # may be undef
}
@@ -245,9 +245,8 @@ sub eml_entry {
# scan through all parts, looking for displayable text
$ctx->{mhref} = $mhref;
$ctx->{changed_href} = "#e$id"; # for diffstat "files? changed,"
- $ctx->{obuf} = \$rv;
+ $ctx->zmore($rv); # XXX $rv is small, reuse below
$eml->each_part(\&add_text_body, $ctx, 1); # expensive
- $ctx->zmore; # TODO: remove once add_text_body is updated
# add the footer
$rv = "\n<a\nhref=#$id_m\nid=e$id>^</a> ".
@@ -560,13 +559,9 @@ sub add_text_body { # callback for each_part
my ($part, $depth, $idx) = @$p;
my $ct = $part->content_type || 'text/plain';
my $fn = $part->filename;
- my $rv = $ctx->{obuf} //= \(my $obuf = '');
my ($s, $err) = msg_part_text($part, $ct);
- $s // return $$rv .= (attach_link($ctx, $ct, $p, $fn) // '');
- if ($part->{is_submsg}) {
- $$rv .= submsg_hdr($ctx, $part);
- $$rv .= "\n";
- }
+ $s // return $ctx->zmore(attach_link($ctx, $ct, $p, $fn) // '');
+ my $buf = $part->{is_submsg} ? submsg_hdr($ctx, $part)."\n" : '';
# makes no difference to browsers, and don't screw up filename
# link generation in diffs with the extra '%0D'
@@ -614,10 +609,11 @@ sub add_text_body { # callback for each_part
undef $s; # free memory
if (defined($fn) || ($depth > 0 && !$part->{is_submsg}) || $err) {
# badly-encoded message with $err? tell the world about it!
- $$rv .= attach_link($ctx, $ct, $p, $fn, $err);
- $$rv .= "\n";
+ $buf .= attach_link($ctx, $ct, $p, $fn, $err) . "\n";
}
delete $part->{bdy}; # save memory
+ $ctx->zmore($buf);
+ undef $buf;
for my $cur (@sections) { # $cur may be huge
if ($cur =~ /\A>/) {
# we use a <span> here to allow users to specify
@@ -722,7 +718,6 @@ sub _msg_page_prepare {
}
$ctx->{-linkify}->linkify_mids('..', \$hbuf); # escapes HTML
$ctx->zmore($hbuf .= "\n");
- ${$ctx->{obuf}} = ''; # TODO remove
1;
}
@@ -842,7 +837,7 @@ EOF
$foot .= qq(<a\nhref="#R">reply</a>);
# $skel may be big for big threads, don't append it to $foot
$skel .= '</pre>' . ($related // '');
- $ctx->zmore($foot, $skel .= msg_reply($ctx, $hdr)); # flushes obuf
+ $ctx->zmore($foot, $skel .= msg_reply($ctx, $hdr));
}
sub ghost_parent {
diff --git a/lib/PublicInbox/ViewVCS.pm b/lib/PublicInbox/ViewVCS.pm
index d3ac1a7d..57ab378d 100644
--- a/lib/PublicInbox/ViewVCS.pm
+++ b/lib/PublicInbox/ViewVCS.pm
@@ -205,7 +205,8 @@ EOM
$ctx->zmore("---\n patch is too large to show\n");
} else { # prepare flush_diff:
read($fh, $x, -s _);
- $ctx->{obuf} = \$bdy;
+ $ctx->zmore($bdy);
+ undef $bdy;
$ctx->{-apfx} = $ctx->{-spfx} = $upfx;
$x =~ s/\r?\n/\n/gs;
$ctx->{-anchors} = {} if $x =~ /^diff --git /sm;
diff --git a/lib/PublicInbox/WwwAtomStream.pm b/lib/PublicInbox/WwwAtomStream.pm
index 09c79a8a..cdfbf393 100644
--- a/lib/PublicInbox/WwwAtomStream.pm
+++ b/lib/PublicInbox/WwwAtomStream.pm
@@ -146,15 +146,15 @@ sub feed_entry {
my $name = ascii_html(join(', ', PublicInbox::Address::names($from)));
$email = ascii_html($email // $ctx->{ibx}->{-primary_address});
- my $s = delete($ctx->{emit_header}) ? atom_header($ctx, $title) : '';
- $s .= "<entry><author><name>$name</name><email>$email</email>" .
+ $ctx->zmore(
+ (delete($ctx->{emit_header}) ? atom_header($ctx, $title) : '').
+ "<entry><author><name>$name</name><email>$email</email>" .
"</author>$title$updated" .
qq(<link\nhref="$href"/>).
"<id>$uuid</id>$irt" .
qq{<content\ntype="xhtml">} .
qq{<div\nxmlns="http://www.w3.org/1999/xhtml">} .
- qq(<pre\nstyle="white-space:pre-wrap">);
- $ctx->{obuf} = \$s;
+ qq(<pre\nstyle="white-space:pre-wrap">));
$ctx->{mhref} = $href;
$ctx->{changed_href} = "${href}#related";
$eml->each_part(\&PublicInbox::View::add_text_body, $ctx, 1);
^ permalink raw reply related [flat|nested] 39+ messages in thread
* [PATCH 27/38] www: switch to zadd for the majority of buffering
2022-09-10 8:16 [PATCH 00/38] www: reduce memory usage Eric Wong
` (25 preceding siblings ...)
2022-09-10 8:17 ` [PATCH 26/38] www: drop {obuf} use entirely, for now Eric Wong
@ 2022-09-10 8:17 ` Eric Wong
2022-09-10 8:17 ` [PATCH 28/38] www: use PerlIO::scalar (zfh) for buffering Eric Wong
` (10 subsequent siblings)
37 siblings, 0 replies; 39+ messages in thread
From: Eric Wong @ 2022-09-10 8:17 UTC (permalink / raw)
To: meta
This allows us to focus string concatenations in one place to
allow Perl internal scratchpad optimizations to reuse memory.
Calling Compress::Raw::Zlib::deflate repeatedly proves too
expensive in terms of CPU cycles.
---
lib/PublicInbox/GzipFilter.pm | 22 +++++++++++-------
lib/PublicInbox/Mbox.pm | 2 +-
lib/PublicInbox/SearchView.pm | 2 +-
lib/PublicInbox/View.pm | 40 ++++++++++++++++----------------
lib/PublicInbox/ViewDiff.pm | 14 +++++------
lib/PublicInbox/WwwAtomStream.pm | 2 +-
lib/PublicInbox/WwwStream.pm | 4 ++--
7 files changed, 46 insertions(+), 40 deletions(-)
diff --git a/lib/PublicInbox/GzipFilter.pm b/lib/PublicInbox/GzipFilter.pm
index eb0046ce..1f11acb8 100644
--- a/lib/PublicInbox/GzipFilter.pm
+++ b/lib/PublicInbox/GzipFilter.pm
@@ -127,15 +127,21 @@ sub write {
http_out($_[0])->write(translate($_[0], $_[1]));
}
+sub zadd {
+ my $self = shift;
+ $self->{pbuf} .= $_ for @_; # perl internal pad memory use here
+}
+
# similar to ->translate; use this when we're sure we know we have
# more data to buffer after this
sub zmore {
my $self = shift; # $_[1] => input
http_out($self);
- my $err;
+ my $x;
+ defined($x = delete($self->{pbuf})) and unshift(@_, $x);
for (@_) {
- $err = $self->{gz}->deflate($_, $self->{zbuf});
- die "gzip->deflate: $err" if $err != Z_OK;
+ ($x = $self->{gz}->deflate($_, $self->{zbuf})) == Z_OK or
+ die "gzip->deflate: $x";
}
undef;
}
@@ -145,14 +151,14 @@ sub zflush ($;@) {
my $self = shift; # $_[1..Inf] => final input (optional)
my $zbuf = delete $self->{zbuf};
my $gz = delete $self->{gz};
- my $err;
+ my $x;
+ defined($x = delete($self->{pbuf})) and unshift(@_, $x);
for (@_) { # it's a bug iff $gz is undef if @_ isn't empty, here:
- $err = $gz->deflate($_, $zbuf);
- die "gzip->deflate: $err" if $err != Z_OK;
+ ($x = $gz->deflate($_, $zbuf)) == Z_OK or
+ die "gzip->deflate: $x";
}
$gz // return ''; # not a bug, recursing on DS->write failure
- $err = $gz->flush($zbuf);
- die "gzip->flush: $err" if $err != Z_OK;
+ ($x = $gz->flush($zbuf)) == Z_OK or die "gzip->flush: $x";
$zbuf;
}
diff --git a/lib/PublicInbox/Mbox.pm b/lib/PublicInbox/Mbox.pm
index 2ef8ff2b..cfe34d9c 100644
--- a/lib/PublicInbox/Mbox.pm
+++ b/lib/PublicInbox/Mbox.pm
@@ -20,7 +20,7 @@ sub getline {
my $ibx = $ctx->{ibx};
my $eml = delete($ctx->{eml}) // $ibx->smsg_eml($smsg) // return;
my $n = $ctx->{smsg} = $ibx->over->next_by_mid(@{$ctx->{next_arg}});
- $ctx->zmore(msg_hdr($ctx, $eml));
+ $ctx->zadd(msg_hdr($ctx, $eml));
if ($n) {
$ctx->translate(msg_body($eml));
} else { # last message
diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm
index e0404e5f..b18947ee 100644
--- a/lib/PublicInbox/SearchView.pm
+++ b/lib/PublicInbox/SearchView.pm
@@ -331,7 +331,7 @@ sub mset_thread {
# callback for PublicInbox::WwwStream::getline
sub mset_thread_i {
my ($ctx, $eml) = @_;
- $ctx->zmore($ctx->html_top) if exists $ctx->{-html_tip};
+ $ctx->zadd($ctx->html_top) if exists $ctx->{-html_tip};
$eml and return PublicInbox::View::eml_entry($ctx, $eml);
my $smsg = shift @{$ctx->{msgs}} or
$ctx->zmore(${delete($ctx->{skel})});
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 630f1e42..85dc3bd8 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -40,10 +40,10 @@ sub msg_page_i {
"../${\mid_href($smsg->{mid})}/" : '';
if (_msg_page_prepare($eml, $ctx)) {
$eml->each_part(\&add_text_body, $ctx, 1);
- $ctx->zmore('</pre><hr>');
+ $ctx->zadd('</pre><hr>');
}
html_footer($ctx, $ctx->{first_hdr}) if !$ctx->{smsg};
- \''; # XXX TODO cleanup
+ ''; # XXX TODO cleanup
} else { # called by WwwStream::async_next or getline
$ctx->{smsg}; # may be undef
}
@@ -58,7 +58,7 @@ sub no_over_html ($) {
PublicInbox::WwwStream::init($ctx);
if (_msg_page_prepare($eml, $ctx)) { # sets {-title_html}
$eml->each_part(\&add_text_body, $ctx, 1);
- $ctx->zmore('</pre><hr>');
+ $ctx->zadd('</pre><hr>');
}
html_footer($ctx, $eml);
$ctx->html_done;
@@ -245,7 +245,7 @@ sub eml_entry {
# scan through all parts, looking for displayable text
$ctx->{mhref} = $mhref;
$ctx->{changed_href} = "#e$id"; # for diffstat "files? changed,"
- $ctx->zmore($rv); # XXX $rv is small, reuse below
+ $ctx->zadd($rv); # XXX $rv is small, reuse below
$eml->each_part(\&add_text_body, $ctx, 1); # expensive
# add the footer
@@ -386,7 +386,7 @@ sub pre_thread { # walk_thread callback
sub thread_eml_entry {
my ($ctx, $eml) = @_;
my ($beg, $end) = thread_adj_level($ctx, $ctx->{level});
- $ctx->zmore($beg.'<pre>');
+ $ctx->zadd($beg.'<pre>');
eml_entry($ctx, $eml) . '</pre>' . $end;
}
@@ -414,15 +414,15 @@ sub stream_thread_i { # PublicInbox::WwwStream::getline callback
if (!$ghost_ok) { # first non-ghost
$ctx->{-title_html} =
ascii_html($smsg->{subject});
- $ctx->zmore($ctx->html_top);
+ $ctx->zadd($ctx->html_top);
}
return $smsg;
}
# buffer the ghost entry and loop
- $ctx->zmore(ghost_index_entry($ctx, $lvl, $smsg));
+ $ctx->zadd(ghost_index_entry($ctx, $lvl, $smsg));
} else { # all done
- $ctx->zmore(join('', thread_adj_level($ctx, 0)));
- $ctx->zmore(${delete($ctx->{skel})});
+ $ctx->zadd(join('', thread_adj_level($ctx, 0)));
+ $ctx->zadd(${delete($ctx->{skel})});
return;
}
}
@@ -491,7 +491,7 @@ sub thread_html_i { # PublicInbox::WwwStream::getline callback
my $smsg = $ctx->{smsg};
if (exists $ctx->{-html_tip}) {
$ctx->{-title_html} = ascii_html($smsg->{subject});
- $ctx->zmore($ctx->html_top);
+ $ctx->zadd($ctx->html_top);
}
return eml_entry($ctx, $eml);
} else {
@@ -499,7 +499,7 @@ sub thread_html_i { # PublicInbox::WwwStream::getline callback
return $smsg if exists($smsg->{blob});
}
my $skel = delete($ctx->{skel}) or return; # all done
- $ctx->zmore($$skel);
+ $ctx->zadd($$skel);
undef;
}
}
@@ -560,7 +560,7 @@ sub add_text_body { # callback for each_part
my $ct = $part->content_type || 'text/plain';
my $fn = $part->filename;
my ($s, $err) = msg_part_text($part, $ct);
- $s // return $ctx->zmore(attach_link($ctx, $ct, $p, $fn) // '');
+ $s // return $ctx->zadd(attach_link($ctx, $ct, $p, $fn) // '');
my $buf = $part->{is_submsg} ? submsg_hdr($ctx, $part)."\n" : '';
# makes no difference to browsers, and don't screw up filename
@@ -612,18 +612,18 @@ sub add_text_body { # callback for each_part
$buf .= attach_link($ctx, $ct, $p, $fn, $err) . "\n";
}
delete $part->{bdy}; # save memory
- $ctx->zmore($buf);
+ $ctx->zadd($buf);
undef $buf;
for my $cur (@sections) { # $cur may be huge
if ($cur =~ /\A>/) {
# we use a <span> here to allow users to specify
# their own color for quoted text
- $ctx->zmore(qq(<span\nclass="q">),
+ $ctx->zadd(qq(<span\nclass="q">),
$l->to_html($cur), '</span>');
} elsif ($diff) {
flush_diff($ctx, \$cur);
} else { # regular lines, OK
- $ctx->zmore($l->to_html($cur));
+ $ctx->zadd($l->to_html($cur));
}
undef $cur; # free memory
}
@@ -685,10 +685,10 @@ sub _msg_page_prepare {
$hbuf .= qq[Message-ID: <$x> (<a href="raw">raw</a>)\n];
}
if (!$nr) { # first (and only) message, common case
- $ctx->zmore($ctx->html_top, $hbuf);
+ $ctx->zadd($ctx->html_top, $hbuf);
} else {
delete $ctx->{-title_html};
- $ctx->zmore($ctx->{-html_tip}, $hbuf);
+ $ctx->zadd($ctx->{-html_tip}, $hbuf);
}
$ctx->{-linkify} //= PublicInbox::Linkify->new;
$hbuf = '';
@@ -699,7 +699,7 @@ sub _msg_page_prepare {
$hbuf .= "$h: $_\n" for ($eml->header_raw($h));
}
$ctx->{-linkify}->linkify_mids('..', \$hbuf, 1); # escapes HTML
- $ctx->zmore($hbuf);
+ $ctx->zadd($hbuf);
$hbuf = '';
}
my @irt = $eml->header_raw('In-Reply-To');
@@ -717,7 +717,7 @@ sub _msg_page_prepare {
$hbuf .= 'References: <'.join(">\n\t<", @$refs).">\n" if @$refs;
}
$ctx->{-linkify}->linkify_mids('..', \$hbuf); # escapes HTML
- $ctx->zmore($hbuf .= "\n");
+ $ctx->zadd($hbuf .= "\n");
1;
}
@@ -837,7 +837,7 @@ EOF
$foot .= qq(<a\nhref="#R">reply</a>);
# $skel may be big for big threads, don't append it to $foot
$skel .= '</pre>' . ($related // '');
- $ctx->zmore($foot, $skel .= msg_reply($ctx, $hdr));
+ $ctx->zadd($foot, $skel .= msg_reply($ctx, $hdr));
}
sub ghost_parent {
diff --git a/lib/PublicInbox/ViewDiff.pm b/lib/PublicInbox/ViewDiff.pm
index 36601910..95b615dc 100644
--- a/lib/PublicInbox/ViewDiff.pm
+++ b/lib/PublicInbox/ViewDiff.pm
@@ -156,7 +156,7 @@ sub diff_header ($$$) {
warn "BUG? <$$x> had no ^index line";
}
$$x =~ s!^diff --git!anchor1($ctx, $pb) // 'diff --git'!ems;
- $ctx->zmore(qq(<span\nclass="head">$$x</span>));
+ $ctx->zadd(qq(<span\nclass="head">$$x</span>));
$dctx;
}
@@ -180,9 +180,9 @@ sub diff_before_or_after ($$) {
$$x .= qq(<a href="$ch">changed</a>,);
$$x .= ascii_html(pop @x); # $4: insertions/deletions
# notes, commit message, etc
- $ctx->zmore($$x .= $lnk->to_html(pop @x));
+ $ctx->zadd($$x .= $lnk->to_html(pop @x));
} else {
- $ctx->zmore($ctx->{-linkify}->to_html($$x));
+ $ctx->zadd($ctx->{-linkify}->to_html($$x));
}
}
@@ -220,23 +220,23 @@ sub flush_diff ($$) {
if (!defined($dctx)) {
$after .= $s;
} elsif ($s =~ s/\A@@ (\S+) (\S+) @@//) {
- $ctx->zmore(qq(<span\nclass="hunk">) .
+ $ctx->zadd(qq(<span\nclass="hunk">) .
diff_hunk($dctx, $1, $2) .
$lnk->to_html($s) .
'</span>');
} elsif ($s =~ /\A\+/) { # $s may be huge
- $ctx->zmore(qq(<span\nclass="add">),
+ $ctx->zadd(qq(<span\nclass="add">),
$lnk->to_html($s),
'</span>');
} elsif ($s =~ /\A-- $/sm) { # email sig starts
$dctx = undef;
$after .= $s;
} elsif ($s =~ /\A-/) { # $s may be huge
- $ctx->zmore(qq(<span\nclass="del">),
+ $ctx->zadd(qq(<span\nclass="del">),
$lnk->to_html($s),
'</span>');
} else { # $s may be huge
- $ctx->zmore($lnk->to_html($s));
+ $ctx->zadd($lnk->to_html($s));
}
}
diff_before_or_after($ctx, \$after) if !$dctx;
diff --git a/lib/PublicInbox/WwwAtomStream.pm b/lib/PublicInbox/WwwAtomStream.pm
index cdfbf393..1c7ae881 100644
--- a/lib/PublicInbox/WwwAtomStream.pm
+++ b/lib/PublicInbox/WwwAtomStream.pm
@@ -146,7 +146,7 @@ sub feed_entry {
my $name = ascii_html(join(', ', PublicInbox::Address::names($from)));
$email = ascii_html($email // $ctx->{ibx}->{-primary_address});
- $ctx->zmore(
+ $ctx->zadd(
(delete($ctx->{emit_header}) ? atom_header($ctx, $title) : '').
"<entry><author><name>$name</name><email>$email</email>" .
"</author>$title$updated" .
diff --git a/lib/PublicInbox/WwwStream.pm b/lib/PublicInbox/WwwStream.pm
index c23668a4..2a318e5e 100644
--- a/lib/PublicInbox/WwwStream.pm
+++ b/lib/PublicInbox/WwwStream.pm
@@ -182,7 +182,7 @@ sub html_oneshot ($$;@) {
bless $ctx, __PACKAGE__;
$ctx->{gz} = PublicInbox::GzipFilter::gz_or_noop($res_hdr, $ctx->{env});
$ctx->{base_url} // do {
- $ctx->zmore(html_top($ctx));
+ $ctx->zadd(html_top($ctx));
$ctx->{base_url} = base_url($ctx);
};
my $bdy = $ctx->zflush(@_[2..$#_], _html_end($ctx));
@@ -216,7 +216,7 @@ sub html_init {
my $h = $ctx->{-res_hdr} = ['Content-Type', 'text/html; charset=UTF-8'];
$ctx->{gz} = PublicInbox::GzipFilter::gz_or_noop($h, $ctx->{env});
bless $ctx, __PACKAGE__;
- $ctx->zmore(html_top($ctx));
+ $ctx->zadd(html_top($ctx));
}
1;
^ permalink raw reply related [flat|nested] 39+ messages in thread
* [PATCH 28/38] www: use PerlIO::scalar (zfh) for buffering
2022-09-10 8:16 [PATCH 00/38] www: reduce memory usage Eric Wong
` (26 preceding siblings ...)
2022-09-10 8:17 ` [PATCH 27/38] www: switch to zadd for the majority of buffering Eric Wong
@ 2022-09-10 8:17 ` Eric Wong
2022-09-10 8:17 ` [PATCH 29/38] viewdiff: diff_before_or_after: avoid extra capture Eric Wong
` (9 subsequent siblings)
37 siblings, 0 replies; 39+ messages in thread
From: Eric Wong @ 2022-09-10 8:17 UTC (permalink / raw)
To: meta
Calling Compress::Raw::Zlib::deflate is fairly expensive.
Relying on the `.=' (concat) operator inside ->zadd operator is
faster, but the method dispatch overhead is noticeable compared
to the original code where we had bare `.=' littered throughout.
Fortunately, `print' and `say' with the PerlIO::scalar IO layer
appears to offer better performance without high method dispatch
overhead. This doesn't allow us to save as much memory as I
originally hoped, but does allow us to rely less on concat
operators in other places and just pass a list of args to
`print' and `say' as a appropriate.
This does reduce scratchpad use, however, allowing for large
memory savings, and we still ->deflate every single $eml.
---
Documentation/mknews.perl | 3 +-
lib/PublicInbox/GzipFilter.pm | 41 ++++++++++---------
lib/PublicInbox/Mbox.pm | 2 +-
lib/PublicInbox/SearchView.pm | 4 +-
lib/PublicInbox/View.pm | 58 +++++++++++++--------------
lib/PublicInbox/ViewDiff.pm | 69 ++++++++++++++++----------------
lib/PublicInbox/WwwAtomStream.pm | 8 ++--
lib/PublicInbox/WwwStream.pm | 7 ++--
8 files changed, 96 insertions(+), 96 deletions(-)
diff --git a/Documentation/mknews.perl b/Documentation/mknews.perl
index 1936cea7..68866f44 100755
--- a/Documentation/mknews.perl
+++ b/Documentation/mknews.perl
@@ -1,5 +1,5 @@
#!/usr/bin/perl -w
-# Copyright (C) 2019-2021 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# Generates NEWS, NEWS.atom, and NEWS.html files using release emails
# this uses unstable internal APIs of public-inbox, and this script
@@ -46,6 +46,7 @@ if ($dst eq 'NEWS') {
ibx => $ibx,
-upfx => "$base_url/",
-hr => 1,
+ zfh => $out,
};
if ($dst eq 'NEWS.html') {
html_start($out, $ctx);
diff --git a/lib/PublicInbox/GzipFilter.pm b/lib/PublicInbox/GzipFilter.pm
index 1f11acb8..848370ce 100644
--- a/lib/PublicInbox/GzipFilter.pm
+++ b/lib/PublicInbox/GzipFilter.pm
@@ -127,38 +127,39 @@ sub write {
http_out($_[0])->write(translate($_[0], $_[1]));
}
-sub zadd {
- my $self = shift;
- $self->{pbuf} .= $_ for @_; # perl internal pad memory use here
+sub zfh {
+ $_[0]->{zfh} // do {
+ open($_[0]->{zfh}, '>>', \($_[0]->{pbuf} //= '')) or
+ die "open: $!";
+ $_[0]->{zfh}
+ };
}
# similar to ->translate; use this when we're sure we know we have
# more data to buffer after this
sub zmore {
- my $self = shift; # $_[1] => input
+ my $self = shift;
+ my $zfh = delete $self->{zfh};
+ if (@_ > 1 || $zfh) {
+ print { $zfh // zfh($self) } @_;
+ @_ = (delete $self->{pbuf});
+ delete $self->{zfh};
+ };
http_out($self);
- my $x;
- defined($x = delete($self->{pbuf})) and unshift(@_, $x);
- for (@_) {
- ($x = $self->{gz}->deflate($_, $self->{zbuf})) == Z_OK or
- die "gzip->deflate: $x";
- }
- undef;
+ my $err;
+ ($err = $self->{gz}->deflate($_[0], $self->{zbuf})) == Z_OK or
+ die "gzip->deflate: $err";
}
# flushes and returns the final bit of gzipped data
sub zflush ($;@) {
my $self = shift; # $_[1..Inf] => final input (optional)
+ zmore($self, @_) if scalar(@_) || $self->{zfh};
+ # not a bug, recursing on DS->write failure
+ my $gz = delete $self->{gz} // return '';
+ my $err;
my $zbuf = delete $self->{zbuf};
- my $gz = delete $self->{gz};
- my $x;
- defined($x = delete($self->{pbuf})) and unshift(@_, $x);
- for (@_) { # it's a bug iff $gz is undef if @_ isn't empty, here:
- ($x = $gz->deflate($_, $zbuf)) == Z_OK or
- die "gzip->deflate: $x";
- }
- $gz // return ''; # not a bug, recursing on DS->write failure
- ($x = $gz->flush($zbuf)) == Z_OK or die "gzip->flush: $x";
+ ($err = $gz->flush($zbuf)) == Z_OK or die "gzip->flush: $err";
$zbuf;
}
diff --git a/lib/PublicInbox/Mbox.pm b/lib/PublicInbox/Mbox.pm
index cfe34d9c..2ef8ff2b 100644
--- a/lib/PublicInbox/Mbox.pm
+++ b/lib/PublicInbox/Mbox.pm
@@ -20,7 +20,7 @@ sub getline {
my $ibx = $ctx->{ibx};
my $eml = delete($ctx->{eml}) // $ibx->smsg_eml($smsg) // return;
my $n = $ctx->{smsg} = $ibx->over->next_by_mid(@{$ctx->{next_arg}});
- $ctx->zadd(msg_hdr($ctx, $eml));
+ $ctx->zmore(msg_hdr($ctx, $eml));
if ($n) {
$ctx->translate(msg_body($eml));
} else { # last message
diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm
index b18947ee..8932c73d 100644
--- a/lib/PublicInbox/SearchView.pm
+++ b/lib/PublicInbox/SearchView.pm
@@ -331,10 +331,10 @@ sub mset_thread {
# callback for PublicInbox::WwwStream::getline
sub mset_thread_i {
my ($ctx, $eml) = @_;
- $ctx->zadd($ctx->html_top) if exists $ctx->{-html_tip};
+ print { $ctx->zfh } $ctx->html_top if exists $ctx->{-html_tip};
$eml and return PublicInbox::View::eml_entry($ctx, $eml);
my $smsg = shift @{$ctx->{msgs}} or
- $ctx->zmore(${delete($ctx->{skel})});
+ print { $ctx->zfh } ${delete($ctx->{skel})};
$smsg;
}
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 85dc3bd8..0753c06e 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -40,7 +40,7 @@ sub msg_page_i {
"../${\mid_href($smsg->{mid})}/" : '';
if (_msg_page_prepare($eml, $ctx)) {
$eml->each_part(\&add_text_body, $ctx, 1);
- $ctx->zadd('</pre><hr>');
+ print { $ctx->{zfh} } '</pre><hr>';
}
html_footer($ctx, $ctx->{first_hdr}) if !$ctx->{smsg};
''; # XXX TODO cleanup
@@ -58,7 +58,7 @@ sub no_over_html ($) {
PublicInbox::WwwStream::init($ctx);
if (_msg_page_prepare($eml, $ctx)) { # sets {-title_html}
$eml->each_part(\&add_text_body, $ctx, 1);
- $ctx->zadd('</pre><hr>');
+ print { $ctx->{zfh} } '</pre><hr>';
}
html_footer($ctx, $eml);
$ctx->html_done;
@@ -240,12 +240,11 @@ sub eml_entry {
my $html = ascii_html($irt);
$rv .= qq(In-Reply-To: <<a\nhref="$href">$html</a>>\n)
}
- $rv .= "\n";
+ say { $ctx->zfh } $rv;
# scan through all parts, looking for displayable text
$ctx->{mhref} = $mhref;
$ctx->{changed_href} = "#e$id"; # for diffstat "files? changed,"
- $ctx->zadd($rv); # XXX $rv is small, reuse below
$eml->each_part(\&add_text_body, $ctx, 1); # expensive
# add the footer
@@ -386,8 +385,8 @@ sub pre_thread { # walk_thread callback
sub thread_eml_entry {
my ($ctx, $eml) = @_;
my ($beg, $end) = thread_adj_level($ctx, $ctx->{level});
- $ctx->zadd($beg.'<pre>');
- eml_entry($ctx, $eml) . '</pre>' . $end;
+ print { $ctx->zfh } $beg, '<pre>';
+ print { $ctx->{zfh} } eml_entry($ctx, $eml), '</pre>', $end;
}
sub next_in_queue ($$) {
@@ -414,15 +413,15 @@ sub stream_thread_i { # PublicInbox::WwwStream::getline callback
if (!$ghost_ok) { # first non-ghost
$ctx->{-title_html} =
ascii_html($smsg->{subject});
- $ctx->zadd($ctx->html_top);
+ print { $ctx->zfh } $ctx->html_top;
}
return $smsg;
}
# buffer the ghost entry and loop
- $ctx->zadd(ghost_index_entry($ctx, $lvl, $smsg));
+ print { $ctx->zfh } ghost_index_entry($ctx, $lvl, $smsg)
} else { # all done
- $ctx->zadd(join('', thread_adj_level($ctx, 0)));
- $ctx->zadd(${delete($ctx->{skel})});
+ print { $ctx->zfh } thread_adj_level($ctx, 0),
+ ${delete($ctx->{skel})};
return;
}
}
@@ -491,7 +490,7 @@ sub thread_html_i { # PublicInbox::WwwStream::getline callback
my $smsg = $ctx->{smsg};
if (exists $ctx->{-html_tip}) {
$ctx->{-title_html} = ascii_html($smsg->{subject});
- $ctx->zadd($ctx->html_top);
+ print { $ctx->zfh } $ctx->html_top;
}
return eml_entry($ctx, $eml);
} else {
@@ -499,7 +498,7 @@ sub thread_html_i { # PublicInbox::WwwStream::getline callback
return $smsg if exists($smsg->{blob});
}
my $skel = delete($ctx->{skel}) or return; # all done
- $ctx->zadd($$skel);
+ print { $ctx->zfh } $$skel;
undef;
}
}
@@ -560,8 +559,9 @@ sub add_text_body { # callback for each_part
my $ct = $part->content_type || 'text/plain';
my $fn = $part->filename;
my ($s, $err) = msg_part_text($part, $ct);
- $s // return $ctx->zadd(attach_link($ctx, $ct, $p, $fn) // '');
- my $buf = $part->{is_submsg} ? submsg_hdr($ctx, $part)."\n" : '';
+ my $zfh = $ctx->zfh;
+ $s // return print $zfh (attach_link($ctx, $ct, $p, $fn) // '');
+ say $zfh submsg_hdr($ctx, $part) if $part->{is_submsg};
# makes no difference to browsers, and don't screw up filename
# link generation in diffs with the extra '%0D'
@@ -609,21 +609,19 @@ sub add_text_body { # callback for each_part
undef $s; # free memory
if (defined($fn) || ($depth > 0 && !$part->{is_submsg}) || $err) {
# badly-encoded message with $err? tell the world about it!
- $buf .= attach_link($ctx, $ct, $p, $fn, $err) . "\n";
+ say $zfh attach_link($ctx, $ct, $p, $fn, $err);
}
delete $part->{bdy}; # save memory
- $ctx->zadd($buf);
- undef $buf;
for my $cur (@sections) { # $cur may be huge
if ($cur =~ /\A>/) {
# we use a <span> here to allow users to specify
# their own color for quoted text
- $ctx->zadd(qq(<span\nclass="q">),
- $l->to_html($cur), '</span>');
+ print $zfh qq(<span\nclass="q">),
+ $l->to_html($cur), '</span>';
} elsif ($diff) {
flush_diff($ctx, \$cur);
} else { # regular lines, OK
- $ctx->zadd($l->to_html($cur));
+ print $zfh $l->to_html($cur);
}
undef $cur; # free memory
}
@@ -685,10 +683,10 @@ sub _msg_page_prepare {
$hbuf .= qq[Message-ID: <$x> (<a href="raw">raw</a>)\n];
}
if (!$nr) { # first (and only) message, common case
- $ctx->zadd($ctx->html_top, $hbuf);
+ print { $ctx->zfh } $ctx->html_top, $hbuf;
} else {
delete $ctx->{-title_html};
- $ctx->zadd($ctx->{-html_tip}, $hbuf);
+ print { $ctx->zfh } $ctx->{-html_tip}, $hbuf;
}
$ctx->{-linkify} //= PublicInbox::Linkify->new;
$hbuf = '';
@@ -699,7 +697,7 @@ sub _msg_page_prepare {
$hbuf .= "$h: $_\n" for ($eml->header_raw($h));
}
$ctx->{-linkify}->linkify_mids('..', \$hbuf, 1); # escapes HTML
- $ctx->zadd($hbuf);
+ print { $ctx->{zfh} } $hbuf;
$hbuf = '';
}
my @irt = $eml->header_raw('In-Reply-To');
@@ -717,7 +715,7 @@ sub _msg_page_prepare {
$hbuf .= 'References: <'.join(">\n\t<", @$refs).">\n" if @$refs;
}
$ctx->{-linkify}->linkify_mids('..', \$hbuf); # escapes HTML
- $ctx->zadd($hbuf .= "\n");
+ say { $ctx->{zfh} } $hbuf;
1;
}
@@ -771,7 +769,7 @@ sub thread_skel ($$$) {
sub html_footer {
my ($ctx, $hdr) = @_;
my $upfx = '../';
- my ($related, $skel);
+ my (@related, $skel);
my $foot = '<pre>';
my $qry = delete $ctx->{-qry};
if ($qry && $ctx->{ibx}->isrch) {
@@ -786,7 +784,7 @@ sub html_footer {
$q = wrap('', '', $q);
my $rows = ($q =~ tr/\n/\n/) + 1;
$q = ascii_html($q);
- $related = <<EOM;
+ $related[0] = <<EOM;
<form id=related
action=$upfx
><pre>find likely ancestor, descendant, or conflicting patches for <a
@@ -799,7 +797,7 @@ EOM
if ($ctx->{ibx}->over) {
my $t = ts2str($ctx->{-t_max});
my $t_fmt = fmt_ts($ctx->{-t_max});
- my $fallback = $related ? "\t" : "<a id=related>\t</a>";
+ my $fallback = @related ? "\t" : "<a id=related>\t</a>";
$skel = <<EOF;
${fallback}other threads:[<a
href="$upfx?t=$t">~$t_fmt UTC</a>|<a
@@ -834,10 +832,10 @@ EOF
} else { # unindexed inboxes w/o over
$skel = qq( <a\nhref="$upfx">latest</a>);
}
- $foot .= qq(<a\nhref="#R">reply</a>);
# $skel may be big for big threads, don't append it to $foot
- $skel .= '</pre>' . ($related // '');
- $ctx->zadd($foot, $skel .= msg_reply($ctx, $hdr));
+ print { $ctx->zfh } $foot, qq(<a\nhref="#R">reply</a>),
+ $skel, '</pre>', @related,
+ msg_reply($ctx, $hdr);
}
sub ghost_parent {
diff --git a/lib/PublicInbox/ViewDiff.pm b/lib/PublicInbox/ViewDiff.pm
index 95b615dc..5d23881b 100644
--- a/lib/PublicInbox/ViewDiff.pm
+++ b/lib/PublicInbox/ViewDiff.pm
@@ -67,8 +67,8 @@ sub oid ($$$) {
}
# returns true if diffstat anchor written, false otherwise
-sub anchor0 ($$$$) {
- my ($dst, $ctx, $fn, $rest) = @_;
+sub anchor0 ($$$) {
+ my ($ctx, $fn, $rest) = @_;
my $orig = $fn;
@@ -84,15 +84,12 @@ sub anchor0 ($$$$) {
# long filenames will require us to check in anchor1()
push(@{$ctx->{-long_path}}, $fn) if $fn =~ s!\A\.\.\./?!!;
- if (defined(my $attr = to_attr($ctx->{-apfx}.$fn))) {
- $ctx->{-anchors}->{$attr} = 1;
- my $spaces = ($orig =~ s/( +)\z//) ? $1 : '';
- $$dst .= " <a\nid=i$attr\nhref=#$attr>" .
- ascii_html($orig) . '</a>' . $spaces .
+ my $attr = to_attr($ctx->{-apfx}.$fn) // return;
+ $ctx->{-anchors}->{$attr} = 1;
+ my $spaces = ($orig =~ s/( +)\z//) ? $1 : '';
+ print { $ctx->{zfh} } " <a\nid=i$attr\nhref=#$attr>",
+ ascii_html($orig), '</a>', $spaces,
$ctx->{-linkify}->to_html($rest);
- return 1;
- }
- undef;
}
# returns "diff --git" anchor destination, undef otherwise
@@ -156,33 +153,34 @@ sub diff_header ($$$) {
warn "BUG? <$$x> had no ^index line";
}
$$x =~ s!^diff --git!anchor1($ctx, $pb) // 'diff --git'!ems;
- $ctx->zadd(qq(<span\nclass="head">$$x</span>));
+ print { $ctx->{zfh} } qq(<span\nclass="head">), $$x, '</span>';
$dctx;
}
sub diff_before_or_after ($$) {
my ($ctx, $x) = @_;
- if (exists $ctx->{-anchors} && $$x =~ /\A(.*?) # likely "---\n"
+ if (exists $ctx->{-anchors} && $$x =~ /\A(.*?) # likely "---\n" # \$1
# diffstat lines:
((?:^\x20(?:[^\n]+?)(?:\x20+\|\x20[^\n]*\n))+)
(\x20[0-9]+\x20files?\x20)changed,([^\n]+\n)
(.*?)\z/msx) { # notes, commit message, etc
my @x = ($5, $4, $3, $2, $1);
+ undef $$x;
my $lnk = $ctx->{-linkify};
- $$x = $lnk->to_html(pop @x); # uninteresting prefix
- for my $l (split(/^/m, pop(@x))) { # per-file diffstat lines
+ my $zfh = $ctx->{zfh};
+ print $zfh $lnk->to_html(pop @x); # $1 uninteresting prefix
+ for my $l (split(/^/m, pop(@x))) { # $2 per-file stat lines
$l =~ /^ (.+)( +\| .*\z)/s and
- anchor0($x, $ctx, $1, $2) and next;
- $$x .= $lnk->to_html($l);
+ anchor0($ctx, $1, $2) and next;
+ print $zfh $lnk->to_html($l);
}
- $$x .= pop @x; # $3 /^ \d+ files? /
my $ch = $ctx->{changed_href} // '#related';
- $$x .= qq(<a href="$ch">changed</a>,);
- $$x .= ascii_html(pop @x); # $4: insertions/deletions
- # notes, commit message, etc
- $ctx->zadd($$x .= $lnk->to_html(pop @x));
+ print $zfh pop(@x), # $3 /^ \d+ files? /
+ qq(<a href="$ch">changed</a>,),
+ ascii_html(pop @x), # insertions/deletions
+ $lnk->to_html(@x); # notes, commit message, etc
} else {
- $ctx->zadd($ctx->{-linkify}->to_html($$x));
+ print { $ctx->{zfh} } $ctx->{-linkify}->to_html($$x);
}
}
@@ -195,6 +193,7 @@ sub flush_diff ($$) {
my $lnk = $ctx->{-linkify};
my $dctx; # {}, keys: Q, oid_a, oid_b
+ my $zfh = $ctx->zfh;
while (defined(my $x = shift @top)) {
if (scalar(@top) >= 4 &&
@@ -202,7 +201,7 @@ sub flush_diff ($$) {
$top[0] =~ $IS_OID) {
$dctx = diff_header(\$x, $ctx, \@top);
} elsif ($dctx) {
- my $after = '';
+ open(my $afh, '>>', \(my $after='')) or die "open: $!";
# Quiet "Complex regular subexpression recursion limit"
# warning. Perl will truncate matches upon hitting
@@ -218,25 +217,25 @@ sub flush_diff ($$) {
(?:(?:^-[^\n]*\n)+)|
(?:^@@ [^\n]+\n))/xsm, $x)) {
if (!defined($dctx)) {
- $after .= $s;
+ print $afh $s;
} elsif ($s =~ s/\A@@ (\S+) (\S+) @@//) {
- $ctx->zadd(qq(<span\nclass="hunk">) .
- diff_hunk($dctx, $1, $2) .
- $lnk->to_html($s) .
- '</span>');
+ print $zfh qq(<span\nclass="hunk">),
+ diff_hunk($dctx, $1, $2),
+ $lnk->to_html($s),
+ '</span>';
} elsif ($s =~ /\A\+/) { # $s may be huge
- $ctx->zadd(qq(<span\nclass="add">),
+ print $zfh qq(<span\nclass="add">),
$lnk->to_html($s),
- '</span>');
+ '</span>';
} elsif ($s =~ /\A-- $/sm) { # email sig starts
$dctx = undef;
- $after .= $s;
+ print $afh $s;
} elsif ($s =~ /\A-/) { # $s may be huge
- $ctx->zadd(qq(<span\nclass="del">),
- $lnk->to_html($s),
- '</span>');
+ print $zfh qq(<span\nclass="del">),
+ $lnk->to_html($s),
+ '</span>';
} else { # $s may be huge
- $ctx->zadd($lnk->to_html($s));
+ print $zfh $lnk->to_html($s);
}
}
diff_before_or_after($ctx, \$after) if !$dctx;
diff --git a/lib/PublicInbox/WwwAtomStream.pm b/lib/PublicInbox/WwwAtomStream.pm
index 1c7ae881..33da3244 100644
--- a/lib/PublicInbox/WwwAtomStream.pm
+++ b/lib/PublicInbox/WwwAtomStream.pm
@@ -146,15 +146,15 @@ sub feed_entry {
my $name = ascii_html(join(', ', PublicInbox::Address::names($from)));
$email = ascii_html($email // $ctx->{ibx}->{-primary_address});
- $ctx->zadd(
- (delete($ctx->{emit_header}) ? atom_header($ctx, $title) : '').
+ print { $ctx->zfh }
+ (delete($ctx->{emit_header}) ? atom_header($ctx, $title) : ''),
"<entry><author><name>$name</name><email>$email</email>" .
"</author>$title$updated" .
- qq(<link\nhref="$href"/>).
+ qq(<link\nhref="$href"/>) .
"<id>$uuid</id>$irt" .
qq{<content\ntype="xhtml">} .
qq{<div\nxmlns="http://www.w3.org/1999/xhtml">} .
- qq(<pre\nstyle="white-space:pre-wrap">));
+ qq(<pre\nstyle="white-space:pre-wrap">);
$ctx->{mhref} = $href;
$ctx->{changed_href} = "${href}#related";
$eml->each_part(\&PublicInbox::View::add_text_body, $ctx, 1);
diff --git a/lib/PublicInbox/WwwStream.pm b/lib/PublicInbox/WwwStream.pm
index 2a318e5e..77b6f9c2 100644
--- a/lib/PublicInbox/WwwStream.pm
+++ b/lib/PublicInbox/WwwStream.pm
@@ -181,11 +181,12 @@ sub html_oneshot ($$;@) {
'Content-Length' => undef ];
bless $ctx, __PACKAGE__;
$ctx->{gz} = PublicInbox::GzipFilter::gz_or_noop($res_hdr, $ctx->{env});
+ my @top;
$ctx->{base_url} // do {
- $ctx->zadd(html_top($ctx));
+ @top = html_top($ctx);
$ctx->{base_url} = base_url($ctx);
};
- my $bdy = $ctx->zflush(@_[2..$#_], _html_end($ctx));
+ my $bdy = $ctx->zflush(@top, @_[2..$#_], _html_end($ctx));
$res_hdr->[3] = length($bdy);
[ $code, $res_hdr, [ $bdy ] ]
}
@@ -216,7 +217,7 @@ sub html_init {
my $h = $ctx->{-res_hdr} = ['Content-Type', 'text/html; charset=UTF-8'];
$ctx->{gz} = PublicInbox::GzipFilter::gz_or_noop($h, $ctx->{env});
bless $ctx, __PACKAGE__;
- $ctx->zadd(html_top($ctx));
+ print { $ctx->zfh } html_top($ctx);
}
1;
^ permalink raw reply related [flat|nested] 39+ messages in thread
* [PATCH 29/38] viewdiff: diff_before_or_after: avoid extra capture
2022-09-10 8:16 [PATCH 00/38] www: reduce memory usage Eric Wong
` (27 preceding siblings ...)
2022-09-10 8:17 ` [PATCH 28/38] www: use PerlIO::scalar (zfh) for buffering Eric Wong
@ 2022-09-10 8:17 ` Eric Wong
2022-09-10 8:17 ` [PATCH 30/38] viewdiff: diff_header: shorten function, slightly Eric Wong
` (8 subsequent siblings)
37 siblings, 0 replies; 39+ messages in thread
From: Eric Wong @ 2022-09-10 8:17 UTC (permalink / raw)
To: meta
/(.*?)\z/ will capture the "$X insertions(+), $Y deletions(-)"
bit anyways, along with whatever extra notes before the
/^diff --git / line. So just rely on /(.*?)\z/ and avoid
the special case before it.
---
lib/PublicInbox/ViewDiff.pm | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/lib/PublicInbox/ViewDiff.pm b/lib/PublicInbox/ViewDiff.pm
index 5d23881b..b9115669 100644
--- a/lib/PublicInbox/ViewDiff.pm
+++ b/lib/PublicInbox/ViewDiff.pm
@@ -162,9 +162,9 @@ sub diff_before_or_after ($$) {
if (exists $ctx->{-anchors} && $$x =~ /\A(.*?) # likely "---\n" # \$1
# diffstat lines:
((?:^\x20(?:[^\n]+?)(?:\x20+\|\x20[^\n]*\n))+)
- (\x20[0-9]+\x20files?\x20)changed,([^\n]+\n)
+ (\x20[0-9]+\x20files?\x20)changed,
(.*?)\z/msx) { # notes, commit message, etc
- my @x = ($5, $4, $3, $2, $1);
+ my @x = ($4, $3, $2, $1);
undef $$x;
my $lnk = $ctx->{-linkify};
my $zfh = $ctx->{zfh};
@@ -177,8 +177,8 @@ sub diff_before_or_after ($$) {
my $ch = $ctx->{changed_href} // '#related';
print $zfh pop(@x), # $3 /^ \d+ files? /
qq(<a href="$ch">changed</a>,),
- ascii_html(pop @x), # insertions/deletions
- $lnk->to_html(@x); # notes, commit message, etc
+ # insertions/deletions, notes, commit message, etc:
+ $lnk->to_html(@x);
} else {
print { $ctx->{zfh} } $ctx->{-linkify}->to_html($$x);
}
^ permalink raw reply related [flat|nested] 39+ messages in thread
* [PATCH 30/38] viewdiff: diff_header: shorten function, slightly
2022-09-10 8:16 [PATCH 00/38] www: reduce memory usage Eric Wong
` (28 preceding siblings ...)
2022-09-10 8:17 ` [PATCH 29/38] viewdiff: diff_before_or_after: avoid extra capture Eric Wong
@ 2022-09-10 8:17 ` Eric Wong
2022-09-10 8:17 ` [PATCH 31/38] www_static: switch to `print $zfh', and optimize Eric Wong
` (7 subsequent siblings)
37 siblings, 0 replies; 39+ messages in thread
From: Eric Wong @ 2022-09-10 8:17 UTC (permalink / raw)
To: meta
It makes for easier reading with gigantic fonts.
---
lib/PublicInbox/ViewDiff.pm | 8 ++------
1 file changed, 2 insertions(+), 6 deletions(-)
diff --git a/lib/PublicInbox/ViewDiff.pm b/lib/PublicInbox/ViewDiff.pm
index b9115669..fba3d76c 100644
--- a/lib/PublicInbox/ViewDiff.pm
+++ b/lib/PublicInbox/ViewDiff.pm
@@ -124,12 +124,8 @@ sub diff_header ($$$) {
$dctx->{Q} = '?b='.uri_escape_path($pb);
} else {
my @q;
- if ($pb ne '/dev/null') {
- push @q, 'b='.uri_escape_path($pb);
- }
- if ($pa ne '/dev/null') {
- push @q, 'a='.uri_escape_path($pa);
- }
+ push @q, 'b='.uri_escape_path($pb) if $pb ne '/dev/null';
+ push @q, 'a='.uri_escape_path($pa) if $pa ne '/dev/null';
$dctx->{Q} = '?'.join('&', @q);
}
^ permalink raw reply related [flat|nested] 39+ messages in thread
* [PATCH 31/38] www_static: switch to `print $zfh', and optimize
2022-09-10 8:16 [PATCH 00/38] www: reduce memory usage Eric Wong
` (29 preceding siblings ...)
2022-09-10 8:17 ` [PATCH 30/38] viewdiff: diff_header: shorten function, slightly Eric Wong
@ 2022-09-10 8:17 ` Eric Wong
2022-09-10 8:17 ` [PATCH 32/38] httpd/async: describe which ->write subs it can call Eric Wong
` (6 subsequent siblings)
37 siblings, 0 replies; 39+ messages in thread
From: Eric Wong @ 2022-09-10 8:17 UTC (permalink / raw)
To: meta
->zmore isn't cheap, and we can be smarter about how
we place newlines to avoid a `join' operation, now.
We can also drop some unused variables, here.
---
lib/PublicInbox/WwwStatic.pm | 32 +++++++++++++++-----------------
1 file changed, 15 insertions(+), 17 deletions(-)
diff --git a/lib/PublicInbox/WwwStatic.pm b/lib/PublicInbox/WwwStatic.pm
index eeb5e565..1c1a3d38 100644
--- a/lib/PublicInbox/WwwStatic.pm
+++ b/lib/PublicInbox/WwwStatic.pm
@@ -275,12 +275,11 @@ sub dir_response ($$$) {
my $path_info = $env->{PATH_INFO};
push @entries, '..' if $path_info ne '/';
for my $base (@entries) {
+ my @st = stat($fs_path . $base) or next; # unlikely
my $href = ascii_html(uri_escape_utf8($base));
my $name = ascii_html($base);
- my @st = stat($fs_path . $base) or next; # unlikely
- my ($gzipped, $uncompressed, $hsize);
- my $entry = '';
my $mtime = $st[9];
+ my ($entry, $hsize);
if (-d _) {
$href .= '/';
$name .= '/';
@@ -296,12 +295,12 @@ sub dir_response ($$$) {
next;
}
# 54 = 80 - (SP length(strftime(%Y-%m-%d %k:%M)) SP human_size)
- $hsize = sprintf('% 8s', $hsize);
my $pad = 54 - length($name);
$pad = 1 if $pad <= 0;
- $entry .= qq(<a\nhref="$href">$name</a>) . (' ' x $pad);
- $mtime = strftime('%Y-%m-%d %k:%M', gmtime($mtime));
- $entry .= $mtime . $hsize;
+ $entry = qq(\n<a\nhref="$href">$name</a>) .
+ (' ' x $pad) .
+ strftime('%Y-%m-%d %k:%M', gmtime($mtime)) .
+ sprintf('% 8s', $hsize);
}
# filter out '.gz' files as long as the mtime matches the
@@ -309,17 +308,16 @@ sub dir_response ($$$) {
delete(@other{keys %want_gz});
@entries = ((map { ${$dirs{$_}} } sort keys %dirs),
(map { ${$other{$_}} } sort keys %other));
-
my $path_info_html = ascii_html($path_info);
- my $h = [qw(Content-Type text/html Content-Length), undef];
- my $gzf = gzf_maybe($h, $env);
- $gzf->zmore("<html><head><title>Index of $path_info_html</title>" .
- ${$self->{style}} .
- "</head><body><pre>Index of $path_info_html</pre><hr><pre>\n");
- $gzf->zmore(join("\n", @entries));
- my $out = $gzf->zflush("</pre><hr></body></html>\n");
- $h->[3] = length($out);
- [ 200, $h, [ $out ] ]
+ my @h = qw(Content-Type text/html);
+ my $gzf = gzf_maybe(\@h, $env);
+ print { $gzf->zfh } '<html><head><title>Index of ', $path_info_html,
+ '</title>', ${$self->{style}}, '</head><body><pre>Index of ',
+ $path_info_html, '</pre><hr><pre>', @entries,
+ '</pre><hr></body></html>';
+ my $out = $gzf->zflush;
+ push @h, 'Content-Length', length($out);
+ [ 200, \@h, [ $out ] ]
}
sub call { # PSGI app endpoint
^ permalink raw reply related [flat|nested] 39+ messages in thread
* [PATCH 32/38] httpd/async: describe which ->write subs it can call
2022-09-10 8:16 [PATCH 00/38] www: reduce memory usage Eric Wong
` (30 preceding siblings ...)
2022-09-10 8:17 ` [PATCH 31/38] www_static: switch to `print $zfh', and optimize Eric Wong
@ 2022-09-10 8:17 ` Eric Wong
2022-09-10 8:17 ` [PATCH 33/38] translate: support multiple buffer args Eric Wong
` (5 subsequent siblings)
37 siblings, 0 replies; 39+ messages in thread
From: Eric Wong @ 2022-09-10 8:17 UTC (permalink / raw)
To: meta
I initially wanted to rename GzipFilter->write to
GzipFilter->writev to reflect the multi-argument nature of the
sub, and it wasn't worth the memory to maintain an alias.
---
lib/PublicInbox/HTTPD/Async.pm | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/lib/PublicInbox/HTTPD/Async.pm b/lib/PublicInbox/HTTPD/Async.pm
index 1651da88..cb76cfab 100644
--- a/lib/PublicInbox/HTTPD/Async.pm
+++ b/lib/PublicInbox/HTTPD/Async.pm
@@ -1,4 +1,4 @@
-# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
#
# XXX This is a totally unstable API for public-inbox internal use only
@@ -77,8 +77,11 @@ sub async_pass {
# will automatically close this ($self) object.
$http->{forward} = $self;
- # write anything we overread when we were reading headers
- $fh->write($$bref); # PublicInbox:HTTP::{chunked,identity}_wcb
+ # write anything we overread when we were reading headers.
+ # This is typically PublicInbox:HTTP::{chunked,identity}_wcb,
+ # but may be PublicInbox::GzipFilter::write. PSGI requires
+ # *_wcb methods respond to ->write (and ->close), not ->print
+ $fh->write($$bref);
# we're done with this, free this memory up ASAP since the
# calls after this may use much memory:
^ permalink raw reply related [flat|nested] 39+ messages in thread
* [PATCH 33/38] translate: support multiple buffer args
2022-09-10 8:16 [PATCH 00/38] www: reduce memory usage Eric Wong
` (31 preceding siblings ...)
2022-09-10 8:17 ` [PATCH 32/38] httpd/async: describe which ->write subs it can call Eric Wong
@ 2022-09-10 8:17 ` Eric Wong
2022-09-10 8:17 ` [PATCH 34/38] gzip_filter: write: use multi-arg translate Eric Wong
` (4 subsequent siblings)
37 siblings, 0 replies; 39+ messages in thread
From: Eric Wong @ 2022-09-10 8:17 UTC (permalink / raw)
To: meta
This will let us drop some calls to zmore in subsequent commits.
---
lib/PublicInbox/GzipFilter.pm | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/lib/PublicInbox/GzipFilter.pm b/lib/PublicInbox/GzipFilter.pm
index 848370ce..72ab5d50 100644
--- a/lib/PublicInbox/GzipFilter.pm
+++ b/lib/PublicInbox/GzipFilter.pm
@@ -94,15 +94,15 @@ sub gone { # what: search/over/mm
# for GetlineBody (via Qspawn) when NOT using $env->{'pi-httpd.async'}
# Also used for ->getline callbacks
-sub translate ($$) {
- my $self = $_[0]; # $_[1] => input
+sub translate {
+ my $self = shift; # $_[1] => input
# allocate the zlib context lazily here, instead of in ->new.
# Deflate contexts are memory-intensive and this object may
# be sitting in the Qspawn limiter queue for a while.
$self->{gz} //= gzip_or_die();
- if (defined $_[1]) { # my $buf = $_[1];
- zmore($self, $_[1]);
+ if (defined $_[0]) { # my $buf = $_[1];
+ zmore($self, @_);
length($self->{zbuf}) >= 8192 ? delete($self->{zbuf}) : '';
} else { # undef == EOF
zflush($self);
^ permalink raw reply related [flat|nested] 39+ messages in thread
* [PATCH 34/38] gzip_filter: write: use multi-arg translate
2022-09-10 8:16 [PATCH 00/38] www: reduce memory usage Eric Wong
` (32 preceding siblings ...)
2022-09-10 8:17 ` [PATCH 33/38] translate: support multiple buffer args Eric Wong
@ 2022-09-10 8:17 ` Eric Wong
2022-09-10 8:17 ` [PATCH 35/38] feed: new_html_i: switch from zmore to `print $zfh' Eric Wong
` (3 subsequent siblings)
37 siblings, 0 replies; 39+ messages in thread
From: Eric Wong @ 2022-09-10 8:17 UTC (permalink / raw)
To: meta
While we must name this function ->write for PSGI compatibility,
our own uses of it can make it operate more like writev(2)
or `print' in Perl.
---
lib/PublicInbox/GzipFilter.pm | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/lib/PublicInbox/GzipFilter.pm b/lib/PublicInbox/GzipFilter.pm
index 72ab5d50..bd72afff 100644
--- a/lib/PublicInbox/GzipFilter.pm
+++ b/lib/PublicInbox/GzipFilter.pm
@@ -124,7 +124,7 @@ sub http_out ($) {
sub write {
# my $ret = bytes::length($_[1]); # XXX does anybody care?
- http_out($_[0])->write(translate($_[0], $_[1]));
+ http_out($_[0])->write(translate(@_));
}
sub zfh {
^ permalink raw reply related [flat|nested] 39+ messages in thread
* [PATCH 35/38] feed: new_html_i: switch from zmore to `print $zfh'
2022-09-10 8:16 [PATCH 00/38] www: reduce memory usage Eric Wong
` (33 preceding siblings ...)
2022-09-10 8:17 ` [PATCH 34/38] gzip_filter: write: use multi-arg translate Eric Wong
@ 2022-09-10 8:17 ` Eric Wong
2022-09-10 8:17 ` [PATCH 36/38] mbox*: use multi-arg ->translate and ->write Eric Wong
` (2 subsequent siblings)
37 siblings, 0 replies; 39+ messages in thread
From: Eric Wong @ 2022-09-10 8:17 UTC (permalink / raw)
To: meta
eml_entry will enable zfh (PerlIO::scalar) buffering, anyways,
so there's no point in calling ->zmore to compress small
strings. The use of zfh for the skeleton is debatable, but
probably of no consequence given html_footer will hit it,
anyways.
---
lib/PublicInbox/Feed.pm | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/lib/PublicInbox/Feed.pm b/lib/PublicInbox/Feed.pm
index 56ca9886..affe0fb6 100644
--- a/lib/PublicInbox/Feed.pm
+++ b/lib/PublicInbox/Feed.pm
@@ -49,15 +49,15 @@ sub generate_html_index {
sub new_html_i {
my ($ctx, $eml) = @_;
- $ctx->zmore($ctx->html_top) if exists $ctx->{-html_tip};
+ print { $ctx->zfh } $ctx->html_top if exists $ctx->{-html_tip};
if ($eml) {
$ctx->{smsg}->populate($eml) if !$ctx->{ibx}->{over};
return PublicInbox::View::eml_entry($ctx, $eml);
}
my $smsg = shift @{$ctx->{msgs}} or
- $ctx->zmore(PublicInbox::View::pagination_footer(
- $ctx, './new.html'));
+ print { $ctx->zfh } PublicInbox::View::pagination_footer(
+ $ctx, './new.html');
$smsg;
}
^ permalink raw reply related [flat|nested] 39+ messages in thread
* [PATCH 36/38] mbox*: use multi-arg ->translate and ->write
2022-09-10 8:16 [PATCH 00/38] www: reduce memory usage Eric Wong
` (34 preceding siblings ...)
2022-09-10 8:17 ` [PATCH 35/38] feed: new_html_i: switch from zmore to `print $zfh' Eric Wong
@ 2022-09-10 8:17 ` Eric Wong
2022-09-10 8:17 ` [PATCH 37/38] www_listing: switch to `print $zfh' Eric Wong
2022-09-10 8:17 ` [PATCH 38/38] viewvcs: " Eric Wong
37 siblings, 0 replies; 39+ messages in thread
From: Eric Wong @ 2022-09-10 8:17 UTC (permalink / raw)
To: meta
No need to make multiple method calls from here,
now that ->translate and GzipFilter->write both support
multiple args.
---
lib/PublicInbox/Mbox.pm | 11 ++++-------
lib/PublicInbox/MboxGz.pm | 3 +--
2 files changed, 5 insertions(+), 9 deletions(-)
diff --git a/lib/PublicInbox/Mbox.pm b/lib/PublicInbox/Mbox.pm
index 2ef8ff2b..18db9d38 100644
--- a/lib/PublicInbox/Mbox.pm
+++ b/lib/PublicInbox/Mbox.pm
@@ -19,12 +19,10 @@ sub getline {
my $smsg = $ctx->{smsg} or return;
my $ibx = $ctx->{ibx};
my $eml = delete($ctx->{eml}) // $ibx->smsg_eml($smsg) // return;
- my $n = $ctx->{smsg} = $ibx->over->next_by_mid(@{$ctx->{next_arg}});
- $ctx->zmore(msg_hdr($ctx, $eml));
- if ($n) {
- $ctx->translate(msg_body($eml));
+ if (($ctx->{smsg} = $ibx->over->next_by_mid(@{$ctx->{next_arg}}))) {
+ $ctx->translate(msg_hdr($ctx, $eml), msg_body($eml));
} else { # last message
- $ctx->zflush(msg_body($eml));
+ $ctx->zflush(msg_hdr($ctx, $eml), msg_body($eml));
}
}
@@ -45,8 +43,7 @@ sub async_eml { # for async_blob_cb
# next message
$ctx->{smsg} = $ctx->{ibx}->over->next_by_mid(@{$ctx->{next_arg}});
local $ctx->{eml} = $eml; # for mbox_hdr
- $ctx->zmore(msg_hdr($ctx, $eml));
- $ctx->write(msg_body($eml));
+ $ctx->write(msg_hdr($ctx, $eml), msg_body($eml));
}
sub mbox_hdr ($) {
diff --git a/lib/PublicInbox/MboxGz.pm b/lib/PublicInbox/MboxGz.pm
index 56f5adcf..533d2ff1 100644
--- a/lib/PublicInbox/MboxGz.pm
+++ b/lib/PublicInbox/MboxGz.pm
@@ -37,8 +37,7 @@ sub getline {
my $cb = $self->{cb} or return;
while (my $smsg = $cb->($self)) {
my $eml = $self->{ibx}->smsg_eml($smsg) or next;
- $self->zmore(msg_hdr($self, $eml));
- return $self->translate(msg_body($eml));
+ return $self->translate(msg_hdr($self, $eml), msg_body($eml));
}
# signal that we're done and can return undef next call:
delete $self->{cb};
^ permalink raw reply related [flat|nested] 39+ messages in thread
* [PATCH 37/38] www_listing: switch to `print $zfh'
2022-09-10 8:16 [PATCH 00/38] www: reduce memory usage Eric Wong
` (35 preceding siblings ...)
2022-09-10 8:17 ` [PATCH 36/38] mbox*: use multi-arg ->translate and ->write Eric Wong
@ 2022-09-10 8:17 ` Eric Wong
2022-09-10 8:17 ` [PATCH 38/38] viewvcs: " Eric Wong
37 siblings, 0 replies; 39+ messages in thread
From: Eric Wong @ 2022-09-10 8:17 UTC (permalink / raw)
To: meta
Again, ->deflate (and thus ->zmore) calls are relatively
expensive compared to `print' ops using PerlIO::scalar
behind-the-scenes. While I can likely optimize the `join' away
here, too, that will happen in a future commit.
---
lib/PublicInbox/WwwListing.pm | 21 +++++++++++----------
1 file changed, 11 insertions(+), 10 deletions(-)
diff --git a/lib/PublicInbox/WwwListing.pm b/lib/PublicInbox/WwwListing.pm
index 35abf050..72c940dd 100644
--- a/lib/PublicInbox/WwwListing.pm
+++ b/lib/PublicInbox/WwwListing.pm
@@ -208,27 +208,28 @@ sub psgi_triple {
my $h = [ 'Content-Type', 'text/html; charset=UTF-8',
'Content-Length', undef ];
my $gzf = gzf_maybe($h, $ctx->{env});
- $gzf->zmore('<html><head><title>public-inbox listing</title>' .
- $ctx->{www}->style('+/') .
- '</head><body>');
+ my $zfh = $gzf->zfh;
+ print $zfh '<html><head><title>public-inbox listing</title>',
+ $ctx->{www}->style('+/'),
+ '</head><body>';
my $code = 404;
if (my $list = delete $ctx->{-list}) {
my $mset = delete $ctx->{-mset};
$code = 200;
if ($mset) { # already sorted, so search bar:
- $gzf->zmore(mset_nav_top($ctx, $mset));
+ print $zfh mset_nav_top($ctx, $mset);
} else { # sort config dump by ->modified
@$list = map { $_->[1] }
sort { $b->[0] <=> $a->[0] } @$list;
}
- $gzf->zmore('<pre>', join("\n", @$list)); # big
- $gzf->zmore(mset_footer($ctx, $mset)) if $mset;
+ print $zfh '<pre>', join("\n", @$list); # big
+ print $zfh mset_footer($ctx, $mset) if $mset;
} elsif (my $mset = delete $ctx->{-mset}) {
- $gzf->zmore(mset_nav_top($ctx, $mset) .
- '<pre>no matching inboxes' .
- mset_footer($ctx, $mset));
+ print $zfh mset_nav_top($ctx, $mset),
+ '<pre>no matching inboxes',
+ mset_footer($ctx, $mset);
} else {
- $gzf->zmore('<pre>no inboxes, yet');
+ print $zfh '<pre>no inboxes, yet';
}
my $out = $gzf->zflush('</pre><hr><pre>'.
qq(This is a listing of public inboxes, see the `mirror' link of each inbox
^ permalink raw reply related [flat|nested] 39+ messages in thread
* [PATCH 38/38] viewvcs: switch to `print $zfh'
2022-09-10 8:16 [PATCH 00/38] www: reduce memory usage Eric Wong
` (36 preceding siblings ...)
2022-09-10 8:17 ` [PATCH 37/38] www_listing: switch to `print $zfh' Eric Wong
@ 2022-09-10 8:17 ` Eric Wong
37 siblings, 0 replies; 39+ messages in thread
From: Eric Wong @ 2022-09-10 8:17 UTC (permalink / raw)
To: meta
Again, ->zmore has proven expensive due to the overhead of
calling ->deflate on small strings, so print directly to the
file handle and let the PerlIO::scalar layer take care of
buffering. One of the ->zmore calls was a no-op, even, so
drop that entirely.
---
lib/PublicInbox/ViewVCS.pm | 11 +++++------
1 file changed, 5 insertions(+), 6 deletions(-)
diff --git a/lib/PublicInbox/ViewVCS.pm b/lib/PublicInbox/ViewVCS.pm
index 57ab378d..f740591d 100644
--- a/lib/PublicInbox/ViewVCS.pm
+++ b/lib/PublicInbox/ViewVCS.pm
@@ -189,7 +189,8 @@ href="$f.patch">patch</a>)\n <a href=#parent>parent</a> $P->[0]};
$x = ' (<a href=#root_commit>root commit</a>)';
}
PublicInbox::WwwStream::html_init($ctx);
- $ctx->zmore(<<EOM);
+ my $zfh = $ctx->zfh;
+ print $zfh <<EOM;
<pre> <a href=#commit>commit</a> $H$x
<a href=#tree>tree</a> <a href="$upfx$T/s/">$T</a>
author $au
@@ -197,16 +198,14 @@ committer $co
<b>$s</b>
EOM
- $ctx->zmore("\n", $ctx->{-linkify}->to_html($bdy)) if length($bdy);
+ print $zfh "\n", $ctx->{-linkify}->to_html($bdy) if length($bdy);
$bdy = '';
open my $fh, '<:utf8', "$ctx->{-tmp}/p" or
die "open $ctx->{-tmp}/p: $!";
if (-s $fh > $MAX_SIZE) {
- $ctx->zmore("---\n patch is too large to show\n");
+ print $zfh "---\n patch is too large to show\n";
} else { # prepare flush_diff:
read($fh, $x, -s _);
- $ctx->zmore($bdy);
- undef $bdy;
$ctx->{-apfx} = $ctx->{-spfx} = $upfx;
$x =~ s/\r?\n/\n/gs;
$ctx->{-anchors} = {} if $x =~ /^diff --git /sm;
@@ -228,7 +227,7 @@ EOM
$q = wrap('', '', $q);
my $rows = ($q =~ tr/\n/\n/) + 1;
$q = ascii_html($q);
- $ctx->zmore(<<EOM);
+ print $zfh <<EOM;
<hr><form action=$upfx
id=related><pre>find related emails, including ancestors/descendants/conflicts
<textarea name=q cols=${\PublicInbox::View::COLS} rows=$rows>$q</textarea>
^ permalink raw reply related [flat|nested] 39+ messages in thread