From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 3599B1F5B1 for ; Sun, 5 Jul 2020 23:28:00 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 02/43] wwwstream: oneshot: perform gzip without middleware Date: Sun, 5 Jul 2020 23:27:18 +0000 Message-Id: <20200705232759.3161-3-e@yhbt.net> In-Reply-To: <20200705232759.3161-1-e@yhbt.net> References: <20200705232759.3161-1-e@yhbt.net> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: Plack::Middleware::Deflater forces us to use a memory-intensive closure. Instead, work towards building compressed strings in memory to reduce the overhead of buffering large HTML output. --- lib/PublicInbox/GzipFilter.pm | 13 +++++++++++++ lib/PublicInbox/WwwStream.pm | 27 ++++++++++++++++++++++----- 2 files changed, 35 insertions(+), 5 deletions(-) diff --git a/lib/PublicInbox/GzipFilter.pm b/lib/PublicInbox/GzipFilter.pm index a7355a8df..115660cb1 100644 --- a/lib/PublicInbox/GzipFilter.pm +++ b/lib/PublicInbox/GzipFilter.pm @@ -4,7 +4,9 @@ # Qspawn filter package PublicInbox::GzipFilter; use strict; +use parent qw(Exporter); use Compress::Raw::Zlib qw(Z_FINISH Z_OK); +our @EXPORT_OK = qw(gzip_maybe); my %OPT = (-WindowBits => 15 + 16, -AppendOutput => 1); sub new { bless {}, shift } @@ -16,6 +18,17 @@ sub attach { $self } +sub gzip_maybe ($) { + my ($env) = @_; + return if (($env->{HTTP_ACCEPT_ENCODING}) // '') !~ /\bgzip\b/; + + # in case Plack::Middleware::Deflater is loaded: + $env->{'plack.skip-deflater'} = 1; + + my ($gz, $err) = Compress::Raw::Zlib::Deflate->new(%OPT); + $err == Z_OK ? $gz : undef; +} + # for GetlineBody (via Qspawn) when NOT using $env->{'pi-httpd.async'} sub translate ($$) { my $self = $_[0]; diff --git a/lib/PublicInbox/WwwStream.pm b/lib/PublicInbox/WwwStream.pm index 915a71ba0..79ed6871e 100644 --- a/lib/PublicInbox/WwwStream.pm +++ b/lib/PublicInbox/WwwStream.pm @@ -13,6 +13,8 @@ use base qw(Exporter); our @EXPORT_OK = qw(html_oneshot); use bytes (); # length use PublicInbox::Hval qw(ascii_html prurl); +use Compress::Raw::Zlib qw(Z_FINISH Z_OK); +use PublicInbox::GzipFilter qw(gzip_maybe); our $TOR_URL = 'https://www.torproject.org/'; our $CODE_URL = 'https://public-inbox.org/public-inbox.git'; @@ -178,13 +180,28 @@ sub html_oneshot ($$;$) { ctx => $ctx, base_url => base_url($ctx), }, __PACKAGE__; - my @x = (_html_top($self), $sref ? $$sref : (), _html_end($self)); + my @x; + my @h = ('Content-Type' => 'text/html; charset=UTF-8'); + if (my $gz = gzip_maybe($ctx->{env})) { + my $err = $gz->deflate(_html_top($self), $x[0]); + die "gzip->deflate: $err" if $err != Z_OK; + if ($sref) { + $err = $gz->deflate($sref, $x[0]); + die "gzip->deflate: $err" if $err != Z_OK; + } + $err = $gz->deflate(_html_end($self), $x[0]); + die "gzip->deflate: $err" if $err != Z_OK; + $err = $gz->flush($x[0], Z_FINISH); + die "gzip->flush: $err" if $err != Z_OK; + push @h, qw(Vary Accept-Encoding Content-Encoding gzip); + } else { + @x = (_html_top($self), $sref ? $$sref : (), _html_end($self)); + } + my $len = 0; $len += bytes::length($_) for @x; - [ $code, [ - 'Content-Type' => 'text/html; charset=UTF-8', - 'Content-Length' => $len - ], \@x ]; + push @h, 'Content-Length', $len; + [ $code, \@h, \@x ] } 1;