From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.2 required=3.0 tests=ALL_TRUSTED,AWL,BAYES_00, DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF, T_SCC_BODY_TEXT_LINE shortcircuit=no autolearn=ham autolearn_force=no version=3.4.6 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 2135B1F655 for ; Thu, 6 Jun 2024 07:44:18 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=80x24.org; s=selector1; t=1717659858; bh=NiZBKQlBn9LeNS3h6Gic9D/p95Ib4ZRr0asO9ELw4/A=; h=From:To:Subject:Date:In-Reply-To:References:From; b=GuRoE76Oaq5TsdJfQYJyNln7AL1VkRDGodhbAenvkRzSbOrSUndEL+TFR7WkOUEf2 4vIpHDdd2Vk8oUUNiU0ZM5Lfn8Q9OaVIwYNgitcZPgxVBN/iU5m55scmM1x8RE+wey h+jhU0fGF+1OigtnzioLj8B6ZlhXLF0wwwMFR48E= From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 5/5] www: reduce fragmentation in /t/ and /T/ endpoints Date: Thu, 6 Jun 2024 07:44:16 +0000 Message-ID: <20240606074416.3900983-6-e@80x24.org> In-Reply-To: <20240606074416.3900983-1-e@80x24.org> References: <20240606074416.3900983-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: For giant threads with /t/ and /T/ endpoints, avoid generating a large string with a medium lifetime for the thread skeleton ($ctx->{skel}). Instead, make $ctx->{skel} an arrayref and use it to store a bunch of smaller strings. While keeping many small strings is inefficient due to pointer chasing, forcing a smaller distribution of sizes makes it easier for the malloc implementation to organize and find small chunks of memory instead of having to find (and hold) larger contiguous chunks. When a large string is created now, its lifetime is kept as short as possible to decrease its likelihood of causing fragmentation. 
Preliminary testing shows this appears to reduce RSS by roughly 20-40% under both glibc malloc (using a tiny MALLOC_MMAP_THRESHOLD_=67000) on 32-bit and jemalloc 5.2.1 on 64-bit with standard settings. --- lib/PublicInbox/SearchView.pm | 7 ++--- lib/PublicInbox/View.pm | 55 ++++++++++++++++++----------------- 2 files changed, 31 insertions(+), 31 deletions(-) diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm index 9919e25c..e27844d1 100644 --- a/lib/PublicInbox/SearchView.pm +++ b/lib/PublicInbox/SearchView.pm @@ -310,13 +310,12 @@ sub mset_thread { my $rootset = PublicInbox::SearchThread::thread($msgs, $r ? \&sort_relevance : \&PublicInbox::View::sort_ds, $ctx); - my $skel = search_nav_bot($ctx, $mset, $q).'
'. <{skel} = [ search_nav_bot($ctx, $mset, $q).'
'. <{-upfx} = '';
 	$ctx->{anchor_idx} = 1;
 	$ctx->{cur_level} = 0;
-	$ctx->{skel} = \$skel;
 	$ctx->{mapping} = {};
 	$ctx->{searchview} = 1;
 	$ctx->{prev_attr} = '';
@@ -326,7 +325,7 @@ EOM
 	# reduce hash lookups in skel_dump
 	$ctx->{-obfs_ibx} = $ibx->{obfuscate} ? $ibx : undef;
 	PublicInbox::View::walk_thread($rootset, $ctx,
-		\&PublicInbox::View::pre_thread);
+		\&PublicInbox::View::pre_thread); # pushes to ctx->{skel}
 
 	# link $INBOX_DIR/description text to "recent" view around
 	# the newest message in this result set:
@@ -343,7 +342,7 @@ sub mset_thread_i {
 	print { $ctx->zfh } $ctx->html_top if exists $ctx->{-html_tip};
 	$eml and return PublicInbox::View::eml_entry($ctx, $eml);
 	my $smsg = shift @{$ctx->{msgs}} or
-		print { $ctx->zfh } ${delete($ctx->{skel})};
+		print { $ctx->zfh } @{delete($ctx->{skel})};
 	$smsg;
 }
 
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 44e1f2a8..958efa41 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -476,7 +476,7 @@ sub stream_thread_i { # PublicInbox::WwwStream::getline callback
 			print { $ctx->zfh } ghost_index_entry($ctx, $lvl, $smsg)
 		} else { # all done
 			print { $ctx->zfh } thread_adj_level($ctx, 0),
-						${delete($ctx->{skel})};
+						@{delete($ctx->{skel})};
 			return;
 		}
 	}
@@ -513,11 +513,13 @@ href="../../">newest]
 EOF
 	$skel .= "Thread overview: ";
 	$skel .= $nr == 1 ? '(only message)' : "$nr+ messages";
-	$skel .= " (download: mbox.gz";
-	$skel .= " / follow: Atom feed)\n";
-	$skel .= "-- links below jump to the message on this page --\n";
+	$skel .= <mbox.gz follow: Atom feed
+-- links below jump to the message on this page --
+EOM
 	$ctx->{cur_level} = 0;
-	$ctx->{skel} = \$skel;
+	$ctx->{skel} = [ $skel ];
 	$ctx->{prev_attr} = '';
 	$ctx->{prev_level} = 0;
 	$ctx->{root_anchor} = 'm' . id_compress($mid, 1);
@@ -529,9 +531,9 @@ EOF
 
 	# reduce hash lookups in pre_thread->skel_dump
 	$ctx->{-obfs_ibx} = $ibx->{obfuscate} ? $ibx : undef;
-	walk_thread($rootset, $ctx, \&pre_thread);
+	walk_thread($rootset, $ctx, \&pre_thread); # pushes to ctx->{skel}
 
-	$skel .= '
'; + push @{$ctx->{skel}}, '
'; return stream_thread($rootset, $ctx) unless $ctx->{flat}; # flat display: lazy load the full message from smsg @@ -553,8 +555,7 @@ sub thread_html_i { # PublicInbox::WwwStream::getline callback while (my $smsg = shift @{$ctx->{msgs}}) { return $smsg if exists($smsg->{blob}); } - my $skel = delete($ctx->{skel}) or return; # all done - print { $ctx->zfh } $$skel; + print { $ctx->zfh } @{delete $ctx->{skel} // []}; undef; } } @@ -778,13 +779,13 @@ sub thread_skel ($$$) { my $ibx = $ctx->{ibx}; my ($nr, $msgs) = $ibx->over->get_thread($mid); my $parent = in_reply_to($hdr); - $$skel .= "\nThread overview: "; + $skel->[-1] .= "\nThread overview: "; if ($nr <= 1) { if (defined $parent) { - $$skel .= SKEL_EXPAND."\n "; - $$skel .= ghost_parent('../', $parent) . "\n"; + $skel->[-1] .= SKEL_EXPAND."\n "; + $skel->[-1] .= ghost_parent('../', $parent) . "\n"; } else { - $$skel .= "[no followups] ". + $skel->[-1] .= "[no followups] ". SKEL_EXPAND."\n"; } $ctx->{next_msg} = undef; @@ -792,8 +793,9 @@ sub thread_skel ($$$) { return; } - $$skel .= $nr; - $$skel .= '+ messages / '.SKEL_EXPAND.qq! top\n!; + $skel->[-1] .= $nr; + $skel->[-1] .= '+ messages / '.SKEL_EXPAND. + qq! top\n!; # nb: mutt only shows the first Subject in the index pane # when multiple Subject: headers are present, so we follow suit: @@ -815,7 +817,7 @@ sub thread_skel ($$$) { sub html_footer { my ($ctx, $hdr) = @_; my $upfx = '../'; - my (@related, $skel); + my (@related, @skel); my $foot = '
';
 	my $qry = delete $ctx->{-qry};
 	if ($qry && $ctx->{ibx}->isrch) {
@@ -847,12 +849,12 @@ EOM
 		my $t = ts2str($ctx->{-t_max});
 		my $t_fmt = fmt_ts($ctx->{-t_max});
 		my $fallback = @related ? "\t" : "\t";
-		$skel = <~$t_fmt UTC|newest]
 EOF
-		thread_skel(\$skel, $ctx, $hdr);
+		thread_skel(\@skel, $ctx, $hdr);
 		my ($next, $prev);
 		my $parent = '       ';
 		$next = $prev = '    ';
@@ -879,11 +881,11 @@ EOF
 		}
 		$foot .= "$next $prev$parent ";
 	} else { # unindexed inboxes w/o over
-		$skel = qq( latest);
+		$skel[0] = qq( latest);
 	}
-	# $skel may be big for big threads, don't append it to $foot
+	# @skel may be big for big threads, don't push to it
 	print { $ctx->zfh } $foot, qq(reply),
-				$skel, '
', @related, + @skel, '', @related, msg_reply($ctx, $hdr); } @@ -985,7 +987,8 @@ sub skel_dump { # walk_thread callback my $mid = $smsg->{mid}; if ($level == 0 && $ctx->{skel_dump_roots}++) { - $$skel .= delete($ctx->{sl_note}) || ''; + my $note = delete $ctx->{sl_note}; + push @$skel, $note if $note; } my $f = ascii_html(delete $smsg->{from_name}); @@ -1014,7 +1017,7 @@ sub skel_dump { # walk_thread callback if ($cur) { if ($cur eq $mid) { delete $ctx->{cur}; - $$skel .= "$d". + push @$skel, "$d". "$attr [this message]\n"; return 1; } else { @@ -1054,8 +1057,7 @@ sub skel_dump { # walk_thread callback } else { $m = $ctx->{-upfx}.mid_href($mid).'/'; } - $$skel .= $d . "" . $end; - 1; + push @$skel, qq($d$end); } sub _skel_ghost { @@ -1078,8 +1080,7 @@ sub _skel_ghost { } else { $d .= qq{<$html>\n}; } - ${$ctx->{skel}} .= $d; - 1; + push @{$ctx->{skel}}, $d; } # note: we favor Date: here because git-send-email increments it