unofficial mirror of meta@public-inbox.org
 help / color / mirror / Atom feed
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 08/11] view: reduce memory usage when displaying large threads
Date: Thu, 20 Aug 2015 02:57:20 +0000	[thread overview]
Message-ID: <1440039443-27052-8-git-send-email-e@80x24.org> (raw)
In-Reply-To: <1440039443-27052-1-git-send-email-e@80x24.org>

We want to minimize the time any large objects or strings
are referenced.  We can do threading entirely from the
mini_mime-generated messages and lazily load full messages
when rendering the display.
---
 lib/PublicInbox/View.pm | 80 +++++++++++++++++++++++++++----------------------
 1 file changed, 44 insertions(+), 36 deletions(-)

diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 2ba5118..391e3ad 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -146,7 +146,7 @@ sub thread_html {
 	my $mid = mid_compressed($ctx->{mid});
 	my $res = $srch->get_thread($mid);
 	my $rv = '';
-	my $msgs = load_results($ctx, $res);
+	my $msgs = load_results($res);
 	my $nr = scalar @$msgs;
 	return $rv if $nr == 0;
 	require PublicInbox::Thread;
@@ -154,7 +154,11 @@ sub thread_html {
 	$th->thread;
 	$th->order(*PublicInbox::Thread::sort_ts);
 	my $state = [ $srch, { root_anchor => anchor_for($mid) }, undef, 0 ];
-	thread_entry(\$rv, $state, $_, 0) for $th->rootset;
+	{
+		require PublicInbox::GitCatFile;
+		my $git = PublicInbox::GitCatFile->new($ctx->{git_dir});
+		thread_entry(\$rv, $git, $state, $_, 0) for $th->rootset;
+	}
 	my $final_anchor = $state->[3];
 	my $next = "<a\nid=\"s$final_anchor\">";
 
@@ -173,7 +177,7 @@ sub subject_path_html {
 	my $path = $ctx->{subject_path};
 	my $res = $srch->get_subject_path($path);
 	my $rv = '';
-	my $msgs = load_results($ctx, $res);
+	my $msgs = load_results($res);
 	my $nr = scalar @$msgs;
 	return $rv if $nr == 0;
 	require PublicInbox::Thread;
@@ -181,7 +185,11 @@ sub subject_path_html {
 	$th->thread;
 	$th->order(*PublicInbox::Thread::sort_ts);
 	my $state = [ $srch, { root_anchor => 'dummy' }, undef, 0 ];
-	thread_entry(\$rv, $state, $_, 0) for $th->rootset;
+	{
+		require PublicInbox::GitCatFile;
+		my $git = PublicInbox::GitCatFile->new($ctx->{git_dir});
+		thread_entry(\$rv, $git, $state, $_, 0) for $th->rootset;
+	}
 	my $final_anchor = $state->[3];
 	my $next = "<a\nid=\"s$final_anchor\">end of thread</a>\n";
 
@@ -197,7 +205,10 @@ sub index_walk {
 	my $ct = $part->content_type;
 
 	# account for filter bugs...
-	return '' if defined $ct && $ct =~ m!\btext/[xh]+tml\b!i;
+	if (defined $ct && $ct =~ m!\btext/[xh]+tml\b!i) {
+		$part->body_set('');
+		return '';
+	}
 
 	my $enc = enc_for($ct, $enc_msg);
 
@@ -224,7 +235,9 @@ sub index_walk {
 		# kill per-line trailing whitespace
 		$s =~ s/[ \t]+$//sgm;
 
-		$rv .= $s . "\n";
+		$rv .= $s;
+		$s = undef;
+		$rv .= "\n";
 	}
 	$rv;
 }
@@ -335,10 +348,13 @@ sub flush_quote {
 sub add_text_body {
 	my ($enc, $part, $part_nr, $full_pfx) = @_;
 	my $n = 0;
-	my $s = ascii_html($enc->decode($part->body));
+	my $nr = 0;
+	my $s = $part->body;
+	$part->body_set('');
+	$s = $enc->decode($s);
+	$s = ascii_html($s);
 	my @lines = split(/\n/, $s);
 	$s = '';
-	my $nr = 0;
 	my @quot;
 	while (defined(my $cur = shift @lines)) {
 		if ($cur !~ /^&gt;/) {
@@ -538,10 +554,10 @@ sub simple_dump {
 
 sub thread_followups {
 	my ($dst, $root, $res) = @_;
-	my @msgs = map { $_->mini_mime } @{$res->{msgs}};
+	my $msgs = load_results($res);
 	require PublicInbox::Thread;
 	$root->header_set('X-PI-TS', '0');
-	my $th = PublicInbox::Thread->new($root, @msgs);
+	my $th = PublicInbox::Thread->new($root, @$msgs);
 	$th->thread;
 	$th->order(*PublicInbox::Thread::sort_ts);
 	my $srch = $res->{srch};
@@ -559,43 +575,35 @@ sub thread_html_head {
 }
 
 sub thread_entry {
-	my ($dst, $state, $node, $level) = @_;
+	my ($dst, $git, $state, $node, $level) = @_;
 	# $state = [ $search_res, $seen, undef, 0 (msg_nr) ];
 	# $seen is overloaded with 3 types of fields:
 	#	1) "root_anchor" => anchor_for(Message-ID),
 	#	2) seen subject hashes: sha1(subject) => 1
 	#	3) anchors hashes: "#$sha1_hex" (same as $seen in index_entry)
 	if (my $mime = $node->message) {
-		if (length($$dst) == 0) {
-			$$dst .= thread_html_head($mime);
+
+		# lazy load the full message from mini_mime:
+		my $path = mid2path(mid_clean($mime->header('Message-ID')));
+		$mime = eval { Email::MIME->new($git->cat_file("HEAD:$path")) };
+		if ($mime) {
+			if (length($$dst) == 0) {
+				$$dst .= thread_html_head($mime);
+			}
+			$$dst .= index_entry(undef, $mime, $level, $state);
 		}
-		$$dst .= index_entry(undef, $mime, $level, $state);
 	}
-	thread_entry($dst, $state, $node->child, $level + 1) if $node->child;
-	thread_entry($dst, $state, $node->next, $level) if $node->next;
+	my $cur;
+	$cur = $node->child and
+		thread_entry($dst, $git, $state, $cur, $level + 1);
+	$cur = $node->next and
+		thread_entry($dst, $git, $state, $cur, $level);
 }
 
 sub load_results {
-	my ($ctx, $res) = @_;
-
-	require PublicInbox::GitCatFile;
-	my $git = PublicInbox::GitCatFile->new($ctx->{git_dir});
-	my @msgs;
-	while (my $smsg = shift @{$res->{msgs}}) {
-		my $m = $smsg->mid;
-		my $path = mid2path($m);
-
-		# FIXME: duplicated code from Feed.pm
-		my $mime = eval {
-			my $str = $git->cat_file("HEAD:$path");
-			Email::MIME->new($str);
-		};
-		unless ($@) {
-			$mime->header_set('X-PI-TS', msg_timestamp($mime));
-			push @msgs, $mime;
-		}
-	}
-	\@msgs;
+	my ($res) = @_;
+
+	[ map { $_->mini_mime } @{delete $res->{msgs}} ];
 }
 
 sub msg_timestamp {
-- 
EW


  parent reply	other threads:[~2015-08-20  2:57 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-08-20  2:57 [PATCH 01/11] feed: remove threading from index Eric Wong
2015-08-20  2:57 ` [PATCH 02/11] feed: move timestamp parsing to view Eric Wong
2015-08-20  2:57 ` [PATCH 03/11] use tables for rendering comment nesting Eric Wong
2015-08-20  2:57 ` [PATCH 04/11] view: avoid nesting <a> tags from auto-linkification Eric Wong
2015-08-20  2:57 ` [PATCH 05/11] index: simplify main landing page if search-enabled Eric Wong
2015-08-20  2:57 ` [PATCH 06/11] search: avoid needless decode Eric Wong
2015-08-20  2:57 ` [PATCH 07/11] search: reject ghosts in all cases Eric Wong
2015-08-20  2:57 ` Eric Wong [this message]
2015-08-20  2:57 ` [PATCH 09/11] search: bump schema version to 5 for subject_path Eric Wong
2015-08-20  2:57 ` [PATCH 10/11] index: layout fix + title and Atom feed links at top Eric Wong
2015-08-20  2:57 ` [PATCH 11/11] view: do not fold top-level messages in thread Eric Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1440039443-27052-8-git-send-email-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).