From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 08/11] view: reduce memory usage when displaying large threads
Date: Thu, 20 Aug 2015 02:57:20 +0000 [thread overview]
Message-ID: <1440039443-27052-8-git-send-email-e@80x24.org> (raw)
In-Reply-To: <1440039443-27052-1-git-send-email-e@80x24.org>
We want to minimize the time any large objects or strings
are referenced. We can do threading entirely from the
mini_mime-generated messages and lazily load full messages
when rendering the display.
---
lib/PublicInbox/View.pm | 80 +++++++++++++++++++++++++++----------------------
1 file changed, 44 insertions(+), 36 deletions(-)
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 2ba5118..391e3ad 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -146,7 +146,7 @@ sub thread_html {
my $mid = mid_compressed($ctx->{mid});
my $res = $srch->get_thread($mid);
my $rv = '';
- my $msgs = load_results($ctx, $res);
+ my $msgs = load_results($res);
my $nr = scalar @$msgs;
return $rv if $nr == 0;
require PublicInbox::Thread;
@@ -154,7 +154,11 @@ sub thread_html {
$th->thread;
$th->order(*PublicInbox::Thread::sort_ts);
my $state = [ $srch, { root_anchor => anchor_for($mid) }, undef, 0 ];
- thread_entry(\$rv, $state, $_, 0) for $th->rootset;
+ {
+ require PublicInbox::GitCatFile;
+ my $git = PublicInbox::GitCatFile->new($ctx->{git_dir});
+ thread_entry(\$rv, $git, $state, $_, 0) for $th->rootset;
+ }
my $final_anchor = $state->[3];
my $next = "<a\nid=\"s$final_anchor\">";
@@ -173,7 +177,7 @@ sub subject_path_html {
my $path = $ctx->{subject_path};
my $res = $srch->get_subject_path($path);
my $rv = '';
- my $msgs = load_results($ctx, $res);
+ my $msgs = load_results($res);
my $nr = scalar @$msgs;
return $rv if $nr == 0;
require PublicInbox::Thread;
@@ -181,7 +185,11 @@ sub subject_path_html {
$th->thread;
$th->order(*PublicInbox::Thread::sort_ts);
my $state = [ $srch, { root_anchor => 'dummy' }, undef, 0 ];
- thread_entry(\$rv, $state, $_, 0) for $th->rootset;
+ {
+ require PublicInbox::GitCatFile;
+ my $git = PublicInbox::GitCatFile->new($ctx->{git_dir});
+ thread_entry(\$rv, $git, $state, $_, 0) for $th->rootset;
+ }
my $final_anchor = $state->[3];
my $next = "<a\nid=\"s$final_anchor\">end of thread</a>\n";
@@ -197,7 +205,10 @@ sub index_walk {
my $ct = $part->content_type;
# account for filter bugs...
- return '' if defined $ct && $ct =~ m!\btext/[xh]+tml\b!i;
+ if (defined $ct && $ct =~ m!\btext/[xh]+tml\b!i) {
+ $part->body_set('');
+ return '';
+ }
my $enc = enc_for($ct, $enc_msg);
@@ -224,7 +235,9 @@ sub index_walk {
# kill per-line trailing whitespace
$s =~ s/[ \t]+$//sgm;
- $rv .= $s . "\n";
+ $rv .= $s;
+ $s = undef;
+ $rv .= "\n";
}
$rv;
}
@@ -335,10 +348,13 @@ sub flush_quote {
sub add_text_body {
my ($enc, $part, $part_nr, $full_pfx) = @_;
my $n = 0;
- my $s = ascii_html($enc->decode($part->body));
+ my $nr = 0;
+ my $s = $part->body;
+ $part->body_set('');
+ $s = $enc->decode($s);
+ $s = ascii_html($s);
my @lines = split(/\n/, $s);
$s = '';
- my $nr = 0;
my @quot;
while (defined(my $cur = shift @lines)) {
if ($cur !~ /^>/) {
@@ -538,10 +554,10 @@ sub simple_dump {
sub thread_followups {
my ($dst, $root, $res) = @_;
- my @msgs = map { $_->mini_mime } @{$res->{msgs}};
+ my $msgs = load_results($res);
require PublicInbox::Thread;
$root->header_set('X-PI-TS', '0');
- my $th = PublicInbox::Thread->new($root, @msgs);
+ my $th = PublicInbox::Thread->new($root, @$msgs);
$th->thread;
$th->order(*PublicInbox::Thread::sort_ts);
my $srch = $res->{srch};
@@ -559,43 +575,35 @@ sub thread_html_head {
}
sub thread_entry {
- my ($dst, $state, $node, $level) = @_;
+ my ($dst, $git, $state, $node, $level) = @_;
# $state = [ $search_res, $seen, undef, 0 (msg_nr) ];
# $seen is overloaded with 3 types of fields:
# 1) "root_anchor" => anchor_for(Message-ID),
# 2) seen subject hashes: sha1(subject) => 1
# 3) anchors hashes: "#$sha1_hex" (same as $seen in index_entry)
if (my $mime = $node->message) {
- if (length($$dst) == 0) {
- $$dst .= thread_html_head($mime);
+
+ # lazy load the full message from mini_mime:
+ my $path = mid2path(mid_clean($mime->header('Message-ID')));
+ $mime = eval { Email::MIME->new($git->cat_file("HEAD:$path")) };
+ if ($mime) {
+ if (length($$dst) == 0) {
+ $$dst .= thread_html_head($mime);
+ }
+ $$dst .= index_entry(undef, $mime, $level, $state);
}
- $$dst .= index_entry(undef, $mime, $level, $state);
}
- thread_entry($dst, $state, $node->child, $level + 1) if $node->child;
- thread_entry($dst, $state, $node->next, $level) if $node->next;
+ my $cur;
+ $cur = $node->child and
+ thread_entry($dst, $git, $state, $cur, $level + 1);
+ $cur = $node->next and
+ thread_entry($dst, $git, $state, $cur, $level);
}
sub load_results {
- my ($ctx, $res) = @_;
-
- require PublicInbox::GitCatFile;
- my $git = PublicInbox::GitCatFile->new($ctx->{git_dir});
- my @msgs;
- while (my $smsg = shift @{$res->{msgs}}) {
- my $m = $smsg->mid;
- my $path = mid2path($m);
-
- # FIXME: duplicated code from Feed.pm
- my $mime = eval {
- my $str = $git->cat_file("HEAD:$path");
- Email::MIME->new($str);
- };
- unless ($@) {
- $mime->header_set('X-PI-TS', msg_timestamp($mime));
- push @msgs, $mime;
- }
- }
- \@msgs;
+ my ($res) = @_;
+
+ [ map { $_->mini_mime } @{delete $res->{msgs}} ];
}
sub msg_timestamp {
--
EW
next prev parent reply other threads:[~2015-08-20 2:57 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-08-20 2:57 [PATCH 01/11] feed: remove threading from index Eric Wong
2015-08-20 2:57 ` [PATCH 02/11] feed: move timestamp parsing to view Eric Wong
2015-08-20 2:57 ` [PATCH 03/11] use tables for rendering comment nesting Eric Wong
2015-08-20 2:57 ` [PATCH 04/11] view: avoid nesting <a> tags from auto-linkification Eric Wong
2015-08-20 2:57 ` [PATCH 05/11] index: simplify main landing page if search-enabled Eric Wong
2015-08-20 2:57 ` [PATCH 06/11] search: avoid needless decode Eric Wong
2015-08-20 2:57 ` [PATCH 07/11] search: reject ghosts in all cases Eric Wong
2015-08-20 2:57 ` Eric Wong [this message]
2015-08-20 2:57 ` [PATCH 09/11] search: bump schema version to 5 for subject_path Eric Wong
2015-08-20 2:57 ` [PATCH 10/11] index: layout fix + title and Atom feed links at top Eric Wong
2015-08-20 2:57 ` [PATCH 11/11] view: do not fold top-level messages in thread Eric Wong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: https://public-inbox.org/README
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1440039443-27052-8-git-send-email-e@80x24.org \
--to=e@80x24.org \
--cc=meta@public-inbox.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).