From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 06/17] thread: remove Email::Abstract wrapping
Date: Wed, 5 Oct 2016 23:57:11 +0000 [thread overview]
Message-ID: <20161005235722.14857-7-e@80x24.org> (raw)
In-Reply-To: <20161005235722.14857-1-e@80x24.org>
This roughly doubles performance due to the reduction in
object creation and abstraction layers.
---
lib/PublicInbox/SearchMsg.pm | 29 ------------
lib/PublicInbox/SearchThread.pm | 99 ++++++++++++-----------------------------
lib/PublicInbox/SearchView.pm | 8 ++--
lib/PublicInbox/View.pm | 90 +++++++++++++++++--------------------
t/search.t | 7 +--
5 files changed, 76 insertions(+), 157 deletions(-)
diff --git a/lib/PublicInbox/SearchMsg.pm b/lib/PublicInbox/SearchMsg.pm
index 9d873c4..9dcc1e6 100644
--- a/lib/PublicInbox/SearchMsg.pm
+++ b/lib/PublicInbox/SearchMsg.pm
@@ -144,35 +144,6 @@ sub ensure_metadata {
}
}
-# for threading only
-sub mini_mime {
- my ($self) = @_;
- $self->ensure_metadata;
- my @h = (
- 'Subject' => $self->subject,
- 'X-PI-From' => $self->from_name,
- # prevent Email::Simple::Creator from running,
- # this header is useless for threading as we use X-PI-TS
- # for sorting and display:
- 'Date' => EPOCH_822,
- 'Message-ID' => "<$self->{mid}>",
- 'X-PI-TS' => $self->ts,
- );
- if (my $refs = $self->{references}) {
- push @h, References => $refs;
- }
- my $mime = Email::MIME->create(header => \@h);
- my $h = $mime->header_obj;
-
- # set these headers manually since Encode::encode('MIME-Q', ...)
- # will add spaces to long values when using header_str above.
-
- # drop useless headers Email::MIME set for us
- $h->header_set('Date');
- $h->header_set('MIME-Version');
- $mime;
-}
-
sub mid ($;$) {
my ($self, $mid) = @_;
diff --git a/lib/PublicInbox/SearchThread.pm b/lib/PublicInbox/SearchThread.pm
index 53fec97..d39e9b6 100644
--- a/lib/PublicInbox/SearchThread.pm
+++ b/lib/PublicInbox/SearchThread.pm
@@ -20,7 +20,6 @@
package PublicInbox::SearchThread;
use strict;
use warnings;
-use Email::Abstract;
sub new {
return bless {
@@ -30,37 +29,6 @@ sub new {
}, $_[0];
}
-sub _get_hdr {
- my ($class, $msg, $hdr) = @_;
- Email::Abstract->get_header($msg, $hdr) || '';
-}
-
-sub _uniq {
- my %seen;
- return grep { !$seen{$_}++ } @_;
-}
-
-sub _references {
- my $class = shift;
- my $msg = shift;
- my @references = ($class->_get_hdr($msg, "References") =~ /<([^>]+)>/g);
- my $foo = $class->_get_hdr($msg, "In-Reply-To");
- chomp $foo;
- $foo =~ s/.*?<([^>]+)>.*/$1/;
- push @references, $foo
- if $foo =~ /^\S+\@\S+$/ && (!@references || $references[-1] ne $foo);
- return _uniq(@references);
-}
-
-sub _msgid {
- my ($class, $msg) = @_;
- my $id = $class->_get_hdr($msg, "Message-ID");
- die "attempt to thread message with no id" unless $id;
- chomp $id;
- $id =~ s/^<([^>]+)>.*/$1/; # We expect this not to have <>s
- return $id;
-}
-
sub rootset { @{$_[0]{rootset}} }
sub thread {
@@ -77,14 +45,11 @@ sub _finish {
delete $self->{seen};
}
-sub _get_cont_for_id {
- my $self = shift;
- my $id = shift;
- $self->{id_table}{$id} ||= $self->_container_class->new($id);
+sub _get_cont_for_id ($$) {
+ my ($self, $mid) = @_;
+ $self->{id_table}{$mid} ||= PublicInbox::SearchThread::Msg->new($mid);
}
-sub _container_class { 'PublicInbox::SearchThread::Container' }
-
sub _setup {
my ($self) = @_;
@@ -92,41 +57,40 @@ sub _setup {
}
sub _add_message ($$) {
- my ($self, $message) = @_;
+ my ($self, $smsg) = @_;
# A. if id_table...
- my $this_container = $self->_get_cont_for_id($self->_msgid($message));
- $this_container->{message} = $message;
+ my $this = _get_cont_for_id($self, $smsg->{mid});
+ $this->{smsg} = $smsg;
# B. For each element in the message's References field:
- my @refs = $self->_references($message);
-
my $prev;
- for my $ref (@refs) {
- # Find a Container object for the given Message-ID
- my $container = $self->_get_cont_for_id($ref);
-
- # Link the References field's Containers together in the
- # order implied by the References header
- # * If they are already linked don't change the existing links
- # * Do not add a link if adding that link would introduce
- # a loop...
-
- if ($prev &&
- !$container->{parent} && # already linked
- !$container->has_descendent($prev) # would loop
- ) {
- $prev->add_child($container);
+ if (defined(my $refs = $smsg->{references})) {
+ foreach my $ref ($refs =~ m/<([^>]+)>/g) {
+ # Find a Container object for the given Message-ID
+ my $cont = _get_cont_for_id($self, $ref);
+
+ # Link the References field's Containers together in
+ # the order implied by the References header
+ #
+ # * If they are already linked don't change the
+ # existing links
+ # * Do not add a link if adding that link would
+ # introduce a loop...
+ if ($prev &&
+ !$cont->{parent} && # already linked
+ !$cont->has_descendent($prev) # would loop
+ ) {
+ $prev->add_child($cont);
+ }
+ $prev = $cont;
}
- $prev = $container;
}
# C. Set the parent of this message to be the last element in
# References...
- if ($prev &&
- !$this_container->has_descendent($prev) # would loop
- ) {
- $prev->add_child($this_container)
+ if ($prev && !$this->has_descendent($prev)) { # would loop
+ $prev->add_child($this)
}
}
@@ -135,7 +99,7 @@ sub order {
my $ordersub = shift;
# make a fake root
- my $root = $self->_container_class->new( 'fakeroot' );
+ my $root = _get_cont_for_id($self, 'fakeroot');
$root->add_child( $_ ) for @{ $self->{rootset} };
# sort it
@@ -147,17 +111,12 @@ sub order {
$root->remove_child($_) for @$kids;
}
-package PublicInbox::SearchThread::Container;
+package PublicInbox::SearchThread::Msg;
use Carp qw(croak);
use Scalar::Util qw(weaken);
sub new { my $self = shift; bless { id => shift }, $self; }
-sub message { $_[0]->{message} }
-sub child { $_[0]->{child} }
-sub next { $_[0]->{next} }
-sub messageid { $_[0]->{id} }
-
sub add_child {
my ($self, $child) = @_;
croak "Cowardly refusing to become my own parent: $self"
diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm
index 0d54c3d..ebeb41f 100644
--- a/lib/PublicInbox/SearchView.pm
+++ b/lib/PublicInbox/SearchView.pm
@@ -148,7 +148,6 @@ sub mset_thread {
my $i = $_;
my $m = PublicInbox::SearchMsg->load_doc($i->get_document);
$pct{$m->mid} = $i->get_percent;
- $m = $m->mini_mime;
$m;
} ($mset->items);
@@ -156,9 +155,9 @@ sub mset_thread {
$th->thread;
if ($q->{r}) { # order by relevance
$th->order(sub {
- [ sort { (eval { $pct{$b->topmost->messageid} } || 0)
+ [ sort { (eval { $pct{$b->topmost->{id}} } || 0)
<=>
- (eval { $pct{$a->topmost->messageid} } || 0)
+ (eval { $pct{$a->topmost->{id}} } || 0)
} @{$_[0]} ];
});
} else { # order by time (default for threaded view)
@@ -185,8 +184,7 @@ sub mset_thread {
sub {
return unless $msgs;
while ($mime = shift @$msgs) {
- my $mid = mid_clean(mid_mime($mime));
- $mime = $inbox->msg_by_mid($mid) and last;
+ $mime = $inbox->msg_by_smsg($mime) and last;
}
if ($mime) {
$mime = Email::MIME->new($mime);
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index e90efda..748e391 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -214,12 +214,12 @@ sub _th_index_lite {
$rv .= $pad . $irt_map->[1];
if ($idx > 0) {
my $prev = $siblings->[$idx - 1];
- my $pmid = $prev->messageid;
+ my $pmid = $prev->{id};
if ($idx > 2) {
my $s = ($idx - 1). ' preceding siblings ...';
$rv .= pad_link($pmid, $level, $s);
} elsif ($idx == 2) {
- my $ppmid = $siblings->[0]->messageid;
+ my $ppmid = $siblings->[0]->{id};
$rv .= $pad . $mapping->{$ppmid}->[1];
}
$rv .= $pad . $mapping->{$pmid}->[1];
@@ -233,25 +233,25 @@ sub _th_index_lite {
$this =~ s!<a\nhref=[^>]+>([^<]+)</a>!$1!s; # no point linking to self
$rv .= "<b>@ $this";
my $node = $map->[2];
- if (my $child = $node->child) {
- my $cmid = $child->messageid;
+ if (my $child = $node->{child}) {
+ my $cmid = $child->{id};
$rv .= $pad . $mapping->{$cmid}->[1];
if ($nr_c > 2) {
my $s = ($nr_c - 1). ' more replies';
$rv .= pad_link($cmid, $level + 1, $s);
- } elsif (my $cn = $child->next) {
- $rv .= $pad . $mapping->{$cn->messageid}->[1];
+ } elsif (my $cn = $child->{next}) {
+ $rv .= $pad . $mapping->{$cn->{id}}->[1];
}
}
- if (my $next = $node->next) {
- my $nmid = $next->messageid;
+ if (my $next = $node->{next}) {
+ my $nmid = $next->{id};
$rv .= $pad . $mapping->{$nmid}->[1];
my $nnext = $nr_s - $idx;
if ($nnext > 2) {
my $s = ($nnext - 1).' subsequent siblings';
$rv .= pad_link($nmid, $level, $s);
- } elsif (my $nn = $next->next) {
- $rv .= $pad . $mapping->{$nn->messageid}->[1];
+ } elsif (my $nn = $next->{next}) {
+ $rv .= $pad . $mapping->{$nn->{id}}->[1];
}
}
$rv .= $pad ."<a\nhref=#r$id>$s_s, $s_c; $ctx->{s_nr}</a>\n";
@@ -264,7 +264,7 @@ sub walk_thread {
my $level = shift @q;
my $node = shift @q or next;
$cb->($ctx, $level, $node);
- unshift @q, $level+1, $node->child, $level, $node->next;
+ unshift @q, $level+1, $node->{child}, $level, $node->{next};
}
}
@@ -272,12 +272,12 @@ sub pre_thread {
my ($ctx, $level, $node) = @_;
my $mapping = $ctx->{mapping};
my $idx = -1;
- if (my $parent = $node->parent) {
- my $m = $mapping->{$parent->messageid}->[0];
+ if (my $parent = $node->{parent}) {
+ my $m = $mapping->{$parent->{id}}->[0];
$idx = scalar @$m;
push @$m, $node;
}
- $mapping->{$node->messageid} = [ [], '', $node, $idx, $level ];
+ $mapping->{$node->{id}} = [ [], '', $node, $idx, $level ];
skel_dump($ctx, $level, $node);
}
@@ -296,8 +296,8 @@ sub stream_thread ($$) {
while (@q) {
$level = shift @q;
my $node = shift @q or next;
- unshift @q, $level+1, $node->child, $level, $node->next;
- $mime = $inbox->msg_by_mid($node->messageid) and last;
+ unshift @q, $level+1, $node->{child}, $level, $node->{next};
+ $mime = $inbox->msg_by_smsg($node->{smsg}) and last;
}
return missing_thread($ctx) unless $mime;
@@ -309,13 +309,14 @@ sub stream_thread ($$) {
while (@q) {
$level = shift @q;
my $node = shift @q or next;
- unshift @q, $level+1, $node->child, $level, $node->next;
- my $mid = $node->messageid;
- if ($mime = $inbox->msg_by_mid($mid)) {
+ unshift @q, $level+1, $node->{child},
+ $level, $node->{next};
+ my $mid = $node->{id};
+ if ($mime = $inbox->msg_by_smsg($node->{smsg})) {
$mime = Email::MIME->new($mime);
return thread_index_entry($ctx, $level, $mime);
} else {
- return ghost_index_entry($ctx, $level, $mid);
+ return ghost_index_entry($ctx, $level, $node);
}
}
my $ret = join('', thread_adj_level($ctx, 0));
@@ -355,11 +356,11 @@ sub thread_html {
$skel .= '</pre>';
return stream_thread($th, $ctx) unless $ctx->{flat};
- # flat display: lazy load the full message from mini_mime:
+ # flat display: lazy load the full message from smsg
my $inbox = $ctx->{-inbox};
my $mime;
while ($mime = shift @$msgs) {
- $mime = $inbox->msg_by_mid(mid_clean(mid_mime($mime))) and last;
+ $mime = $inbox->msg_by_smsg($mime) and last;
}
return missing_thread($ctx) unless $mime;
$mime = Email::MIME->new($mime);
@@ -369,8 +370,7 @@ sub thread_html {
PublicInbox::WwwStream->response($ctx, 200, sub {
return unless $msgs;
while ($mime = shift @$msgs) {
- $mid = mid_clean(mid_mime($mime));
- $mime = $inbox->msg_by_mid($mid) and last;
+ $mime = $inbox->msg_by_smsg($mime) and last;
}
if ($mime) {
$mime = Email::MIME->new($mime);
@@ -738,7 +738,7 @@ sub indent_for {
sub load_results {
my ($sres) = @_;
- [ map { $_->mini_mime } @{delete $sres->{msgs}} ];
+ [ map { $_->ensure_metadata; $_ } @{delete $sres->{msgs}} ];
}
sub msg_timestamp {
@@ -771,13 +771,13 @@ sub _msg_date {
sub fmt_ts { POSIX::strftime('%Y-%m-%d %k:%M', gmtime($_[0])) }
sub _skel_header {
- my ($ctx, $hdr, $level) = @_;
+ my ($ctx, $smsg, $level) = @_;
my $dst = $ctx->{dst};
my $cur = $ctx->{cur};
- my $mid = mid_clean($hdr->header_raw('Message-ID'));
- my $f = ascii_html($hdr->header('X-PI-From'));
- my $d = _msg_date($hdr) . ' ' . indent_for($level) . th_pfx($level);
+ my $mid = $smsg->{mid};
+ my $f = ascii_html($smsg->from_name);
+ my $d = fmt_ts($smsg->{ts}) . ' ' . indent_for($level) . th_pfx($level);
my $attr = $f;
$ctx->{first_level} ||= $level;
@@ -802,7 +802,7 @@ sub _skel_header {
# Subject is never undef, this mail was loaded from
# our Xapian which would've resulted in '' if it were
# really missing (and Filter rejects empty subjects)
- my $s = $hdr->header('Subject');
+ my $s = $smsg->subject;
my $h = $ctx->{srch}->subject_path($s);
if ($ctx->{seen}->{$h}) {
$s = undef;
@@ -829,10 +829,10 @@ sub _skel_header {
sub skel_dump {
my ($ctx, $level, $node) = @_;
- if (my $mime = $node->message) {
- _skel_header($ctx, $mime->header_obj, $level);
+ if (my $smsg = $node->{smsg}) {
+ _skel_header($ctx, $smsg, $level);
} else {
- my $mid = $node->messageid;
+ my $mid = $node->{id};
my $dst = $ctx->{dst};
my $mapping = $ctx->{mapping};
my $map = $mapping->{$mid} if $mapping;
@@ -857,31 +857,24 @@ sub skel_dump {
sub sort_ts {
[ sort {
- (eval { $a->topmost->message->header('X-PI-TS') } || 0) <=>
- (eval { $b->topmost->message->header('X-PI-TS') } || 0)
+ (eval { $a->topmost->{smsg}->ts } || 0) <=>
+ (eval { $b->topmost->{smsg}->ts } || 0)
} @{$_[0]} ];
}
-sub _tryload_ghost ($$) {
- my ($srch, $mid) = @_;
- my $smsg = $srch->lookup_mail($mid) or return;
- $smsg->mini_mime;
-}
-
# accumulate recent topics if search is supported
# returns 200 if done, 404 if not
sub acc_topic {
my ($ctx, $level, $node) = @_;
my $srch = $ctx->{srch};
- my $mid = $node->messageid;
- my $x = $node->message || _tryload_ghost($srch, $mid);
+ my $mid = $node->{id};
+ my $x = $node->{smsg} || $srch->lookup_mail($mid);
my ($subj, $ts);
my $topic;
if ($x) {
- $x = $x->header_obj;
- $subj = $x->header('Subject') || '';
+ $subj = $x->subject;
$subj = $srch->subject_normalized($subj);
- $ts = $x->header('X-PI-TS');
+ $ts = $x->ts;
if ($level == 0) {
$topic = [ $ts, 1, { $subj => $mid }, $subj ];
$ctx->{-cur_topic} = $topic;
@@ -1023,9 +1016,10 @@ sub thread_adj_level {
}
sub ghost_index_entry {
- my ($ctx, $level, $mid) = @_;
+ my ($ctx, $level, $node) = @_;
my ($beg, $end) = thread_adj_level($ctx, $level);
- $beg . '<pre>'. ghost_parent($ctx->{-upfx}, $mid) . '</pre>' . $end;
+ $beg . '<pre>'. ghost_parent($ctx->{-upfx}, $node->{id})
+ . '</pre>' . $end;
}
1;
diff --git a/t/search.t b/t/search.t
index cce3b9e..eed9c9b 100644
--- a/t/search.t
+++ b/t/search.t
@@ -293,7 +293,7 @@ sub filter_mids {
$smsg->ensure_metadata;
is($smsg->references, '', "no references created");
my $msg = PublicInbox::SearchMsg->load_doc($smsg->{doc});
- is($s, $msg->mini_mime->header('Subject'), 'long subject not rewritten');
+ is($s, $msg->subject, 'long subject not rewritten');
}
{
@@ -310,10 +310,7 @@ sub filter_mids {
my $smsg = $rw->lookup_message('testmessage@example.com');
my $msg = PublicInbox::SearchMsg->load_doc($smsg->{doc});
- # mini_mime technically not valid (I think),
- # but good enough for displaying HTML:
- is($mime->header('Subject'), $msg->mini_mime->header('Subject'),
- 'UTF-8 subject preserved');
+ is($mime->header('Subject'), $msg->subject, 'UTF-8 subject preserved');
}
{
--
EW
next prev parent reply other threads:[~2016-10-05 23:57 UTC|newest]
Thread overview: 23+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-10-05 23:57 [PATCH 0/17] remove Mail::Thread dependency Eric Wong
2016-10-05 23:57 ` [PATCH 01/17] view: remove "subject dummy" references Eric Wong
2016-10-05 23:57 ` [PATCH 02/17] thread: remove Mail::Thread dependency Eric Wong
2016-10-05 23:57 ` [PATCH 03/17] thread: pass array refs instead of entire arrays Eric Wong
2016-10-05 23:57 ` [PATCH 04/17] thread: remove accessor usage in internals Eric Wong
2016-10-05 23:57 ` [PATCH 05/17] inbox: deal with ghost smsg Eric Wong
2016-10-05 23:57 ` Eric Wong [this message]
2016-10-05 23:57 ` [PATCH 07/17] thread: remove rootset accessor method Eric Wong
2016-10-05 23:57 ` [PATCH 08/17] thread: simplify Eric Wong
2016-10-05 23:57 ` [PATCH 09/17] thread: remove iterate_down Eric Wong
2016-10-05 23:57 ` [PATCH 10/17] thread: avoid incrementing undefined value Eric Wong
2016-10-05 23:57 ` [PATCH 11/17] thread: order_children no longer cares about depth Eric Wong
2016-10-05 23:57 ` [PATCH 12/17] thread: inline and remove recurse_down logic Eric Wong
2016-10-05 23:57 ` [PATCH 13/17] thread: fix sorting without topmost Eric Wong
2016-10-14 21:17 ` [PATCH] thread: reinstates stable ordering when ghosts are present Eric Wong
2016-10-05 23:57 ` [PATCH 14/17] thread: use hash + array instead of hand-rolled linked list Eric Wong
2016-10-05 23:57 ` [PATCH 15/17] view: remove redundant children array in thread views Eric Wong
2016-10-05 23:57 ` [PATCH 16/17] t/thread-cycle: test self-referential messages Eric Wong
2016-10-05 23:57 ` [PATCH 17/17] thread: remove weaken dependency Eric Wong
2016-10-06 8:22 ` [PATCH 0/17] remove Mail::Thread dependency Eric Wong
2016-10-13 3:59 ` [PATCH 0/2] thread: fix regressions from Mail::Thread removal Eric Wong
2016-10-13 3:59 ` [PATCH 1/2] thread: reduce indentation level Eric Wong
2016-10-13 3:59 ` [PATCH 2/2] thread: fix parent/child relationships Eric Wong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: https://public-inbox.org/README
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20161005235722.14857-7-e@80x24.org \
--to=e@80x24.org \
--cc=meta@public-inbox.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).