* [PATCH 01/17] view: remove "subject dummy" references
2016-10-05 23:57 [PATCH 0/17] remove Mail::Thread dependency Eric Wong
@ 2016-10-05 23:57 ` Eric Wong
2016-10-05 23:57 ` [PATCH 02/17] thread: remove Mail::Thread dependency Eric Wong
` (16 subsequent siblings)
17 siblings, 0 replies; 23+ messages in thread
From: Eric Wong @ 2016-10-05 23:57 UTC (permalink / raw)
To: meta
We will not care for inexact threading by subject or pruning.
---
lib/PublicInbox/View.pm | 8 --------
1 file changed, 8 deletions(-)
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 9359209..a3b2681 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -723,8 +723,6 @@ sub anchor_for {
sub ghost_parent {
my ($upfx, $mid) = @_;
- # 'subject dummy' is used internally by Mail::Thread
- return '[no common parent]' if ($mid eq 'subject dummy');
$mid = PublicInbox::Hval->new_msgid($mid);
my $href = $mid->{href};
@@ -838,12 +836,6 @@ sub skel_dump {
my $dst = $ctx->{dst};
my $mapping = $ctx->{mapping};
my $map = $mapping->{$mid} if $mapping;
- if ($mid eq 'subject dummy') {
- my $ncp = "\t[no common parent]\n";
- $map->[1] = $ncp if $map;
- $$dst .= $ncp;
- return;
- }
my $d = $ctx->{pct} ? ' [irrelevant] ' # search result
: ' [not found] ';
$d .= indent_for($level) . th_pfx($level);
--
EW
^ permalink raw reply related [flat|nested] 23+ messages in thread
* [PATCH 02/17] thread: remove Mail::Thread dependency
2016-10-05 23:57 [PATCH 0/17] remove Mail::Thread dependency Eric Wong
2016-10-05 23:57 ` [PATCH 01/17] view: remove "subject dummy" references Eric Wong
@ 2016-10-05 23:57 ` Eric Wong
2016-10-05 23:57 ` [PATCH 03/17] thread: pass array refs instead of entire arrays Eric Wong
` (15 subsequent siblings)
17 siblings, 0 replies; 23+ messages in thread
From: Eric Wong @ 2016-10-05 23:57 UTC (permalink / raw)
To: meta
Introduce our own SearchThread class for threading messages.
This should allow us to specialize and optimize away objects
in future commits.
---
INSTALL | 1 -
MANIFEST | 2 +-
Makefile.PL | 1 -
lib/PublicInbox/SearchIdx.pm | 4 +-
lib/PublicInbox/SearchThread.pm | 323 ++++++++++++++++++++++++++++++++++++++++
lib/PublicInbox/SearchView.pm | 4 +-
lib/PublicInbox/Thread.pm | 86 -----------
lib/PublicInbox/View.pm | 4 +-
lib/PublicInbox/WWW.pm | 2 +-
t/plack.t | 3 +-
10 files changed, 332 insertions(+), 98 deletions(-)
create mode 100644 lib/PublicInbox/SearchThread.pm
delete mode 100644 lib/PublicInbox/Thread.pm
diff --git a/INSTALL b/INSTALL
index 5851892..3a2f840 100644
--- a/INSTALL
+++ b/INSTALL
@@ -37,7 +37,6 @@ Optional components:
Optional Perl modules:
- Plack[1] libplack-perl
- - Mail::Thread (2.5+)[1] libmail-thread-perl
- URI::Escape[1] liburi-perl
- Search::Xapian[2][3] libsearch-xapian-perl
- IO::Compress::Gzip[3] perl-modules (or libio-compress-perl)
diff --git a/MANIFEST b/MANIFEST
index c39fa26..bcc4121 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -78,11 +78,11 @@ lib/PublicInbox/SaPlugin/ListMirror.pm
lib/PublicInbox/Search.pm
lib/PublicInbox/SearchIdx.pm
lib/PublicInbox/SearchMsg.pm
+lib/PublicInbox/SearchThread.pm
lib/PublicInbox/SearchView.pm
lib/PublicInbox/Spamcheck/Spamc.pm
lib/PublicInbox/Spawn.pm
lib/PublicInbox/SpawnPP.pm
-lib/PublicInbox/Thread.pm
lib/PublicInbox/Unsubscribe.pm
lib/PublicInbox/View.pm
lib/PublicInbox/WWW.pm
diff --git a/Makefile.PL b/Makefile.PL
index 4a91103..0bac7c9 100644
--- a/Makefile.PL
+++ b/Makefile.PL
@@ -22,7 +22,6 @@ WriteMakefile(
'Email::MIME::ContentType' => 0,
'Email::Simple' => 0,
'Encode::MIME::Header' => 0,
- 'Mail::Thread' => '2.5', # 2.5+ needed for Email::Simple compat
'Plack' => 0,
'URI::Escape' => 0,
# We have more test dependencies, but do not force
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index 23aef9f..4aac028 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -4,8 +4,8 @@
#
# Indexes mail with Xapian and our (SQLite-based) ::Msgmap for use
# with the web and NNTP interfaces. This index maintains thread
-# relationships for use by Mail::Thread. This writes to the search
-# index.
+# relationships for use by PublicInbox::SearchThread.
+# This writes to the search index.
package PublicInbox::SearchIdx;
use strict;
use warnings;
diff --git a/lib/PublicInbox/SearchThread.pm b/lib/PublicInbox/SearchThread.pm
new file mode 100644
index 0000000..41fe859
--- /dev/null
+++ b/lib/PublicInbox/SearchThread.pm
@@ -0,0 +1,323 @@
+# This library is free software; you can redistribute it and/or modify
+# it under the same terms as Perl itself.
+#
+# This license differs from the rest of public-inbox
+#
+# Our own jwz-style threading class based on Mail::Thread from CPAN.
+# Mail::Thread is unmaintained and available on some distros.
+# We also do not want pruning or subject grouping, since we want
+# to encourage strict threading and hopefully encourage people
+# to use proper In-Reply-To.
+#
+# This includes fixes from several open bugs for Mail::Thread
+#
+# Avoid circular references
+# - https://rt.cpan.org/Public/Bug/Display.html?id=22817
+#
+# And avoid recursion in recurse_down:
+# - https://rt.cpan.org/Ticket/Display.html?id=116727
+# - http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=833479
+package PublicInbox::SearchThread;
+use strict;
+use warnings;
+use Email::Abstract;
+
+sub new {
+ return bless {
+ messages => $_[1],
+ id_table => {},
+ rootset => []
+ }, $_[0];
+}
+
+sub _get_hdr {
+ my ($class, $msg, $hdr) = @_;
+ Email::Abstract->get_header($msg, $hdr) || '';
+}
+
+sub _uniq {
+ my %seen;
+ return grep { !$seen{$_}++ } @_;
+}
+
+sub _references {
+ my $class = shift;
+ my $msg = shift;
+ my @references = ($class->_get_hdr($msg, "References") =~ /<([^>]+)>/g);
+ my $foo = $class->_get_hdr($msg, "In-Reply-To");
+ chomp $foo;
+ $foo =~ s/.*?<([^>]+)>.*/$1/;
+ push @references, $foo
+ if $foo =~ /^\S+\@\S+$/ && (!@references || $references[-1] ne $foo);
+ return _uniq(@references);
+}
+
+sub _msgid {
+ my ($class, $msg) = @_;
+ my $id = $class->_get_hdr($msg, "Message-ID");
+ die "attempt to thread message with no id" unless $id;
+ chomp $id;
+ $id =~ s/^<([^>]+)>.*/$1/; # We expect this not to have <>s
+ return $id;
+}
+
+sub rootset { @{$_[0]{rootset}} }
+
+sub thread {
+ my $self = shift;
+ $self->_setup();
+ $self->{rootset} = [ grep { !$_->parent } values %{$self->{id_table}} ];
+ $self->_finish();
+}
+
+sub _finish {
+ my $self = shift;
+ delete $self->{id_table};
+ delete $self->{seen};
+}
+
+sub _get_cont_for_id {
+ my $self = shift;
+ my $id = shift;
+ $self->{id_table}{$id} ||= $self->_container_class->new($id);
+}
+
+sub _container_class { 'PublicInbox::SearchThread::Container' }
+
+sub _setup {
+ my ($self) = @_;
+
+ _add_message($self, $_) foreach @{$self->{messages}};
+}
+
+sub _add_message ($$) {
+ my ($self, $message) = @_;
+
+ # A. if id_table...
+ my $this_container = $self->_get_cont_for_id($self->_msgid($message));
+ $this_container->message($message);
+
+ # B. For each element in the message's References field:
+ my @refs = $self->_references($message);
+
+ my $prev;
+ for my $ref (@refs) {
+ # Find a Container object for the given Message-ID
+ my $container = $self->_get_cont_for_id($ref);
+
+ # Link the References field's Containers together in the
+ # order implied by the References header
+ # * If they are already linked don't change the existing links
+ # * Do not add a link if adding that link would introduce
+ # a loop...
+
+ if ($prev &&
+ !$container->parent && # already linked
+ !$container->has_descendent($prev) # would loop
+ ) {
+ $prev->add_child($container);
+ }
+ $prev = $container;
+ }
+
+ # C. Set the parent of this message to be the last element in
+ # References...
+ if ($prev &&
+ !$this_container->has_descendent($prev) # would loop
+ ) {
+ $prev->add_child($this_container)
+ }
+}
+
+sub order {
+ my $self = shift;
+ my $ordersub = shift;
+
+ # make a fake root
+ my $root = $self->_container_class->new( 'fakeroot' );
+ $root->add_child( $_ ) for @{ $self->{rootset} };
+
+ # sort it
+ $root->order_children( $ordersub );
+
+ # and untangle it
+ my @kids = $root->children;
+ $self->{rootset} = \@kids;
+ $root->remove_child($_) for @kids;
+}
+
+package PublicInbox::SearchThread::Container;
+use Carp qw(croak);
+use Scalar::Util qw(weaken);
+
+sub new { my $self = shift; bless { id => shift }, $self; }
+
+sub message { $_[0]->{message} = $_[1] if @_ == 2; $_[0]->{message} }
+sub parent { @_ == 2 ? weaken($_[0]->{parent} = $_[1]) : $_[0]->{parent} }
+sub child { $_[0]->{child} = $_[1] if @_ == 2; $_[0]->{child} }
+sub next { $_[0]->{next} = $_[1] if @_ == 2; $_[0]->{next} }
+sub messageid { $_[0]->{id} }
+
+sub add_child {
+ my ($self, $child) = @_;
+ croak "Cowardly refusing to become my own parent: $self"
+ if $self == $child;
+
+ if (grep { $_ == $child } $self->children) {
+ # All is potentially correct with the world
+ $child->parent($self);
+ return;
+ }
+
+ $child->parent->remove_child($child) if $child->parent;
+
+ $child->next($self->child);
+ $self->child($child);
+ $child->parent($self);
+}
+
+sub remove_child {
+ my ($self, $child) = @_;
+ return unless $self->child;
+ if ($self->child == $child) { # First one's easy.
+ $self->child($child->next);
+ $child->next(undef);
+ $child->parent(undef);
+ return;
+ }
+
+ my $x = $self->child;
+ my $prev = $x;
+ while ($x = $x->next) {
+ if ($x == $child) {
+ $prev->next($x->next); # Unlink x
+ $x->next(undef);
+ $x->parent(undef); # Deparent it
+ return;
+ }
+ $prev = $x;
+ }
+ # oddly, we can get here
+ $child->next(undef);
+ $child->parent(undef);
+}
+
+sub has_descendent {
+ my $self = shift;
+ my $child = shift;
+ die "Assertion failed: $child" unless eval {$child};
+ my $there = 0;
+ $self->recurse_down(sub { $there = 1 if $_[0] == $child });
+
+ return $there;
+}
+
+sub children {
+ my $self = shift;
+ my @children;
+ my $visitor = $self->child;
+ while ($visitor) {
+ push @children, $visitor;
+ $visitor = $visitor->next
+ }
+ return @children;
+}
+
+sub set_children {
+ my $self = shift;
+ my $walk = $self->child( shift );
+ while (@_) { $walk = $walk->next( shift ) }
+ $walk->next(undef) if $walk;
+}
+
+sub order_children {
+ my $self = shift;
+ my $ordersub = shift;
+
+ return unless $ordersub;
+
+ my $sub = sub {
+ my $cont = shift;
+ my @children = $cont->children;
+ return if @children < 2;
+ $cont->set_children( $ordersub->( @children ) );
+ };
+ $self->iterate_down( undef, $sub );
+ undef $sub;
+}
+
+# non-recursive version of recurse_down to avoid stack depth warnings
+sub recurse_down {
+ my ($self, $callback) = @_;
+ my %seen;
+ my @q = ($self);
+ while (my $cont = shift @q) {
+ $seen{$cont}++;
+ $callback->($cont);
+
+ if (my $next = $cont->next) {
+ if ($seen{$next}) {
+ $cont->next(undef);
+ } else {
+ push @q, $next;
+ }
+ }
+ if (my $child = $cont->child) {
+ if ($seen{$child}) {
+ $cont->child(undef);
+ } else {
+ push @q, $child;
+ }
+ }
+ }
+}
+
+sub iterate_down {
+ my $self = shift;
+ my ($before, $after) = @_;
+
+ my %seen;
+ my $walk = $self;
+ my $depth = 0;
+ my @visited;
+ while ($walk) {
+ push @visited, [ $walk, $depth ];
+ $before->($walk, $depth) if $before;
+
+ # spot/break loops
+ $seen{$walk}++;
+
+ my $child = $walk->child;
+ if ($child && $seen{$child}) {
+ $walk->child(undef);
+ $child = undef;
+ }
+
+ my $next = $walk->next;
+ if ($next && $seen{$next}) {
+ $walk->next(undef);
+ $next = undef;
+ }
+
+ # go down, or across
+ if ($child) {
+ $next = $child;
+ ++$depth;
+ }
+
+ # no next? look up
+ if (!$next) {
+ my $up = $walk;
+ while ($up && !$next) {
+ $up = $up->parent;
+ --$depth;
+ $next = $up->next if $up;
+ }
+ }
+ $walk = $next;
+ }
+ return unless $after;
+ while (@visited) { $after->(@{ pop @visited }) }
+}
+
+1;
diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm
index 4f0811a..da31109 100644
--- a/lib/PublicInbox/SearchView.pm
+++ b/lib/PublicInbox/SearchView.pm
@@ -11,7 +11,7 @@ use PublicInbox::View;
use PublicInbox::MID qw(mid2path mid_mime mid_clean mid_escape);
use Email::MIME;
require PublicInbox::Git;
-require PublicInbox::Thread;
+require PublicInbox::SearchThread;
our $LIM = 50;
sub noop {}
@@ -152,7 +152,7 @@ sub mset_thread {
$m;
} ($mset->items);
- my $th = PublicInbox::Thread->new(@m);
+ my $th = PublicInbox::SearchThread->new(\@m);
$th->thread;
if ($q->{r}) { # order by relevance
$th->order(sub {
diff --git a/lib/PublicInbox/Thread.pm b/lib/PublicInbox/Thread.pm
deleted file mode 100644
index 8af9461..0000000
--- a/lib/PublicInbox/Thread.pm
+++ /dev/null
@@ -1,86 +0,0 @@
-# subclass Mail::Thread and use this to workaround a memory leak
-# Based on the patch in: https://rt.cpan.org/Public/Bug/Display.html?id=22817
-#
-# Additionally, workaround for a bug where $walk->topmost returns undef:
-# - http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=795913
-# - https://rt.cpan.org/Ticket/Display.html?id=106498
-#
-# And avoid recursion in recurse_down:
-# - https://rt.cpan.org/Ticket/Display.html?id=116727
-# - http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=833479
-#
-# License differs from the rest of public-inbox (but is compatible):
-# This library is free software; you can redistribute it and/or modify
-# it under the same terms as Perl itself.
-package PublicInbox::Thread;
-use strict;
-use warnings;
-use base qw(Mail::Thread);
-# WARNING! both these Mail::Thread knobs were found by inspecting
-# the Mail::Thread 2.55 source code, and we have some monkey patches
-# in PublicInbox::Thread to fix memory leaks. Since Mail::Thread
-# appears unmaintained, I suppose it's safe to depend on these
-# variables for now:
-{
- no warnings 'once';
- # we want strict threads to expose (and hopefully discourage)
- # use of broken email clients
- $Mail::Thread::nosubject = 1;
- # Keep ghosts with only a single direct child,
- # don't hide that there may be missing messages.
- $Mail::Thread::noprune = 1;
-}
-
-if ($Mail::Thread::VERSION <= 2.55) {
- eval q(sub _container_class { 'PublicInbox::Thread::Container' });
-}
-
-package PublicInbox::Thread::Container;
-use strict;
-use warnings;
-use base qw(Mail::Thread::Container);
-use Scalar::Util qw(weaken);
-sub parent { @_ == 2 ? weaken($_[0]->{parent} = $_[1]) : $_[0]->{parent} }
-
-sub topmost {
- $_[0]->SUPER::topmost || PublicInbox::Thread::CPANRTBug106498->new;
-}
-
-# non-recursive version of recurse_down to avoid stack depth warnings
-sub recurse_down {
- my ($self, $callback) = @_;
- my %seen;
- my @q = ($self);
- while (my $cont = shift @q) {
- $seen{$cont}++;
- $callback->($cont);
-
- if (my $next = $cont->next) {
- if ($seen{$next}) {
- $cont->next(undef);
- } else {
- push @q, $next;
- }
- }
- if (my $child = $cont->child) {
- if ($seen{$child}) {
- $cont->child(undef);
- } else {
- push @q, $child;
- }
- }
- }
-}
-
-# ref:
-# - http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=795913
-# - https://rt.cpan.org/Ticket/Display.html?id=106498
-package PublicInbox::Thread::CPANRTBug106498;
-use strict;
-use warnings;
-
-sub new { bless {}, $_[0] }
-
-sub simple_subject {}
-
-1;
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index a3b2681..9f1bf46 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -749,8 +749,8 @@ sub msg_timestamp {
sub thread_results {
my ($msgs) = @_;
- require PublicInbox::Thread;
- my $th = PublicInbox::Thread->new(@$msgs);
+ require PublicInbox::SearchThread;
+ my $th = PublicInbox::SearchThread->new($msgs);
$th->thread;
$th->order(*sort_ts);
$th
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index 4d599fc..11fc92e 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -112,7 +112,7 @@ sub call {
sub preload {
require PublicInbox::Feed;
require PublicInbox::View;
- require PublicInbox::Thread;
+ require PublicInbox::SearchThread;
require Email::MIME;
require Digest::SHA;
require POSIX;
diff --git a/t/plack.t b/t/plack.t
index 608afb9..1d62458 100644
--- a/t/plack.t
+++ b/t/plack.t
@@ -11,8 +11,7 @@ my $pi_config = "$tmpdir/config";
my $maindir = "$tmpdir/main.git";
my $addr = 'test-public@example.com';
my $cfgpfx = "publicinbox.test";
-my @mods = qw(HTTP::Request::Common Plack::Test
- Mail::Thread URI::Escape);
+my @mods = qw(HTTP::Request::Common Plack::Test URI::Escape);
foreach my $mod (@mods) {
eval "require $mod";
plan skip_all => "$mod missing for plack.t" if $@;
--
EW
^ permalink raw reply related [flat|nested] 23+ messages in thread
* [PATCH 03/17] thread: pass array refs instead of entire arrays
2016-10-05 23:57 [PATCH 0/17] remove Mail::Thread dependency Eric Wong
2016-10-05 23:57 ` [PATCH 01/17] view: remove "subject dummy" references Eric Wong
2016-10-05 23:57 ` [PATCH 02/17] thread: remove Mail::Thread dependency Eric Wong
@ 2016-10-05 23:57 ` Eric Wong
2016-10-05 23:57 ` [PATCH 04/17] thread: remove accessor usage in internals Eric Wong
` (14 subsequent siblings)
17 siblings, 0 replies; 23+ messages in thread
From: Eric Wong @ 2016-10-05 23:57 UTC (permalink / raw)
To: meta
Copying large arrays is expensive, so avoid it.
This reduces /$INBOX/ time by around 1%.
---
lib/PublicInbox/SearchThread.pm | 25 +++++++++++++------------
lib/PublicInbox/SearchView.pm | 4 ++--
lib/PublicInbox/View.pm | 4 ++--
3 files changed, 17 insertions(+), 16 deletions(-)
diff --git a/lib/PublicInbox/SearchThread.pm b/lib/PublicInbox/SearchThread.pm
index 41fe859..e086132 100644
--- a/lib/PublicInbox/SearchThread.pm
+++ b/lib/PublicInbox/SearchThread.pm
@@ -141,9 +141,9 @@ sub order {
$root->order_children( $ordersub );
# and untangle it
- my @kids = $root->children;
- $self->{rootset} = \@kids;
- $root->remove_child($_) for @kids;
+ my $kids = $root->children;
+ $self->{rootset} = $kids;
+ $root->remove_child($_) for @$kids;
}
package PublicInbox::SearchThread::Container;
@@ -163,7 +163,7 @@ sub add_child {
croak "Cowardly refusing to become my own parent: $self"
if $self == $child;
- if (grep { $_ == $child } $self->children) {
+ if (grep { $_ == $child } @{$self->children}) {
# All is potentially correct with the world
$child->parent($self);
return;
@@ -220,14 +220,15 @@ sub children {
push @children, $visitor;
$visitor = $visitor->next
}
- return @children;
+ \@children;
}
sub set_children {
- my $self = shift;
- my $walk = $self->child( shift );
- while (@_) { $walk = $walk->next( shift ) }
- $walk->next(undef) if $walk;
+ my ($self, $children) = @_;
+ my $walk = $self->{child} = shift @$children;
+ do {
+ $walk = $walk->{next} = shift @$children;
+ } while ($walk);
}
sub order_children {
@@ -238,9 +239,9 @@ sub order_children {
my $sub = sub {
my $cont = shift;
- my @children = $cont->children;
- return if @children < 2;
- $cont->set_children( $ordersub->( @children ) );
+ my $children = $cont->children;
+ return if @$children < 2;
+ $cont->set_children( $ordersub->( $children ) );
};
$self->iterate_down( undef, $sub );
undef $sub;
diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm
index da31109..0d54c3d 100644
--- a/lib/PublicInbox/SearchView.pm
+++ b/lib/PublicInbox/SearchView.pm
@@ -156,10 +156,10 @@ sub mset_thread {
$th->thread;
if ($q->{r}) { # order by relevance
$th->order(sub {
- sort { (eval { $pct{$b->topmost->messageid} } || 0)
+ [ sort { (eval { $pct{$b->topmost->messageid} } || 0)
<=>
(eval { $pct{$a->topmost->messageid} } || 0)
- } @_;
+ } @{$_[0]} ];
});
} else { # order by time (default for threaded view)
$th->order(*PublicInbox::View::sort_ts);
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 9f1bf46..e90efda 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -856,10 +856,10 @@ sub skel_dump {
}
sub sort_ts {
- sort {
+ [ sort {
(eval { $a->topmost->message->header('X-PI-TS') } || 0) <=>
(eval { $b->topmost->message->header('X-PI-TS') } || 0)
- } @_;
+ } @{$_[0]} ];
}
sub _tryload_ghost ($$) {
--
EW
^ permalink raw reply related [flat|nested] 23+ messages in thread
* [PATCH 04/17] thread: remove accessor usage in internals
2016-10-05 23:57 [PATCH 0/17] remove Mail::Thread dependency Eric Wong
` (2 preceding siblings ...)
2016-10-05 23:57 ` [PATCH 03/17] thread: pass array refs instead of entire arrays Eric Wong
@ 2016-10-05 23:57 ` Eric Wong
2016-10-05 23:57 ` [PATCH 05/17] inbox: deal with ghost smsg Eric Wong
` (13 subsequent siblings)
17 siblings, 0 replies; 23+ messages in thread
From: Eric Wong @ 2016-10-05 23:57 UTC (permalink / raw)
To: meta
This improves top-level index generation performance by 3-4%.
---
lib/PublicInbox/SearchThread.pm | 72 +++++++++++++++++++----------------------
1 file changed, 34 insertions(+), 38 deletions(-)
diff --git a/lib/PublicInbox/SearchThread.pm b/lib/PublicInbox/SearchThread.pm
index e086132..53fec97 100644
--- a/lib/PublicInbox/SearchThread.pm
+++ b/lib/PublicInbox/SearchThread.pm
@@ -66,7 +66,8 @@ sub rootset { @{$_[0]{rootset}} }
sub thread {
my $self = shift;
$self->_setup();
- $self->{rootset} = [ grep { !$_->parent } values %{$self->{id_table}} ];
+ $self->{rootset} = [
+ grep { !$_->{parent} } values %{$self->{id_table}} ];
$self->_finish();
}
@@ -95,7 +96,7 @@ sub _add_message ($$) {
# A. if id_table...
my $this_container = $self->_get_cont_for_id($self->_msgid($message));
- $this_container->message($message);
+ $this_container->{message} = $message;
# B. For each element in the message's References field:
my @refs = $self->_references($message);
@@ -112,7 +113,7 @@ sub _add_message ($$) {
# a loop...
if ($prev &&
- !$container->parent && # already linked
+ !$container->{parent} && # already linked
!$container->has_descendent($prev) # would loop
) {
$prev->add_child($container);
@@ -152,10 +153,9 @@ use Scalar::Util qw(weaken);
sub new { my $self = shift; bless { id => shift }, $self; }
-sub message { $_[0]->{message} = $_[1] if @_ == 2; $_[0]->{message} }
-sub parent { @_ == 2 ? weaken($_[0]->{parent} = $_[1]) : $_[0]->{parent} }
-sub child { $_[0]->{child} = $_[1] if @_ == 2; $_[0]->{child} }
-sub next { $_[0]->{next} = $_[1] if @_ == 2; $_[0]->{next} }
+sub message { $_[0]->{message} }
+sub child { $_[0]->{child} }
+sub next { $_[0]->{next} }
sub messageid { $_[0]->{id} }
sub add_child {
@@ -165,41 +165,39 @@ sub add_child {
if (grep { $_ == $child } @{$self->children}) {
# All is potentially correct with the world
- $child->parent($self);
+ weaken($child->{parent} = $self);
return;
}
- $child->parent->remove_child($child) if $child->parent;
+ my $parent = $child->{parent};
+ remove_child($parent, $child) if $parent;
- $child->next($self->child);
- $self->child($child);
- $child->parent($self);
+ $child->{next} = $self->{child};
+ $self->{child} = $child;
+ weaken($child->{parent} = $self);
}
sub remove_child {
my ($self, $child) = @_;
- return unless $self->child;
- if ($self->child == $child) { # First one's easy.
- $self->child($child->next);
- $child->next(undef);
- $child->parent(undef);
+
+ my $x = $self->{child} or return;
+ if ($x == $child) { # First one's easy.
+ $self->{child} = $child->{next};
+ $child->{parent} = $child->{next} = undef;
return;
}
- my $x = $self->child;
my $prev = $x;
- while ($x = $x->next) {
+ while ($x = $x->{next}) {
if ($x == $child) {
- $prev->next($x->next); # Unlink x
- $x->next(undef);
- $x->parent(undef); # Deparent it
+ $prev->{next} = $x->{next}; # Unlink x
+ $x->{next} = $x->{parent} = undef; # Deparent it
return;
}
$prev = $x;
}
# oddly, we can get here
- $child->next(undef);
- $child->parent(undef);
+ $child->{next} = $child->{parent} = undef;
}
sub has_descendent {
@@ -215,10 +213,10 @@ sub has_descendent {
sub children {
my $self = shift;
my @children;
- my $visitor = $self->child;
+ my $visitor = $self->{child};
while ($visitor) {
push @children, $visitor;
- $visitor = $visitor->next
+ $visitor = $visitor->{next};
}
\@children;
}
@@ -256,16 +254,16 @@ sub recurse_down {
$seen{$cont}++;
$callback->($cont);
- if (my $next = $cont->next) {
+ if (my $next = $cont->{next}) {
if ($seen{$next}) {
- $cont->next(undef);
+ $cont->{next} = undef;
} else {
push @q, $next;
}
}
- if (my $child = $cont->child) {
+ if (my $child = $cont->{child}) {
if ($seen{$child}) {
- $cont->child(undef);
+ $cont->{child} = undef;
} else {
push @q, $child;
}
@@ -288,16 +286,14 @@ sub iterate_down {
# spot/break loops
$seen{$walk}++;
- my $child = $walk->child;
+ my $child = $walk->{child};
if ($child && $seen{$child}) {
- $walk->child(undef);
- $child = undef;
+ $walk->{child} = $child = undef;
}
- my $next = $walk->next;
+ my $next = $walk->{next};
if ($next && $seen{$next}) {
- $walk->next(undef);
- $next = undef;
+ $walk->{next} = $next = undef;
}
# go down, or across
@@ -310,9 +306,9 @@ sub iterate_down {
if (!$next) {
my $up = $walk;
while ($up && !$next) {
- $up = $up->parent;
+ $up = $up->{parent};
--$depth;
- $next = $up->next if $up;
+ $next = $up->{next} if $up;
}
}
$walk = $next;
--
EW
^ permalink raw reply related [flat|nested] 23+ messages in thread
* [PATCH 05/17] inbox: deal with ghost smsg
2016-10-05 23:57 [PATCH 0/17] remove Mail::Thread dependency Eric Wong
` (3 preceding siblings ...)
2016-10-05 23:57 ` [PATCH 04/17] thread: remove accessor usage in internals Eric Wong
@ 2016-10-05 23:57 ` Eric Wong
2016-10-05 23:57 ` [PATCH 06/17] thread: remove Email::Abstract wrapping Eric Wong
` (12 subsequent siblings)
17 siblings, 0 replies; 23+ messages in thread
From: Eric Wong @ 2016-10-05 23:57 UTC (permalink / raw)
To: meta
smsg will be undef for ghost messages in a subsequent commit
---
lib/PublicInbox/Inbox.pm | 2 ++
1 file changed, 2 insertions(+)
diff --git a/lib/PublicInbox/Inbox.pm b/lib/PublicInbox/Inbox.pm
index 414973c..8c63908 100644
--- a/lib/PublicInbox/Inbox.pm
+++ b/lib/PublicInbox/Inbox.pm
@@ -211,6 +211,8 @@ sub msg_by_path ($$;$) {
sub msg_by_smsg ($$;$) {
my ($self, $smsg, $ref) = @_;
+ return unless defined $smsg; # ghost
+
# backwards compat to fallback to msg_by_mid
# TODO: remove if we bump SCHEMA_VERSION in Search.pm:
defined(my $blob = $smsg->blob) or return msg_by_mid($self, $smsg->mid);
--
EW
^ permalink raw reply related [flat|nested] 23+ messages in thread
* [PATCH 06/17] thread: remove Email::Abstract wrapping
2016-10-05 23:57 [PATCH 0/17] remove Mail::Thread dependency Eric Wong
` (4 preceding siblings ...)
2016-10-05 23:57 ` [PATCH 05/17] inbox: deal with ghost smsg Eric Wong
@ 2016-10-05 23:57 ` Eric Wong
2016-10-05 23:57 ` [PATCH 07/17] thread: remove rootset accessor method Eric Wong
` (11 subsequent siblings)
17 siblings, 0 replies; 23+ messages in thread
From: Eric Wong @ 2016-10-05 23:57 UTC (permalink / raw)
To: meta
This roughly doubles performance due to the reduction in
object creation and abstraction layers.
---
lib/PublicInbox/SearchMsg.pm | 29 ------------
lib/PublicInbox/SearchThread.pm | 99 ++++++++++++-----------------------------
lib/PublicInbox/SearchView.pm | 8 ++--
lib/PublicInbox/View.pm | 90 +++++++++++++++++--------------------
t/search.t | 7 +--
5 files changed, 76 insertions(+), 157 deletions(-)
diff --git a/lib/PublicInbox/SearchMsg.pm b/lib/PublicInbox/SearchMsg.pm
index 9d873c4..9dcc1e6 100644
--- a/lib/PublicInbox/SearchMsg.pm
+++ b/lib/PublicInbox/SearchMsg.pm
@@ -144,35 +144,6 @@ sub ensure_metadata {
}
}
-# for threading only
-sub mini_mime {
- my ($self) = @_;
- $self->ensure_metadata;
- my @h = (
- 'Subject' => $self->subject,
- 'X-PI-From' => $self->from_name,
- # prevent Email::Simple::Creator from running,
- # this header is useless for threading as we use X-PI-TS
- # for sorting and display:
- 'Date' => EPOCH_822,
- 'Message-ID' => "<$self->{mid}>",
- 'X-PI-TS' => $self->ts,
- );
- if (my $refs = $self->{references}) {
- push @h, References => $refs;
- }
- my $mime = Email::MIME->create(header => \@h);
- my $h = $mime->header_obj;
-
- # set these headers manually since Encode::encode('MIME-Q', ...)
- # will add spaces to long values when using header_str above.
-
- # drop useless headers Email::MIME set for us
- $h->header_set('Date');
- $h->header_set('MIME-Version');
- $mime;
-}
-
sub mid ($;$) {
my ($self, $mid) = @_;
diff --git a/lib/PublicInbox/SearchThread.pm b/lib/PublicInbox/SearchThread.pm
index 53fec97..d39e9b6 100644
--- a/lib/PublicInbox/SearchThread.pm
+++ b/lib/PublicInbox/SearchThread.pm
@@ -20,7 +20,6 @@
package PublicInbox::SearchThread;
use strict;
use warnings;
-use Email::Abstract;
sub new {
return bless {
@@ -30,37 +29,6 @@ sub new {
}, $_[0];
}
-sub _get_hdr {
- my ($class, $msg, $hdr) = @_;
- Email::Abstract->get_header($msg, $hdr) || '';
-}
-
-sub _uniq {
- my %seen;
- return grep { !$seen{$_}++ } @_;
-}
-
-sub _references {
- my $class = shift;
- my $msg = shift;
- my @references = ($class->_get_hdr($msg, "References") =~ /<([^>]+)>/g);
- my $foo = $class->_get_hdr($msg, "In-Reply-To");
- chomp $foo;
- $foo =~ s/.*?<([^>]+)>.*/$1/;
- push @references, $foo
- if $foo =~ /^\S+\@\S+$/ && (!@references || $references[-1] ne $foo);
- return _uniq(@references);
-}
-
-sub _msgid {
- my ($class, $msg) = @_;
- my $id = $class->_get_hdr($msg, "Message-ID");
- die "attempt to thread message with no id" unless $id;
- chomp $id;
- $id =~ s/^<([^>]+)>.*/$1/; # We expect this not to have <>s
- return $id;
-}
-
sub rootset { @{$_[0]{rootset}} }
sub thread {
@@ -77,14 +45,11 @@ sub _finish {
delete $self->{seen};
}
-sub _get_cont_for_id {
- my $self = shift;
- my $id = shift;
- $self->{id_table}{$id} ||= $self->_container_class->new($id);
+sub _get_cont_for_id ($$) {
+ my ($self, $mid) = @_;
+ $self->{id_table}{$mid} ||= PublicInbox::SearchThread::Msg->new($mid);
}
-sub _container_class { 'PublicInbox::SearchThread::Container' }
-
sub _setup {
my ($self) = @_;
@@ -92,41 +57,40 @@ sub _setup {
}
sub _add_message ($$) {
- my ($self, $message) = @_;
+ my ($self, $smsg) = @_;
# A. if id_table...
- my $this_container = $self->_get_cont_for_id($self->_msgid($message));
- $this_container->{message} = $message;
+ my $this = _get_cont_for_id($self, $smsg->{mid});
+ $this->{smsg} = $smsg;
# B. For each element in the message's References field:
- my @refs = $self->_references($message);
-
my $prev;
- for my $ref (@refs) {
- # Find a Container object for the given Message-ID
- my $container = $self->_get_cont_for_id($ref);
-
- # Link the References field's Containers together in the
- # order implied by the References header
- # * If they are already linked don't change the existing links
- # * Do not add a link if adding that link would introduce
- # a loop...
-
- if ($prev &&
- !$container->{parent} && # already linked
- !$container->has_descendent($prev) # would loop
- ) {
- $prev->add_child($container);
+ if (defined(my $refs = $smsg->{references})) {
+ foreach my $ref ($refs =~ m/<([^>]+)>/g) {
+ # Find a Container object for the given Message-ID
+ my $cont = _get_cont_for_id($self, $ref);
+
+ # Link the References field's Containers together in
+ # the order implied by the References header
+ #
+ # * If they are already linked don't change the
+ # existing links
+ # * Do not add a link if adding that link would
+ # introduce a loop...
+ if ($prev &&
+ !$cont->{parent} && # already linked
+ !$cont->has_descendent($prev) # would loop
+ ) {
+ $prev->add_child($cont);
+ }
+ $prev = $cont;
}
- $prev = $container;
}
# C. Set the parent of this message to be the last element in
# References...
- if ($prev &&
- !$this_container->has_descendent($prev) # would loop
- ) {
- $prev->add_child($this_container)
+ if ($prev && !$this->has_descendent($prev)) { # would loop
+ $prev->add_child($this)
}
}
@@ -135,7 +99,7 @@ sub order {
my $ordersub = shift;
# make a fake root
- my $root = $self->_container_class->new( 'fakeroot' );
+ my $root = _get_cont_for_id($self, 'fakeroot');
$root->add_child( $_ ) for @{ $self->{rootset} };
# sort it
@@ -147,17 +111,12 @@ sub order {
$root->remove_child($_) for @$kids;
}
-package PublicInbox::SearchThread::Container;
+package PublicInbox::SearchThread::Msg;
use Carp qw(croak);
use Scalar::Util qw(weaken);
sub new { my $self = shift; bless { id => shift }, $self; }
-sub message { $_[0]->{message} }
-sub child { $_[0]->{child} }
-sub next { $_[0]->{next} }
-sub messageid { $_[0]->{id} }
-
sub add_child {
my ($self, $child) = @_;
croak "Cowardly refusing to become my own parent: $self"
diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm
index 0d54c3d..ebeb41f 100644
--- a/lib/PublicInbox/SearchView.pm
+++ b/lib/PublicInbox/SearchView.pm
@@ -148,7 +148,6 @@ sub mset_thread {
my $i = $_;
my $m = PublicInbox::SearchMsg->load_doc($i->get_document);
$pct{$m->mid} = $i->get_percent;
- $m = $m->mini_mime;
$m;
} ($mset->items);
@@ -156,9 +155,9 @@ sub mset_thread {
$th->thread;
if ($q->{r}) { # order by relevance
$th->order(sub {
- [ sort { (eval { $pct{$b->topmost->messageid} } || 0)
+ [ sort { (eval { $pct{$b->topmost->{id}} } || 0)
<=>
- (eval { $pct{$a->topmost->messageid} } || 0)
+ (eval { $pct{$a->topmost->{id}} } || 0)
} @{$_[0]} ];
});
} else { # order by time (default for threaded view)
@@ -185,8 +184,7 @@ sub mset_thread {
sub {
return unless $msgs;
while ($mime = shift @$msgs) {
- my $mid = mid_clean(mid_mime($mime));
- $mime = $inbox->msg_by_mid($mid) and last;
+ $mime = $inbox->msg_by_smsg($mime) and last;
}
if ($mime) {
$mime = Email::MIME->new($mime);
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index e90efda..748e391 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -214,12 +214,12 @@ sub _th_index_lite {
$rv .= $pad . $irt_map->[1];
if ($idx > 0) {
my $prev = $siblings->[$idx - 1];
- my $pmid = $prev->messageid;
+ my $pmid = $prev->{id};
if ($idx > 2) {
my $s = ($idx - 1). ' preceding siblings ...';
$rv .= pad_link($pmid, $level, $s);
} elsif ($idx == 2) {
- my $ppmid = $siblings->[0]->messageid;
+ my $ppmid = $siblings->[0]->{id};
$rv .= $pad . $mapping->{$ppmid}->[1];
}
$rv .= $pad . $mapping->{$pmid}->[1];
@@ -233,25 +233,25 @@ sub _th_index_lite {
$this =~ s!<a\nhref=[^>]+>([^<]+)</a>!$1!s; # no point linking to self
$rv .= "<b>@ $this";
my $node = $map->[2];
- if (my $child = $node->child) {
- my $cmid = $child->messageid;
+ if (my $child = $node->{child}) {
+ my $cmid = $child->{id};
$rv .= $pad . $mapping->{$cmid}->[1];
if ($nr_c > 2) {
my $s = ($nr_c - 1). ' more replies';
$rv .= pad_link($cmid, $level + 1, $s);
- } elsif (my $cn = $child->next) {
- $rv .= $pad . $mapping->{$cn->messageid}->[1];
+ } elsif (my $cn = $child->{next}) {
+ $rv .= $pad . $mapping->{$cn->{id}}->[1];
}
}
- if (my $next = $node->next) {
- my $nmid = $next->messageid;
+ if (my $next = $node->{next}) {
+ my $nmid = $next->{id};
$rv .= $pad . $mapping->{$nmid}->[1];
my $nnext = $nr_s - $idx;
if ($nnext > 2) {
my $s = ($nnext - 1).' subsequent siblings';
$rv .= pad_link($nmid, $level, $s);
- } elsif (my $nn = $next->next) {
- $rv .= $pad . $mapping->{$nn->messageid}->[1];
+ } elsif (my $nn = $next->{next}) {
+ $rv .= $pad . $mapping->{$nn->{id}}->[1];
}
}
$rv .= $pad ."<a\nhref=#r$id>$s_s, $s_c; $ctx->{s_nr}</a>\n";
@@ -264,7 +264,7 @@ sub walk_thread {
my $level = shift @q;
my $node = shift @q or next;
$cb->($ctx, $level, $node);
- unshift @q, $level+1, $node->child, $level, $node->next;
+ unshift @q, $level+1, $node->{child}, $level, $node->{next};
}
}
@@ -272,12 +272,12 @@ sub pre_thread {
my ($ctx, $level, $node) = @_;
my $mapping = $ctx->{mapping};
my $idx = -1;
- if (my $parent = $node->parent) {
- my $m = $mapping->{$parent->messageid}->[0];
+ if (my $parent = $node->{parent}) {
+ my $m = $mapping->{$parent->{id}}->[0];
$idx = scalar @$m;
push @$m, $node;
}
- $mapping->{$node->messageid} = [ [], '', $node, $idx, $level ];
+ $mapping->{$node->{id}} = [ [], '', $node, $idx, $level ];
skel_dump($ctx, $level, $node);
}
@@ -296,8 +296,8 @@ sub stream_thread ($$) {
while (@q) {
$level = shift @q;
my $node = shift @q or next;
- unshift @q, $level+1, $node->child, $level, $node->next;
- $mime = $inbox->msg_by_mid($node->messageid) and last;
+ unshift @q, $level+1, $node->{child}, $level, $node->{next};
+ $mime = $inbox->msg_by_smsg($node->{smsg}) and last;
}
return missing_thread($ctx) unless $mime;
@@ -309,13 +309,14 @@ sub stream_thread ($$) {
while (@q) {
$level = shift @q;
my $node = shift @q or next;
- unshift @q, $level+1, $node->child, $level, $node->next;
- my $mid = $node->messageid;
- if ($mime = $inbox->msg_by_mid($mid)) {
+ unshift @q, $level+1, $node->{child},
+ $level, $node->{next};
+ my $mid = $node->{id};
+ if ($mime = $inbox->msg_by_smsg($node->{smsg})) {
$mime = Email::MIME->new($mime);
return thread_index_entry($ctx, $level, $mime);
} else {
- return ghost_index_entry($ctx, $level, $mid);
+ return ghost_index_entry($ctx, $level, $node);
}
}
my $ret = join('', thread_adj_level($ctx, 0));
@@ -355,11 +356,11 @@ sub thread_html {
$skel .= '</pre>';
return stream_thread($th, $ctx) unless $ctx->{flat};
- # flat display: lazy load the full message from mini_mime:
+ # flat display: lazy load the full message from smsg
my $inbox = $ctx->{-inbox};
my $mime;
while ($mime = shift @$msgs) {
- $mime = $inbox->msg_by_mid(mid_clean(mid_mime($mime))) and last;
+ $mime = $inbox->msg_by_smsg($mime) and last;
}
return missing_thread($ctx) unless $mime;
$mime = Email::MIME->new($mime);
@@ -369,8 +370,7 @@ sub thread_html {
PublicInbox::WwwStream->response($ctx, 200, sub {
return unless $msgs;
while ($mime = shift @$msgs) {
- $mid = mid_clean(mid_mime($mime));
- $mime = $inbox->msg_by_mid($mid) and last;
+ $mime = $inbox->msg_by_smsg($mime) and last;
}
if ($mime) {
$mime = Email::MIME->new($mime);
@@ -738,7 +738,7 @@ sub indent_for {
sub load_results {
my ($sres) = @_;
- [ map { $_->mini_mime } @{delete $sres->{msgs}} ];
+ [ map { $_->ensure_metadata; $_ } @{delete $sres->{msgs}} ];
}
sub msg_timestamp {
@@ -771,13 +771,13 @@ sub _msg_date {
sub fmt_ts { POSIX::strftime('%Y-%m-%d %k:%M', gmtime($_[0])) }
sub _skel_header {
- my ($ctx, $hdr, $level) = @_;
+ my ($ctx, $smsg, $level) = @_;
my $dst = $ctx->{dst};
my $cur = $ctx->{cur};
- my $mid = mid_clean($hdr->header_raw('Message-ID'));
- my $f = ascii_html($hdr->header('X-PI-From'));
- my $d = _msg_date($hdr) . ' ' . indent_for($level) . th_pfx($level);
+ my $mid = $smsg->{mid};
+ my $f = ascii_html($smsg->from_name);
+ my $d = fmt_ts($smsg->{ts}) . ' ' . indent_for($level) . th_pfx($level);
my $attr = $f;
$ctx->{first_level} ||= $level;
@@ -802,7 +802,7 @@ sub _skel_header {
# Subject is never undef, this mail was loaded from
# our Xapian which would've resulted in '' if it were
# really missing (and Filter rejects empty subjects)
- my $s = $hdr->header('Subject');
+ my $s = $smsg->subject;
my $h = $ctx->{srch}->subject_path($s);
if ($ctx->{seen}->{$h}) {
$s = undef;
@@ -829,10 +829,10 @@ sub _skel_header {
sub skel_dump {
my ($ctx, $level, $node) = @_;
- if (my $mime = $node->message) {
- _skel_header($ctx, $mime->header_obj, $level);
+ if (my $smsg = $node->{smsg}) {
+ _skel_header($ctx, $smsg, $level);
} else {
- my $mid = $node->messageid;
+ my $mid = $node->{id};
my $dst = $ctx->{dst};
my $mapping = $ctx->{mapping};
my $map = $mapping->{$mid} if $mapping;
@@ -857,31 +857,24 @@ sub skel_dump {
sub sort_ts {
[ sort {
- (eval { $a->topmost->message->header('X-PI-TS') } || 0) <=>
- (eval { $b->topmost->message->header('X-PI-TS') } || 0)
+ (eval { $a->topmost->{smsg}->ts } || 0) <=>
+ (eval { $b->topmost->{smsg}->ts } || 0)
} @{$_[0]} ];
}
-sub _tryload_ghost ($$) {
- my ($srch, $mid) = @_;
- my $smsg = $srch->lookup_mail($mid) or return;
- $smsg->mini_mime;
-}
-
# accumulate recent topics if search is supported
# returns 200 if done, 404 if not
sub acc_topic {
my ($ctx, $level, $node) = @_;
my $srch = $ctx->{srch};
- my $mid = $node->messageid;
- my $x = $node->message || _tryload_ghost($srch, $mid);
+ my $mid = $node->{id};
+ my $x = $node->{smsg} || $srch->lookup_mail($mid);
my ($subj, $ts);
my $topic;
if ($x) {
- $x = $x->header_obj;
- $subj = $x->header('Subject') || '';
+ $subj = $x->subject;
$subj = $srch->subject_normalized($subj);
- $ts = $x->header('X-PI-TS');
+ $ts = $x->ts;
if ($level == 0) {
$topic = [ $ts, 1, { $subj => $mid }, $subj ];
$ctx->{-cur_topic} = $topic;
@@ -1023,9 +1016,10 @@ sub thread_adj_level {
}
sub ghost_index_entry {
- my ($ctx, $level, $mid) = @_;
+ my ($ctx, $level, $node) = @_;
my ($beg, $end) = thread_adj_level($ctx, $level);
- $beg . '<pre>'. ghost_parent($ctx->{-upfx}, $mid) . '</pre>' . $end;
+ $beg . '<pre>'. ghost_parent($ctx->{-upfx}, $node->{id})
+ . '</pre>' . $end;
}
1;
diff --git a/t/search.t b/t/search.t
index cce3b9e..eed9c9b 100644
--- a/t/search.t
+++ b/t/search.t
@@ -293,7 +293,7 @@ sub filter_mids {
$smsg->ensure_metadata;
is($smsg->references, '', "no references created");
my $msg = PublicInbox::SearchMsg->load_doc($smsg->{doc});
- is($s, $msg->mini_mime->header('Subject'), 'long subject not rewritten');
+ is($s, $msg->subject, 'long subject not rewritten');
}
{
@@ -310,10 +310,7 @@ sub filter_mids {
my $smsg = $rw->lookup_message('testmessage@example.com');
my $msg = PublicInbox::SearchMsg->load_doc($smsg->{doc});
- # mini_mime technically not valid (I think),
- # but good enough for displaying HTML:
- is($mime->header('Subject'), $msg->mini_mime->header('Subject'),
- 'UTF-8 subject preserved');
+ is($mime->header('Subject'), $msg->subject, 'UTF-8 subject preserved');
}
{
--
EW
^ permalink raw reply related [flat|nested] 23+ messages in thread
* [PATCH 07/17] thread: remove rootset accessor method
2016-10-05 23:57 [PATCH 0/17] remove Mail::Thread dependency Eric Wong
` (5 preceding siblings ...)
2016-10-05 23:57 ` [PATCH 06/17] thread: remove Email::Abstract wrapping Eric Wong
@ 2016-10-05 23:57 ` Eric Wong
2016-10-05 23:57 ` [PATCH 08/17] thread: simplify Eric Wong
` (10 subsequent siblings)
17 siblings, 0 replies; 23+ messages in thread
From: Eric Wong @ 2016-10-05 23:57 UTC (permalink / raw)
To: meta
It doesn't buy us much and copying to a new array is slower;
but probably not measurable in real-world use.
---
lib/PublicInbox/SearchThread.pm | 2 --
lib/PublicInbox/View.pm | 4 ++--
2 files changed, 2 insertions(+), 4 deletions(-)
diff --git a/lib/PublicInbox/SearchThread.pm b/lib/PublicInbox/SearchThread.pm
index d39e9b6..7e89946 100644
--- a/lib/PublicInbox/SearchThread.pm
+++ b/lib/PublicInbox/SearchThread.pm
@@ -29,8 +29,6 @@ sub new {
}, $_[0];
}
-sub rootset { @{$_[0]{rootset}} }
-
sub thread {
my $self = shift;
$self->_setup();
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 748e391..7554d54 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -259,7 +259,7 @@ sub _th_index_lite {
sub walk_thread {
my ($th, $ctx, $cb) = @_;
- my @q = map { (0, $_) } $th->rootset;
+ my @q = map { (0, $_) } @{$th->{rootset}};
while (@q) {
my $level = shift @q;
my $node = shift @q or next;
@@ -291,7 +291,7 @@ sub stream_thread ($$) {
my ($th, $ctx) = @_;
my $inbox = $ctx->{-inbox};
my $mime;
- my @q = map { (0, $_) } $th->rootset;
+ my @q = map { (0, $_) } @{$th->{rootset}};
my $level;
while (@q) {
$level = shift @q;
--
EW
^ permalink raw reply related [flat|nested] 23+ messages in thread
* [PATCH 08/17] thread: simplify
2016-10-05 23:57 [PATCH 0/17] remove Mail::Thread dependency Eric Wong
` (6 preceding siblings ...)
2016-10-05 23:57 ` [PATCH 07/17] thread: remove rootset accessor method Eric Wong
@ 2016-10-05 23:57 ` Eric Wong
2016-10-05 23:57 ` [PATCH 09/17] thread: remove iterate_down Eric Wong
` (9 subsequent siblings)
17 siblings, 0 replies; 23+ messages in thread
From: Eric Wong @ 2016-10-05 23:57 UTC (permalink / raw)
To: meta
Single use subroutines actually make the code more complex in
this case, and there's never a {seen} field in $self.
---
lib/PublicInbox/SearchThread.pm | 14 +-------------
1 file changed, 1 insertion(+), 13 deletions(-)
diff --git a/lib/PublicInbox/SearchThread.pm b/lib/PublicInbox/SearchThread.pm
index 7e89946..a161662 100644
--- a/lib/PublicInbox/SearchThread.pm
+++ b/lib/PublicInbox/SearchThread.pm
@@ -31,16 +31,10 @@ sub new {
sub thread {
my $self = shift;
- $self->_setup();
+ _add_message($self, $_) foreach @{$self->{messages}};
$self->{rootset} = [
grep { !$_->{parent} } values %{$self->{id_table}} ];
- $self->_finish();
-}
-
-sub _finish {
- my $self = shift;
delete $self->{id_table};
- delete $self->{seen};
}
sub _get_cont_for_id ($$) {
@@ -48,12 +42,6 @@ sub _get_cont_for_id ($$) {
$self->{id_table}{$mid} ||= PublicInbox::SearchThread::Msg->new($mid);
}
-sub _setup {
- my ($self) = @_;
-
- _add_message($self, $_) foreach @{$self->{messages}};
-}
-
sub _add_message ($$) {
my ($self, $smsg) = @_;
--
EW
^ permalink raw reply related [flat|nested] 23+ messages in thread
* [PATCH 09/17] thread: remove iterate_down
2016-10-05 23:57 [PATCH 0/17] remove Mail::Thread dependency Eric Wong
` (7 preceding siblings ...)
2016-10-05 23:57 ` [PATCH 08/17] thread: simplify Eric Wong
@ 2016-10-05 23:57 ` Eric Wong
2016-10-05 23:57 ` [PATCH 10/17] thread: avoid incrementing undefined value Eric Wong
` (8 subsequent siblings)
17 siblings, 0 replies; 23+ messages in thread
From: Eric Wong @ 2016-10-05 23:57 UTC (permalink / raw)
To: meta
Unnecessary subs and complexity. This was hiding the fact
that $before is never used.
---
lib/PublicInbox/SearchThread.pm | 39 +++++++++++++--------------------------
1 file changed, 13 insertions(+), 26 deletions(-)
diff --git a/lib/PublicInbox/SearchThread.pm b/lib/PublicInbox/SearchThread.pm
index a161662..dad783e 100644
--- a/lib/PublicInbox/SearchThread.pm
+++ b/lib/PublicInbox/SearchThread.pm
@@ -81,8 +81,7 @@ sub _add_message ($$) {
}
sub order {
- my $self = shift;
- my $ordersub = shift;
+ my ($self, $ordersub) = @_;
# make a fake root
my $root = _get_cont_for_id($self, 'fakeroot');
@@ -174,22 +173,6 @@ sub set_children {
} while ($walk);
}
-sub order_children {
- my $self = shift;
- my $ordersub = shift;
-
- return unless $ordersub;
-
- my $sub = sub {
- my $cont = shift;
- my $children = $cont->children;
- return if @$children < 2;
- $cont->set_children( $ordersub->( $children ) );
- };
- $self->iterate_down( undef, $sub );
- undef $sub;
-}
-
# non-recursive version of recurse_down to avoid stack depth warnings
sub recurse_down {
my ($self, $callback) = @_;
@@ -216,17 +199,14 @@ sub recurse_down {
}
}
-sub iterate_down {
- my $self = shift;
- my ($before, $after) = @_;
+sub order_children {
+ my ($walk, $ordersub) = @_;
my %seen;
- my $walk = $self;
my $depth = 0;
my @visited;
while ($walk) {
- push @visited, [ $walk, $depth ];
- $before->($walk, $depth) if $before;
+ push @visited, $walk;
# spot/break loops
$seen{$walk}++;
@@ -258,8 +238,15 @@ sub iterate_down {
}
$walk = $next;
}
- return unless $after;
- while (@visited) { $after->(@{ pop @visited }) }
+ foreach my $cont (@visited) {
+ my $children = $cont->children;
+ next if @$children < 2;
+ $children = $ordersub->($children);
+ $cont = $cont->{child} = shift @$children;
+ do {
+ $cont = $cont->{next} = shift @$children;
+ } while ($cont);
+ }
}
1;
--
EW
^ permalink raw reply related [flat|nested] 23+ messages in thread
* [PATCH 10/17] thread: avoid incrementing undefined value
2016-10-05 23:57 [PATCH 0/17] remove Mail::Thread dependency Eric Wong
` (8 preceding siblings ...)
2016-10-05 23:57 ` [PATCH 09/17] thread: remove iterate_down Eric Wong
@ 2016-10-05 23:57 ` Eric Wong
2016-10-05 23:57 ` [PATCH 11/17] thread: order_children no longer cares about depth Eric Wong
` (7 subsequent siblings)
17 siblings, 0 replies; 23+ messages in thread
From: Eric Wong @ 2016-10-05 23:57 UTC (permalink / raw)
To: meta
It is pointless to increment when setting a true value is
simpler as there is no need to read before writing.
---
lib/PublicInbox/SearchThread.pm | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/lib/PublicInbox/SearchThread.pm b/lib/PublicInbox/SearchThread.pm
index dad783e..ba31f43 100644
--- a/lib/PublicInbox/SearchThread.pm
+++ b/lib/PublicInbox/SearchThread.pm
@@ -179,7 +179,7 @@ sub recurse_down {
my %seen;
my @q = ($self);
while (my $cont = shift @q) {
- $seen{$cont}++;
+ $seen{$cont} = 1;
$callback->($cont);
if (my $next = $cont->{next}) {
@@ -209,7 +209,7 @@ sub order_children {
push @visited, $walk;
# spot/break loops
- $seen{$walk}++;
+ $seen{$walk} = 1;
my $child = $walk->{child};
if ($child && $seen{$child}) {
--
EW
^ permalink raw reply related [flat|nested] 23+ messages in thread
* [PATCH 11/17] thread: order_children no longer cares about depth
2016-10-05 23:57 [PATCH 0/17] remove Mail::Thread dependency Eric Wong
` (9 preceding siblings ...)
2016-10-05 23:57 ` [PATCH 10/17] thread: avoid incrementing undefined value Eric Wong
@ 2016-10-05 23:57 ` Eric Wong
2016-10-05 23:57 ` [PATCH 12/17] thread: inline and remove recurse_down logic Eric Wong
` (6 subsequent siblings)
17 siblings, 0 replies; 23+ messages in thread
From: Eric Wong @ 2016-10-05 23:57 UTC (permalink / raw)
To: meta
We never use the depth anywhere in this sub
---
lib/PublicInbox/SearchThread.pm | 7 +------
1 file changed, 1 insertion(+), 6 deletions(-)
diff --git a/lib/PublicInbox/SearchThread.pm b/lib/PublicInbox/SearchThread.pm
index ba31f43..2e7b79a 100644
--- a/lib/PublicInbox/SearchThread.pm
+++ b/lib/PublicInbox/SearchThread.pm
@@ -203,7 +203,6 @@ sub order_children {
my ($walk, $ordersub) = @_;
my %seen;
- my $depth = 0;
my @visited;
while ($walk) {
push @visited, $walk;
@@ -222,17 +221,13 @@ sub order_children {
}
# go down, or across
- if ($child) {
- $next = $child;
- ++$depth;
- }
+ $next = $child if $child;
# no next? look up
if (!$next) {
my $up = $walk;
while ($up && !$next) {
$up = $up->{parent};
- --$depth;
$next = $up->{next} if $up;
}
}
--
EW
^ permalink raw reply related [flat|nested] 23+ messages in thread
* [PATCH 12/17] thread: inline and remove recurse_down logic
2016-10-05 23:57 [PATCH 0/17] remove Mail::Thread dependency Eric Wong
` (10 preceding siblings ...)
2016-10-05 23:57 ` [PATCH 11/17] thread: order_children no longer cares about depth Eric Wong
@ 2016-10-05 23:57 ` Eric Wong
2016-10-05 23:57 ` [PATCH 13/17] thread: fix sorting without topmost Eric Wong
` (5 subsequent siblings)
17 siblings, 0 replies; 23+ messages in thread
From: Eric Wong @ 2016-10-05 23:57 UTC (permalink / raw)
To: meta
We no longer recurse, and it's too hard to come up with
a new name for a sub we will only use once.
---
lib/PublicInbox/SearchThread.pm | 55 +++++++++++++++++------------------------
1 file changed, 23 insertions(+), 32 deletions(-)
diff --git a/lib/PublicInbox/SearchThread.pm b/lib/PublicInbox/SearchThread.pm
index 2e7b79a..153eef2 100644
--- a/lib/PublicInbox/SearchThread.pm
+++ b/lib/PublicInbox/SearchThread.pm
@@ -145,42 +145,13 @@ sub remove_child {
}
sub has_descendent {
- my $self = shift;
- my $child = shift;
- die "Assertion failed: $child" unless eval {$child};
- my $there = 0;
- $self->recurse_down(sub { $there = 1 if $_[0] == $child });
-
- return $there;
-}
-
-sub children {
- my $self = shift;
- my @children;
- my $visitor = $self->{child};
- while ($visitor) {
- push @children, $visitor;
- $visitor = $visitor->{next};
- }
- \@children;
-}
-
-sub set_children {
- my ($self, $children) = @_;
- my $walk = $self->{child} = shift @$children;
- do {
- $walk = $walk->{next} = shift @$children;
- } while ($walk);
-}
-
-# non-recursive version of recurse_down to avoid stack depth warnings
-sub recurse_down {
- my ($self, $callback) = @_;
+ my ($self, $child) = @_;
my %seen;
my @q = ($self);
while (my $cont = shift @q) {
$seen{$cont} = 1;
- $callback->($cont);
+
+ return 1 if $cont == $child;
if (my $next = $cont->{next}) {
if ($seen{$next}) {
@@ -197,6 +168,26 @@ sub recurse_down {
}
}
}
+ 0;
+}
+
+sub children {
+ my $self = shift;
+ my @children;
+ my $visitor = $self->{child};
+ while ($visitor) {
+ push @children, $visitor;
+ $visitor = $visitor->{next};
+ }
+ \@children;
+}
+
+sub set_children {
+ my ($self, $children) = @_;
+ my $walk = $self->{child} = shift @$children;
+ do {
+ $walk = $walk->{next} = shift @$children;
+ } while ($walk);
}
sub order_children {
--
EW
^ permalink raw reply related [flat|nested] 23+ messages in thread
* [PATCH 13/17] thread: fix sorting without topmost
2016-10-05 23:57 [PATCH 0/17] remove Mail::Thread dependency Eric Wong
` (11 preceding siblings ...)
2016-10-05 23:57 ` [PATCH 12/17] thread: inline and remove recurse_down logic Eric Wong
@ 2016-10-05 23:57 ` Eric Wong
2016-10-14 21:17 ` [PATCH] thread: reinstates stable ordering when ghosts are present Eric Wong
2016-10-05 23:57 ` [PATCH 14/17] thread: use hash + array instead of hand-rolled linked list Eric Wong
` (4 subsequent siblings)
17 siblings, 1 reply; 23+ messages in thread
From: Eric Wong @ 2016-10-05 23:57 UTC (permalink / raw)
To: meta
This bug was hidden, and we may not be able to efficiently
implement a topmost subroutine with the hash-based (vs
linked-list) based container for threading in the next
commit.
---
lib/PublicInbox/SearchView.pm | 5 ++---
lib/PublicInbox/View.pm | 4 ++--
2 files changed, 4 insertions(+), 5 deletions(-)
diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm
index ebeb41f..cfe6dff 100644
--- a/lib/PublicInbox/SearchView.pm
+++ b/lib/PublicInbox/SearchView.pm
@@ -155,9 +155,8 @@ sub mset_thread {
$th->thread;
if ($q->{r}) { # order by relevance
$th->order(sub {
- [ sort { (eval { $pct{$b->topmost->{id}} } || 0)
- <=>
- (eval { $pct{$a->topmost->{id}} } || 0)
+ [ sort { ( $pct{$b->{id}} || 0) <=>
+ ( $pct{$a->{id}} || 0)
} @{$_[0]} ];
});
} else { # order by time (default for threaded view)
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 7554d54..c09b4a2 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -857,8 +857,8 @@ sub skel_dump {
sub sort_ts {
[ sort {
- (eval { $a->topmost->{smsg}->ts } || 0) <=>
- (eval { $b->topmost->{smsg}->ts } || 0)
+ (eval { $a->{smsg}->ts } || 0) <=>
+ (eval { $b->{smsg}->ts } || 0)
} @{$_[0]} ];
}
--
EW
^ permalink raw reply related [flat|nested] 23+ messages in thread
* [PATCH] thread: reinstates stable ordering when ghosts are present
2016-10-05 23:57 ` [PATCH 13/17] thread: fix sorting without topmost Eric Wong
@ 2016-10-14 21:17 ` Eric Wong
0 siblings, 0 replies; 23+ messages in thread
From: Eric Wong @ 2016-10-14 21:17 UTC (permalink / raw)
To: meta
This reverts commit 3c9dd6619f825f0515e7e4afa1bd55c99c1a68d3
("thread: fix sorting without topmost")
and reinstates the "topmost" routine for sorting purposes.
---
lib/PublicInbox/SearchThread.pm | 10 ++++++++++
lib/PublicInbox/SearchView.pm | 5 +++--
lib/PublicInbox/View.pm | 4 ++--
3 files changed, 15 insertions(+), 4 deletions(-)
diff --git a/lib/PublicInbox/SearchThread.pm b/lib/PublicInbox/SearchThread.pm
index 24a56d2..fe70406 100644
--- a/lib/PublicInbox/SearchThread.pm
+++ b/lib/PublicInbox/SearchThread.pm
@@ -98,6 +98,16 @@ sub new {
}, $_[0];
}
+sub topmost {
+ my ($self) = @_;
+ my @q = ($self);
+ while (my $cont = shift @q) {
+ return $cont if $cont->{smsg};
+ push @q, values %{$cont->{children}};
+ }
+ undef;
+}
+
sub add_child {
my ($self, $child) = @_;
croak "Cowardly refusing to become my own parent: $self"
diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm
index cfe6dff..ebeb41f 100644
--- a/lib/PublicInbox/SearchView.pm
+++ b/lib/PublicInbox/SearchView.pm
@@ -155,8 +155,9 @@ sub mset_thread {
$th->thread;
if ($q->{r}) { # order by relevance
$th->order(sub {
- [ sort { ( $pct{$b->{id}} || 0) <=>
- ( $pct{$a->{id}} || 0)
+ [ sort { (eval { $pct{$b->topmost->{id}} } || 0)
+ <=>
+ (eval { $pct{$a->topmost->{id}} } || 0)
} @{$_[0]} ];
});
} else { # order by time (default for threaded view)
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 0f00458..5d5808f 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -852,8 +852,8 @@ sub skel_dump {
sub sort_ts {
[ sort {
- (eval { $a->{smsg}->ts } || 0) <=>
- (eval { $b->{smsg}->ts } || 0)
+ (eval { $a->topmost->{smsg}->ts } || 0) <=>
+ (eval { $b->topmost->{smsg}->ts } || 0)
} @{$_[0]} ];
}
--
EW
^ permalink raw reply related [flat|nested] 23+ messages in thread
* [PATCH 14/17] thread: use hash + array instead of hand-rolled linked list
2016-10-05 23:57 [PATCH 0/17] remove Mail::Thread dependency Eric Wong
` (12 preceding siblings ...)
2016-10-05 23:57 ` [PATCH 13/17] thread: fix sorting without topmost Eric Wong
@ 2016-10-05 23:57 ` Eric Wong
2016-10-05 23:57 ` [PATCH 15/17] view: remove redundant children array in thread views Eric Wong
` (3 subsequent siblings)
17 siblings, 0 replies; 23+ messages in thread
From: Eric Wong @ 2016-10-05 23:57 UTC (permalink / raw)
To: meta
This starts to show noticeable performance improvements when
attempting to thread over 400 messages; but the improvement
may not be measurable with less.
However, the resulting code is much shorter and (IMHO)
much easier to understand.
---
MANIFEST | 1 +
lib/PublicInbox/SearchThread.pm | 158 +++++++++-------------------------------
lib/PublicInbox/View.pm | 28 ++++---
t/thread-cycle.t | 86 ++++++++++++++++++++++
4 files changed, 138 insertions(+), 135 deletions(-)
create mode 100644 t/thread-cycle.t
diff --git a/MANIFEST b/MANIFEST
index bcc4121..3a4d7c4 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -155,6 +155,7 @@ t/qspawn.t
t/search.t
t/spamcheck_spamc.t
t/spawn.t
+t/thread-cycle.t
t/utf8.mbox
t/view.t
t/watch_maildir.t
diff --git a/lib/PublicInbox/SearchThread.pm b/lib/PublicInbox/SearchThread.pm
index 153eef2..05de9ec 100644
--- a/lib/PublicInbox/SearchThread.pm
+++ b/lib/PublicInbox/SearchThread.pm
@@ -32,9 +32,8 @@ sub new {
sub thread {
my $self = shift;
_add_message($self, $_) foreach @{$self->{messages}};
- $self->{rootset} = [
- grep { !$_->{parent} } values %{$self->{id_table}} ];
- delete $self->{id_table};
+ my $id_table = delete $self->{id_table};
+ $self->{rootset} = [ grep { !$_->{parent} } values %$id_table ];
}
sub _get_cont_for_id ($$) {
@@ -82,156 +81,67 @@ sub _add_message ($$) {
sub order {
my ($self, $ordersub) = @_;
-
- # make a fake root
- my $root = _get_cont_for_id($self, 'fakeroot');
- $root->add_child( $_ ) for @{ $self->{rootset} };
-
- # sort it
- $root->order_children( $ordersub );
-
- # and untangle it
- my $kids = $root->children;
- $self->{rootset} = $kids;
- $root->remove_child($_) for @$kids;
+ my $rootset = $ordersub->($self->{rootset});
+ $self->{rootset} = $rootset;
+ $_->order_children($ordersub) for @$rootset;
}
package PublicInbox::SearchThread::Msg;
+use strict;
+use warnings;
use Carp qw(croak);
use Scalar::Util qw(weaken);
-sub new { my $self = shift; bless { id => shift }, $self; }
+sub new {
+ bless {
+ id => $_[1],
+ children => {}, # becomes an array when sorted by ->order(...)
+ }, $_[0];
+}
sub add_child {
my ($self, $child) = @_;
croak "Cowardly refusing to become my own parent: $self"
if $self == $child;
- if (grep { $_ == $child } @{$self->children}) {
- # All is potentially correct with the world
- weaken($child->{parent} = $self);
- return;
- }
-
- my $parent = $child->{parent};
- remove_child($parent, $child) if $parent;
+ my $cid = $child->{id};
+ $self->{children}->{$cid} = $child;
- $child->{next} = $self->{child};
- $self->{child} = $child;
- weaken($child->{parent} = $self);
-}
-
-sub remove_child {
- my ($self, $child) = @_;
-
- my $x = $self->{child} or return;
- if ($x == $child) { # First one's easy.
- $self->{child} = $child->{next};
- $child->{parent} = $child->{next} = undef;
- return;
+ # reparenting:
+ if (defined(my $parent = $child->{parent})) {
+ delete $parent->{children}->{$cid};
}
- my $prev = $x;
- while ($x = $x->{next}) {
- if ($x == $child) {
- $prev->{next} = $x->{next}; # Unlink x
- $x->{next} = $x->{parent} = undef; # Deparent it
- return;
- }
- $prev = $x;
- }
- # oddly, we can get here
- $child->{next} = $child->{parent} = undef;
+ weaken($child->{parent} = $self);
}
sub has_descendent {
- my ($self, $child) = @_;
+ my ($cur, $child) = @_;
my %seen;
- my @q = ($self);
- while (my $cont = shift @q) {
- $seen{$cont} = 1;
+ my @q = ($cur->{parent} || $cur);
- return 1 if $cont == $child;
+ while (defined($cur = shift @q)) {
+ return 1 if $cur == $child;
- if (my $next = $cont->{next}) {
- if ($seen{$next}) {
- $cont->{next} = undef;
- } else {
- push @q, $next;
- }
- }
- if (my $child = $cont->{child}) {
- if ($seen{$child}) {
- $cont->{child} = undef;
- } else {
- push @q, $child;
- }
+ if (!$seen{$cur}++) {
+ push @q, values %{$cur->{children}};
}
}
0;
}
-sub children {
- my $self = shift;
- my @children;
- my $visitor = $self->{child};
- while ($visitor) {
- push @children, $visitor;
- $visitor = $visitor->{next};
- }
- \@children;
-}
-
-sub set_children {
- my ($self, $children) = @_;
- my $walk = $self->{child} = shift @$children;
- do {
- $walk = $walk->{next} = shift @$children;
- } while ($walk);
-}
-
sub order_children {
- my ($walk, $ordersub) = @_;
+ my ($cur, $ordersub) = @_;
- my %seen;
- my @visited;
- while ($walk) {
- push @visited, $walk;
+ my %seen = ($cur => 1);
+ my @q = ($cur);
+ while (defined($cur = shift @q)) {
+ my $c = $cur->{children}; # The hashref here...
- # spot/break loops
- $seen{$walk} = 1;
-
- my $child = $walk->{child};
- if ($child && $seen{$child}) {
- $walk->{child} = $child = undef;
- }
-
- my $next = $walk->{next};
- if ($next && $seen{$next}) {
- $walk->{next} = $next = undef;
- }
-
- # go down, or across
- $next = $child if $child;
-
- # no next? look up
- if (!$next) {
- my $up = $walk;
- while ($up && !$next) {
- $up = $up->{parent};
- $next = $up->{next} if $up;
- }
- }
- $walk = $next;
- }
- foreach my $cont (@visited) {
- my $children = $cont->children;
- next if @$children < 2;
- $children = $ordersub->($children);
- $cont = $cont->{child} = shift @$children;
- do {
- $cont = $cont->{next} = shift @$children;
- } while ($cont);
+ $c = [ grep { !$seen{$_}++ } values %$c ]; # spot/break loops
+ $c = $ordersub->($c) if scalar @$c > 1;
+ $cur->{children} = $c; # ...becomes an arrayref
+ push @q, @$c;
}
}
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index c09b4a2..d0c6d33 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -203,13 +203,15 @@ sub _th_index_lite {
my $pad = ' ';
# map = [children, attr, node, idx, level]
my $map = $mapping->{$mid_raw};
- my $nr_c = scalar @{$map->[0]};
+ my $children = $map->[0];
+ my $nr_c = scalar @$children;
my $nr_s = 0;
my $level = $map->[4];
my $idx = $map->[3];
+ my $siblings;
my $irt_map = $mapping->{$irt} if defined $irt;
if (defined $irt_map) {
- my $siblings = $irt_map->[0];
+ $siblings = $irt_map->[0];
$nr_s = scalar(@$siblings) - 1;
$rv .= $pad . $irt_map->[1];
if ($idx > 0) {
@@ -233,24 +235,26 @@ sub _th_index_lite {
$this =~ s!<a\nhref=[^>]+>([^<]+)</a>!$1!s; # no point linking to self
$rv .= "<b>@ $this";
my $node = $map->[2];
- if (my $child = $node->{child}) {
- my $cmid = $child->{id};
+ if ($nr_c) {
+ my $cmid = $children->[0]->{id};
$rv .= $pad . $mapping->{$cmid}->[1];
if ($nr_c > 2) {
my $s = ($nr_c - 1). ' more replies';
$rv .= pad_link($cmid, $level + 1, $s);
- } elsif (my $cn = $child->{next}) {
+ } elsif (my $cn = $children->[1]) {
$rv .= $pad . $mapping->{$cn->{id}}->[1];
}
}
- if (my $next = $node->{next}) {
+
+ my $next = $siblings->[$idx+1] if $siblings && $idx >= 0;
+ if ($next) {
my $nmid = $next->{id};
$rv .= $pad . $mapping->{$nmid}->[1];
my $nnext = $nr_s - $idx;
if ($nnext > 2) {
my $s = ($nnext - 1).' subsequent siblings';
$rv .= pad_link($nmid, $level, $s);
- } elsif (my $nn = $next->{next}) {
+ } elsif (my $nn = $siblings->[$idx + 2]) {
$rv .= $pad . $mapping->{$nn->{id}}->[1];
}
}
@@ -264,7 +268,8 @@ sub walk_thread {
my $level = shift @q;
my $node = shift @q or next;
$cb->($ctx, $level, $node);
- unshift @q, $level+1, $node->{child}, $level, $node->{next};
+ ++$level;
+ unshift @q, map { ($level, $_) } @{$node->{children}};
}
}
@@ -296,7 +301,8 @@ sub stream_thread ($$) {
while (@q) {
$level = shift @q;
my $node = shift @q or next;
- unshift @q, $level+1, $node->{child}, $level, $node->{next};
+ my $cl = $level + 1;
+ unshift @q, map { ($cl, $_) } @{$node->{children}};
$mime = $inbox->msg_by_smsg($node->{smsg}) and last;
}
return missing_thread($ctx) unless $mime;
@@ -309,8 +315,8 @@ sub stream_thread ($$) {
while (@q) {
$level = shift @q;
my $node = shift @q or next;
- unshift @q, $level+1, $node->{child},
- $level, $node->{next};
+ my $cl = $level + 1;
+ unshift @q, map { ($cl, $_) } @{$node->{children}};
my $mid = $node->{id};
if ($mime = $inbox->msg_by_smsg($node->{smsg})) {
$mime = Email::MIME->new($mime);
diff --git a/t/thread-cycle.t b/t/thread-cycle.t
new file mode 100644
index 0000000..4d60f7e
--- /dev/null
+++ b/t/thread-cycle.t
@@ -0,0 +1,86 @@
+# Copyright (C) 2016 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use warnings;
+use Test::More;
+use_ok('PublicInbox::SearchMsg');
+use_ok('PublicInbox::SearchThread');
+use Email::Simple;
+my $mt = eval {
+ require Mail::Thread;
+ no warnings 'once';
+ $Mail::Thread::nosubject = 1;
+ $Mail::Thread::noprune = 1;
+};
+my @check;
+my @msgs = map {
+ my $msg = $_;
+ $msg->{references} =~ s/\s+/ /sg if $msg->{references};
+ my $simple = Email::Simple->create(header => [
+ 'Message-Id' => "<$msg->{mid}>",
+ 'References' => $msg->{references},
+ ]);
+ push @check, $simple;
+ bless $msg, 'PublicInbox::SearchMsg'
+} (
+
+# data from t/testbox-6 in Mail::Thread 2.55:
+ { mid => '20021124145312.GA1759@nlin.net' },
+ { mid => 'slrnau448m.7l4.markj+0111@cloaked.freeserve.co.uk',
+ references => '<20021124145312.GA1759@nlin.net>',
+ },
+ { mid => '15842.10677.577458.656565@jupiter.akutech-local.de',
+ references => '<20021124145312.GA1759@nlin.net>
+ <slrnau448m.7l4.markj+0111@cloaked.freeserve.co.uk>',
+ },
+ { mid => '20021125171807.GK8236@somanetworks.com',
+ references => '<20021124145312.GA1759@nlin.net>
+ <slrnau448m.7l4.markj+0111@cloaked.freeserve.co.uk>
+ <15842.10677.577458.656565@jupiter.akutech-local.de>',
+ },
+ { mid => '15843.12163.554914.469248@jupiter.akutech-local.de',
+ references => '<20021124145312.GA1759@nlin.net>
+ <slrnau448m.7l4.markj+0111@cloaked.freeserve.co.uk>
+ <15842.10677.577458.656565@jupiter.akutech-local.de>
+ <E18GPHf-0000zp-00@cloaked.freeserve.co.uk>',
+ },
+ { mid => 'E18GPHf-0000zp-00@cloaked.freeserve.co.uk',
+ references => '<20021124145312.GA1759@nlin.net>
+ <slrnau448m.7l4.markj+0111@cloaked.freeserve.co.uk>
+ <15842.10677.577458.656565@jupiter.akutech-local.de>'
+ }
+);
+
+my $th = PublicInbox::SearchThread->new(\@msgs);
+$th->thread;
+$th->order(sub { [ sort { $a->{id} cmp $b->{id} } @{$_[0]} ] });
+my $st = '';
+my @q = map { (0, $_) } @{$th->{rootset}};
+while (@q) {
+ my $level = shift @q;
+ my $node = shift @q or next;
+ $st .= (" "x$level). "$node->{id}\n";
+ my $cl = $level + 1;
+ unshift @q, map { ($cl, $_) } @{$node->{children}}
+}
+
+SKIP: {
+ skip 'Mail::Thread missing', 1 unless $mt;
+ $mt = Mail::Thread->new(@check);
+ $mt->thread;
+ $mt->order(sub { sort { $a->messageid cmp $b->messageid } @_ });
+ my $check = '';
+
+ @q = map { (0, $_) } $mt->rootset;
+ while (@q) {
+ my $level = shift @q;
+ my $node = shift @q or next;
+ $check .= (" "x$level) . $node->messageid . "\n";
+ unshift @q, $level + 1, $node->child, $level, $node->next;
+ }
+ is($check, $st, 'Mail::Thread output matches');
+}
+
+done_testing();
+
+1;
--
EW
^ permalink raw reply related [flat|nested] 23+ messages in thread
* [PATCH 15/17] view: remove redundant children array in thread views
2016-10-05 23:57 [PATCH 0/17] remove Mail::Thread dependency Eric Wong
` (13 preceding siblings ...)
2016-10-05 23:57 ` [PATCH 14/17] thread: use hash + array instead of hand-rolled linked list Eric Wong
@ 2016-10-05 23:57 ` Eric Wong
2016-10-05 23:57 ` [PATCH 16/17] t/thread-cycle: test self-referential messages Eric Wong
` (2 subsequent siblings)
17 siblings, 0 replies; 23+ messages in thread
From: Eric Wong @ 2016-10-05 23:57 UTC (permalink / raw)
To: meta
Each node has an entire arrayref of its children nowadays, so
there's no need to waste time and memory creating another one.
---
lib/PublicInbox/View.pm | 63 ++++++++++++++++++++-----------------------------
1 file changed, 26 insertions(+), 37 deletions(-)
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index d0c6d33..0f00458 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -201,19 +201,16 @@ sub _th_index_lite {
my $rv = '';
my $mapping = $ctx->{mapping} or return $rv;
my $pad = ' ';
- # map = [children, attr, node, idx, level]
- my $map = $mapping->{$mid_raw};
- my $children = $map->[0];
+ my ($attr, $node, $idx, $level) = @{$mapping->{$mid_raw}};
+ my $children = $node->{children};
my $nr_c = scalar @$children;
my $nr_s = 0;
- my $level = $map->[4];
- my $idx = $map->[3];
my $siblings;
my $irt_map = $mapping->{$irt} if defined $irt;
if (defined $irt_map) {
- $siblings = $irt_map->[0];
+ $siblings = $irt_map->[1]->{children};
$nr_s = scalar(@$siblings) - 1;
- $rv .= $pad . $irt_map->[1];
+ $rv .= $pad . $irt_map->[0];
if ($idx > 0) {
my $prev = $siblings->[$idx - 1];
my $pmid = $prev->{id};
@@ -222,40 +219,38 @@ sub _th_index_lite {
$rv .= pad_link($pmid, $level, $s);
} elsif ($idx == 2) {
my $ppmid = $siblings->[0]->{id};
- $rv .= $pad . $mapping->{$ppmid}->[1];
+ $rv .= $pad . $mapping->{$ppmid}->[0];
}
- $rv .= $pad . $mapping->{$pmid}->[1];
+ $rv .= $pad . $mapping->{$pmid}->[0];
}
}
my $s_s = nr_to_s($nr_s, 'sibling', 'siblings');
my $s_c = nr_to_s($nr_c, 'reply', 'replies');
- my $this = $map->[1];
- $this =~ s!\n\z!</b>\n!s;
- $this =~ s!<a\nhref.*</a> !!s; # no point in duplicating subject
- $this =~ s!<a\nhref=[^>]+>([^<]+)</a>!$1!s; # no point linking to self
- $rv .= "<b>@ $this";
- my $node = $map->[2];
+ $attr =~ s!\n\z!</b>\n!s;
+ $attr =~ s!<a\nhref.*</a> !!s; # no point in duplicating subject
+ $attr =~ s!<a\nhref=[^>]+>([^<]+)</a>!$1!s; # no point linking to self
+ $rv .= "<b>@ $attr";
if ($nr_c) {
my $cmid = $children->[0]->{id};
- $rv .= $pad . $mapping->{$cmid}->[1];
+ $rv .= $pad . $mapping->{$cmid}->[0];
if ($nr_c > 2) {
my $s = ($nr_c - 1). ' more replies';
$rv .= pad_link($cmid, $level + 1, $s);
} elsif (my $cn = $children->[1]) {
- $rv .= $pad . $mapping->{$cn->{id}}->[1];
+ $rv .= $pad . $mapping->{$cn->{id}}->[0];
}
}
my $next = $siblings->[$idx+1] if $siblings && $idx >= 0;
if ($next) {
my $nmid = $next->{id};
- $rv .= $pad . $mapping->{$nmid}->[1];
+ $rv .= $pad . $mapping->{$nmid}->[0];
my $nnext = $nr_s - $idx;
if ($nnext > 2) {
my $s = ($nnext - 1).' subsequent siblings';
$rv .= pad_link($nmid, $level, $s);
} elsif (my $nn = $siblings->[$idx + 2]) {
- $rv .= $pad . $mapping->{$nn->{id}}->[1];
+ $rv .= $pad . $mapping->{$nn->{id}}->[0];
}
}
$rv .= $pad ."<a\nhref=#r$id>$s_s, $s_c; $ctx->{s_nr}</a>\n";
@@ -263,26 +258,20 @@ sub _th_index_lite {
sub walk_thread {
my ($th, $ctx, $cb) = @_;
- my @q = map { (0, $_) } @{$th->{rootset}};
+ my @q = map { (0, $_, -1) } @{$th->{rootset}};
while (@q) {
- my $level = shift @q;
- my $node = shift @q or next;
- $cb->($ctx, $level, $node);
+ my ($level, $node, $i) = splice(@q, 0, 3);
+ defined $node or next;
+ $cb->($ctx, $level, $node, $i);
++$level;
- unshift @q, map { ($level, $_) } @{$node->{children}};
+ $i = 0;
+ unshift @q, map { ($level, $_, $i++) } @{$node->{children}};
}
}
sub pre_thread {
- my ($ctx, $level, $node) = @_;
- my $mapping = $ctx->{mapping};
- my $idx = -1;
- if (my $parent = $node->{parent}) {
- my $m = $mapping->{$parent->{id}}->[0];
- $idx = scalar @$m;
- push @$m, $node;
- }
- $mapping->{$node->{id}} = [ [], '', $node, $idx, $level ];
+ my ($ctx, $level, $node, $idx) = @_;
+ $ctx->{mapping}->{$node->{id}} = [ '', $node, $idx, $level ];
skel_dump($ctx, $level, $node);
}
@@ -825,7 +814,7 @@ sub _skel_header {
my $map = $mapping->{$mid};
$id = id_compress($mid, 1);
$m = '#m'.$id;
- $map->[1] = "$d<a\nhref=\"$m\">$end";
+ $map->[0] = "$d<a\nhref=\"$m\">$end";
$id = "\nid=r".$id;
} else {
$m = $ctx->{-upfx}.mid_escape($mid).'/';
@@ -840,8 +829,6 @@ sub skel_dump {
} else {
my $mid = $node->{id};
my $dst = $ctx->{dst};
- my $mapping = $ctx->{mapping};
- my $map = $mapping->{$mid} if $mapping;
my $d = $ctx->{pct} ? ' [irrelevant] ' # search result
: ' [not found] ';
$d .= indent_for($level) . th_pfx($level);
@@ -850,9 +837,11 @@ sub skel_dump {
my $href = $upfx . $m->{href} . '/';
my $html = $m->as_html;
+ my $mapping = $ctx->{mapping};
+ my $map = $mapping->{$mid} if $mapping;
if ($map) {
my $id = id_compress($mid, 1);
- $map->[1] = $d . qq{<<a\nhref=#r$id>$html</a>>\n};
+ $map->[0] = $d . qq{<<a\nhref=#r$id>$html</a>>\n};
$d .= qq{<<a\nhref="$href"\nid=r$id>$html</a>>\n};
} else {
$d .= qq{<<a\nhref="$href">$html</a>>\n};
--
EW
^ permalink raw reply related [flat|nested] 23+ messages in thread
* [PATCH 16/17] t/thread-cycle: test self-referential messages
2016-10-05 23:57 [PATCH 0/17] remove Mail::Thread dependency Eric Wong
` (14 preceding siblings ...)
2016-10-05 23:57 ` [PATCH 15/17] view: remove redundant children array in thread views Eric Wong
@ 2016-10-05 23:57 ` Eric Wong
2016-10-05 23:57 ` [PATCH 17/17] thread: remove weaken dependency Eric Wong
2016-10-06 8:22 ` [PATCH 0/17] remove Mail::Thread dependency Eric Wong
17 siblings, 0 replies; 23+ messages in thread
From: Eric Wong @ 2016-10-05 23:57 UTC (permalink / raw)
To: meta
Some broken (or malicious) mailers may include a generated
Message-ID in its References header, so be prepared for it.
---
t/thread-cycle.t | 39 +++++++++++++++++++++++++--------------
1 file changed, 25 insertions(+), 14 deletions(-)
diff --git a/t/thread-cycle.t b/t/thread-cycle.t
index 4d60f7e..0e1ecfe 100644
--- a/t/thread-cycle.t
+++ b/t/thread-cycle.t
@@ -51,18 +51,7 @@ my @msgs = map {
}
);
-my $th = PublicInbox::SearchThread->new(\@msgs);
-$th->thread;
-$th->order(sub { [ sort { $a->{id} cmp $b->{id} } @{$_[0]} ] });
-my $st = '';
-my @q = map { (0, $_) } @{$th->{rootset}};
-while (@q) {
- my $level = shift @q;
- my $node = shift @q or next;
- $st .= (" "x$level). "$node->{id}\n";
- my $cl = $level + 1;
- unshift @q, map { ($cl, $_) } @{$node->{children}}
-}
+my $st = thread_to_s(\@msgs);
SKIP: {
skip 'Mail::Thread missing', 1 unless $mt;
@@ -71,7 +60,7 @@ SKIP: {
$mt->order(sub { sort { $a->messageid cmp $b->messageid } @_ });
my $check = '';
- @q = map { (0, $_) } $mt->rootset;
+ my @q = map { (0, $_) } $mt->rootset;
while (@q) {
my $level = shift @q;
my $node = shift @q or next;
@@ -81,6 +70,28 @@ SKIP: {
is($check, $st, 'Mail::Thread output matches');
}
+@msgs = map { bless $_, 'PublicInbox::SearchMsg' } (
+ { mid => 'a@b' },
+ { mid => 'b@c', references => '<a@b> <b@c>' },
+ { mid => 'd@e', references => '<d@e>' },
+);
+
+is(thread_to_s(\@msgs), "a\@b\n b\@c\nd\@e\n", 'ok with self-references');
+
done_testing();
-1;
+sub thread_to_s {
+ my $th = PublicInbox::SearchThread->new(shift);
+ $th->thread;
+ $th->order(sub { [ sort { $a->{id} cmp $b->{id} } @{$_[0]} ] });
+ my $st = '';
+ my @q = map { (0, $_) } @{$th->{rootset}};
+ while (@q) {
+ my $level = shift @q;
+ my $node = shift @q or next;
+ $st .= (" "x$level). "$node->{id}\n";
+ my $cl = $level + 1;
+ unshift @q, map { ($cl, $_) } @{$node->{children}};
+ }
+ $st;
+}
--
EW
^ permalink raw reply related [flat|nested] 23+ messages in thread
* [PATCH 17/17] thread: remove weaken dependency
2016-10-05 23:57 [PATCH 0/17] remove Mail::Thread dependency Eric Wong
` (15 preceding siblings ...)
2016-10-05 23:57 ` [PATCH 16/17] t/thread-cycle: test self-referential messages Eric Wong
@ 2016-10-05 23:57 ` Eric Wong
2016-10-06 8:22 ` [PATCH 0/17] remove Mail::Thread dependency Eric Wong
17 siblings, 0 replies; 23+ messages in thread
From: Eric Wong @ 2016-10-05 23:57 UTC (permalink / raw)
To: meta
We have to walk through all the messages after threading
anyways to build the rootset, so we can just delete all
the parent references at that point.
---
lib/PublicInbox/SearchThread.pm | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/lib/PublicInbox/SearchThread.pm b/lib/PublicInbox/SearchThread.pm
index 05de9ec..003a8d1 100644
--- a/lib/PublicInbox/SearchThread.pm
+++ b/lib/PublicInbox/SearchThread.pm
@@ -33,7 +33,7 @@ sub thread {
my $self = shift;
_add_message($self, $_) foreach @{$self->{messages}};
my $id_table = delete $self->{id_table};
- $self->{rootset} = [ grep { !$_->{parent} } values %$id_table ];
+ $self->{rootset} = [ grep { !delete $_->{parent} } values %$id_table ];
}
sub _get_cont_for_id ($$) {
@@ -90,7 +90,6 @@ package PublicInbox::SearchThread::Msg;
use strict;
use warnings;
use Carp qw(croak);
-use Scalar::Util qw(weaken);
sub new {
bless {
@@ -112,7 +111,7 @@ sub add_child {
delete $parent->{children}->{$cid};
}
- weaken($child->{parent} = $self);
+ $child->{parent} = $self;
}
sub has_descendent {
--
EW
^ permalink raw reply related [flat|nested] 23+ messages in thread
* Re: [PATCH 0/17] remove Mail::Thread dependency
2016-10-05 23:57 [PATCH 0/17] remove Mail::Thread dependency Eric Wong
` (16 preceding siblings ...)
2016-10-05 23:57 ` [PATCH 17/17] thread: remove weaken dependency Eric Wong
@ 2016-10-06 8:22 ` Eric Wong
2016-10-13 3:59 ` [PATCH 0/2] thread: fix regressions from Mail::Thread removal Eric Wong
17 siblings, 1 reply; 23+ messages in thread
From: Eric Wong @ 2016-10-06 8:22 UTC (permalink / raw)
To: meta
I wrote:
> This greatly reduces the amount of code we need to load while
> reducing abstractions which slow us down and hurt memory use
> when displaying gigantic threads.
Rolled back for now on the non-Tor endpoint.
This seems to be handling the threading
for https://public-inbox.org/git/200706180821.59582.arekm@maven.pl/
incorrectly, but not with /T/
https://public-inbox.org/git/200706180821.59582.arekm@maven.pl/T/
oddly...
Will investigate further in a week or so.
^ permalink raw reply [flat|nested] 23+ messages in thread
* [PATCH 0/2] thread: fix regressions from Mail::Thread removal
2016-10-06 8:22 ` [PATCH 0/17] remove Mail::Thread dependency Eric Wong
@ 2016-10-13 3:59 ` Eric Wong
2016-10-13 3:59 ` [PATCH 1/2] thread: reduce indentation level Eric Wong
2016-10-13 3:59 ` [PATCH 2/2] thread: fix parent/child relationships Eric Wong
0 siblings, 2 replies; 23+ messages in thread
From: Eric Wong @ 2016-10-13 3:59 UTC (permalink / raw)
To: meta
Eric Wong (2):
thread: reduce indentation level
thread: fix parent/child relationships
lib/PublicInbox/SearchThread.pm | 54 ++++++++++++++++++-----------------------
1 file changed, 24 insertions(+), 30 deletions(-)
--
EW
^ permalink raw reply [flat|nested] 23+ messages in thread
* [PATCH 1/2] thread: reduce indentation level
2016-10-13 3:59 ` [PATCH 0/2] thread: fix regressions from Mail::Thread removal Eric Wong
@ 2016-10-13 3:59 ` Eric Wong
2016-10-13 3:59 ` [PATCH 2/2] thread: fix parent/child relationships Eric Wong
1 sibling, 0 replies; 23+ messages in thread
From: Eric Wong @ 2016-10-13 3:59 UTC (permalink / raw)
To: meta
This should reduce differences from the original Mail::Thread
code and hopefully make things easier-to-follow.
---
lib/PublicInbox/SearchThread.pm | 38 +++++++++++++++++++-------------------
1 file changed, 19 insertions(+), 19 deletions(-)
diff --git a/lib/PublicInbox/SearchThread.pm b/lib/PublicInbox/SearchThread.pm
index 003a8d1..c6bd999 100644
--- a/lib/PublicInbox/SearchThread.pm
+++ b/lib/PublicInbox/SearchThread.pm
@@ -49,27 +49,27 @@ sub _add_message ($$) {
$this->{smsg} = $smsg;
# B. For each element in the message's References field:
+ defined(my $refs = $smsg->{references}) or return;
+
my $prev;
- if (defined(my $refs = $smsg->{references})) {
- foreach my $ref ($refs =~ m/<([^>]+)>/g) {
- # Find a Container object for the given Message-ID
- my $cont = _get_cont_for_id($self, $ref);
-
- # Link the References field's Containers together in
- # the order implied by the References header
- #
- # * If they are already linked don't change the
- # existing links
- # * Do not add a link if adding that link would
- # introduce a loop...
- if ($prev &&
- !$cont->{parent} && # already linked
- !$cont->has_descendent($prev) # would loop
- ) {
- $prev->add_child($cont);
- }
- $prev = $cont;
+ foreach my $ref ($refs =~ m/<([^>]+)>/g) {
+ # Find a Container object for the given Message-ID
+ my $cont = _get_cont_for_id($self, $ref);
+
+ # Link the References field's Containers together in
+ # the order implied by the References header
+ #
+ # * If they are already linked don't change the
+ # existing links
+ # * Do not add a link if adding that link would
+ # introduce a loop...
+ if ($prev &&
+ !$cont->{parent} && # already linked
+ !$cont->has_descendent($prev) # would loop
+ ) {
+ $prev->add_child($cont);
}
+ $prev = $cont;
}
# C. Set the parent of this message to be the last element in
--
EW
^ permalink raw reply related [flat|nested] 23+ messages in thread
* [PATCH 2/2] thread: fix parent/child relationships
2016-10-13 3:59 ` [PATCH 0/2] thread: fix regressions from Mail::Thread removal Eric Wong
2016-10-13 3:59 ` [PATCH 1/2] thread: reduce indentation level Eric Wong
@ 2016-10-13 3:59 ` Eric Wong
1 sibling, 0 replies; 23+ messages in thread
From: Eric Wong @ 2016-10-13 3:59 UTC (permalink / raw)
To: meta
The ordering change in add_child is critical if $self == $parent
as the {children} hash was lost before this change.
has_descendent can be simplified by walking upwards from the child
instead of downwards from the parent.
This fixes threading regressions introduced in
commit 30100c46326e2eac275e0af13116636701d2537e
("thread: use hash + array instead of hand-rolled linked list")
---
lib/PublicInbox/SearchThread.pm | 16 +++++-----------
1 file changed, 5 insertions(+), 11 deletions(-)
diff --git a/lib/PublicInbox/SearchThread.pm b/lib/PublicInbox/SearchThread.pm
index c6bd999..24a56d2 100644
--- a/lib/PublicInbox/SearchThread.pm
+++ b/lib/PublicInbox/SearchThread.pm
@@ -104,27 +104,21 @@ sub add_child {
if $self == $child;
my $cid = $child->{id};
- $self->{children}->{$cid} = $child;
# reparenting:
if (defined(my $parent = $child->{parent})) {
delete $parent->{children}->{$cid};
}
+ $self->{children}->{$cid} = $child;
$child->{parent} = $self;
}
sub has_descendent {
- my ($cur, $child) = @_;
- my %seen;
- my @q = ($cur->{parent} || $cur);
-
- while (defined($cur = shift @q)) {
- return 1 if $cur == $child;
-
- if (!$seen{$cur}++) {
- push @q, values %{$cur->{children}};
- }
+ my ($self, $child) = @_;
+ while ($child) {
+ return 1 if $self == $child;
+ $child = $child->{parent};
}
0;
}
--
EW
^ permalink raw reply related [flat|nested] 23+ messages in thread