From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: AS51167 193.164.131.0/24 X-Spam-Status: No, score=-1.8 required=3.0 tests=AWL,BAYES_00, RCVD_IN_MSPIKE_BL,RCVD_IN_MSPIKE_ZBI,RCVD_IN_XBL,RDNS_NONE,SPF_FAIL, SPF_HELO_FAIL shortcircuit=no autolearn=no autolearn_force=no version=3.4.0 Received: from 80x24.org (unknown [193.164.131.95]) by dcvr.yhbt.net (Postfix) with ESMTP id 463B720281 for ; Mon, 2 Oct 2017 22:24:50 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH] search: try to fill in ghosts when generating thread skeleton Date: Mon, 2 Oct 2017 22:24:49 +0000 Message-Id: <20171002222449.8492-1-e@80x24.org> List-Id: Since we attempt to fill in threads by Subject, our thread skeletons can cross actual thread IDs, leading to the possibility of false ghosts showing up in the skeleton. Try to fill in the ghosts as well as possible by performing a message lookup. --- lib/PublicInbox/SearchThread.pm | 18 ++++++++++-------- lib/PublicInbox/SearchView.pm | 6 ++++-- lib/PublicInbox/View.pm | 8 ++++---- 3 files changed, 18 insertions(+), 14 deletions(-) diff --git a/lib/PublicInbox/SearchThread.pm b/lib/PublicInbox/SearchThread.pm index 2966907..2e66099 100644 --- a/lib/PublicInbox/SearchThread.pm +++ b/lib/PublicInbox/SearchThread.pm @@ -22,14 +22,15 @@ use strict; use warnings; sub thread { - my ($messages, $ordersub) = @_; + my ($messages, $ordersub, $srch) = @_; my $id_table = {}; _add_message($id_table, $_) foreach @$messages; my $rootset = [ grep { - !delete($_->{parent}) && $_->visible } values %$id_table ]; + !delete($_->{parent}) && $_->visible($srch) + } values %$id_table ]; $id_table = undef; $rootset = $ordersub->($rootset); - $_->order_children($ordersub) for @$rootset; + $_->order_children($ordersub, $srch) for @$rootset; $rootset; } @@ -129,20 +130,21 @@ sub has_descendent { # Do not show/keep ghosts iff they have no children. 
Sometimes # a ghost Message-ID is the result of a long header line # being folded/mangled by a MUA, and not a missing message. -sub visible ($) { - my ($self) = @_; - $self->{smsg} || scalar values %{$self->{children}}; +sub visible ($$) { + my ($self, $srch) = @_; + ($self->{smsg} ||= eval { $srch->lookup_message($self->{id})}) || + (scalar values %{$self->{children}}); } sub order_children { - my ($cur, $ordersub) = @_; + my ($cur, $ordersub, $srch) = @_; my %seen = ($cur => 1); # self-referential loop prevention my @q = ($cur); while (defined($cur = shift @q)) { my $c = $cur->{children}; # The hashref here... - $c = [ grep { !$seen{$_}++ && visible($_) } values %$c ]; + $c = [ grep { !$seen{$_}++ && visible($_, $srch) } values %$c ]; $c = $ordersub->($c) if scalar @$c > 1; $cur->{children} = $c; # ...becomes an arrayref push @q, @$c; diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm index a597403..c42cf2d 100644 --- a/lib/PublicInbox/SearchView.pm +++ b/lib/PublicInbox/SearchView.pm @@ -207,7 +207,8 @@ sub sort_relevance { sub mset_thread { my ($ctx, $mset, $q) = @_; my %pct; - my $msgs = $ctx->{srch}->retry_reopen(sub { [ map { + my $srch = $ctx->{srch}; + my $msgs = $srch->retry_reopen(sub { [ map { my $i = $_; my $smsg = PublicInbox::SearchMsg->load_doc($i->get_document); $pct{$smsg->mid} = $i->get_percent; @@ -215,7 +216,8 @@ sub mset_thread { } ($mset->items) ]}); my $r = $q->{r}; my $rootset = PublicInbox::SearchThread::thread($msgs, - $r ? sort_relevance(\%pct) : *PublicInbox::View::sort_ts); + $r ? sort_relevance(\%pct) : *PublicInbox::View::sort_ts, + $srch); my $skel = search_nav_bot($mset, $q). "
";
 	my $inbox = $ctx->{-inbox};
 	$ctx->{-upfx} = '';
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index b39c820..bc45bfa 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -370,7 +370,7 @@ sub thread_html {
 	$ctx->{mapping} = {};
 	$ctx->{s_nr} = "$nr+ messages in thread";
 
-	my $rootset = thread_results($msgs);
+	my $rootset = thread_results($msgs, $srch);
 
 	# reduce hash lookups in pre_thread->skel_dump
 	my $inbox = $ctx->{-inbox};
@@ -607,7 +607,7 @@ sub thread_skel {
 	# reduce hash lookups in skel_dump
 	my $ibx = $ctx->{-inbox};
 	$ctx->{-obfs_ibx} = $ibx->{obfuscate} ? $ibx : undef;
-	walk_thread(thread_results($sres), $ctx, *skel_dump);
+	walk_thread(thread_results($sres, $srch), $ctx, *skel_dump);
 
 	$ctx->{parent_msg} = $parent;
 }
@@ -736,7 +736,7 @@ sub msg_timestamp {
 }
 
 sub thread_results {
-	my ($msgs) = @_;
+	my ($msgs, $srch) = @_;
 	require PublicInbox::SearchThread;
-	PublicInbox::SearchThread::thread($msgs, *sort_ts);
+	PublicInbox::SearchThread::thread($msgs, *sort_ts, $srch);
 }
@@ -1000,7 +1000,7 @@ sub index_topics {
 	my $nr = scalar @{$sres->{msgs}};
 	if ($nr) {
 		$sres = load_results($srch, $sres);
-		walk_thread(thread_results($sres), $ctx, *acc_topic);
+		walk_thread(thread_results($sres, $srch), $ctx, *acc_topic);
 	}
 	$ctx->{-next_o} = $off+ $nr;
 	$ctx->{-cur_o} = $off;
-- 
EW