From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 9C0761FB05 for ; Thu, 14 Jan 2021 07:06:28 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 08/14] lei q: reinstate smsg dedupe Date: Wed, 13 Jan 2021 19:06:21 -1200 Message-Id: <20210114070627.18195-9-e@80x24.org> In-Reply-To: <20210114070627.18195-1-e@80x24.org> References: <20210114070627.18195-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: Now that dedupe is serialization and fork-safe, we can wire it back up in our query results paths. --- lib/PublicInbox/LeiQuery.pm | 5 ++--- lib/PublicInbox/LeiXSearch.pm | 8 ++++++-- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/lib/PublicInbox/LeiQuery.pm b/lib/PublicInbox/LeiQuery.pm index 1a3e1193..69d2f9a6 100644 --- a/lib/PublicInbox/LeiQuery.pm +++ b/lib/PublicInbox/LeiQuery.pm @@ -26,14 +26,13 @@ sub lei_q { my $sto = $self->_lei_store(1); my $cfg = $self->_lei_cfg(1); my $opt = $self->{opt}; - require PublicInbox::LeiDedupe; - my $dd = PublicInbox::LeiDedupe->new($self); # --local is enabled by default # src: LeiXSearch || LeiSearch || Inbox my @srcs; require PublicInbox::LeiXSearch; require PublicInbox::LeiOverview; + require PublicInbox::LeiDedupe; my $lxs = PublicInbox::LeiXSearch->new; # --external is enabled by default, but allow --no-external @@ -49,8 +48,8 @@ sub lei_q { unshift(@srcs, $sto->search) if $opt->{'local'}; # no forking workers after this - require PublicInbox::LeiOverview; $self->{ovv} = PublicInbox::LeiOverview->new($self); + $self->{dd} = PublicInbox::LeiDedupe->new($self); my %mset_opt = map { $_ => $opt->{$_} } qw(thread limit offset); $mset_opt{asc} = $opt->{'reverse'} ? 1 : 0; $mset_opt{qstr} = join(' ', map {; diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm index 68889e81..80e7a7f7 100644 --- a/lib/PublicInbox/LeiXSearch.pm +++ b/lib/PublicInbox/LeiXSearch.pm @@ -103,6 +103,8 @@ sub query_thread_mset { # for --thread my $mo = { %{$lei->{mset_opt}} }; my $mset; my $each_smsg = $lei->{ovv}->ovv_each_smsg_cb($lei); + my $dd = $lei->{dd}; + $dd->prepare_dedupe; do { $mset = $srch->mset($mo->{qstr}, $mo); my $ids = $srch->mset_to_artnums($mset, $mo); @@ -112,7 +114,7 @@ sub query_thread_mset { # for --thread while ($over->expand_thread($ctx)) { for my $n (@{$ctx->{xids}}) { my $smsg = $over->get_art($n) or next; - # next if $dd->is_smsg_dup($smsg); TODO + next if $dd->is_smsg_dup($smsg); my $mitem = delete $n2item{$smsg->{num}}; $each_smsg->($smsg, $mitem); # $self->out($buf .= $ORS); @@ -132,11 +134,13 @@ sub query_mset { # non-parallel for non-"--thread" users my $mset; $self->attach_external($_) for @$srcs; my $each_smsg = $lei->{ovv}->ovv_each_smsg_cb($lei); + my $dd = $lei->{dd}; + $dd->prepare_dedupe; do { $mset = $self->mset($mo->{qstr}, $mo); for my $it ($mset->items) { my $smsg = smsg_for($self, $it) or next; - # next if $dd->is_smsg_dup($smsg); + next if $dd->is_smsg_dup($smsg); $each_smsg->($smsg, $it); # $self->out($buf .= $ORS) if defined $buf; #$emit_cb->($smsg);