From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.2 required=3.0 tests=ALL_TRUSTED,BAYES_00, DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF shortcircuit=no autolearn=ham autolearn_force=no version=3.4.6 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 478E01F55F for ; Sun, 1 Oct 2023 22:29:07 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=80x24.org; s=selector1; t=1696199347; bh=7c/VOl90Cqqu9tJQAPkjolCklJu6yXdteDVlv9qXenQ=; h=From:To:Subject:Date:From; b=hv8GuAOZT7hoEnZMU0y7/iZbbbxDQDj8t8uPfN9lG0/RT1/jlcj2ycmRSjx6BRz3V URgNiXUaJSRYVWmcb6OADqoZKJ7zHUMVeJoLsyWaeXwp4Rs5YPmO0BJIa8ABx2yjOZ G99XXoLcKctxw1YOTwObDdKf68S1hyuWeRSYFjoE= From: Eric Wong To: meta@public-inbox.org Subject: [PATCH] lei up: fix missing -t/--threads matches w/ saved search Date: Sun, 1 Oct 2023 22:29:07 +0000 Message-ID: <20231001222907.2271753-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: We must not filter previously-seen docids out of the mset itself; the filter must only be applied to the result of over->expand_thread. --- lib/PublicInbox/LeiXSearch.pm | 34 +++++++++++++--------------------- lib/PublicInbox/Over.pm | 7 +++++-- t/lei-q-save.t | 19 +++++++++++++++++++ 3 files changed, 37 insertions(+), 23 deletions(-) diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm index 7f4911b3..5f105567 100644 --- a/lib/PublicInbox/LeiXSearch.pm +++ b/lib/PublicInbox/LeiXSearch.pm @@ -176,14 +176,10 @@ sub query_one_mset { # for --threads and l2m w/o sort my $threads = $lei->{opt}->{threads} // 0; my $fl = $threads > 1 ? 1 : undef; my $lss = $lei->{lss}; - my $maxk = "external.$dir.maxuid"; - my $stop_at = $lss ? 
$lss->{-cfg}->{$maxk} : undef; - if (defined $stop_at) { - ref($stop_at) and - return warn("$maxk=$stop_at has multiple values\n"); - ($stop_at =~ /[^0-9]/) and - return warn("$maxk=$stop_at not numeric\n"); - } + my $maxk = "external.$dir.maxuid"; # max of previous, so our min + my $min = $lss ? ($lss->{-cfg}->{$maxk} // 0) : 0; + ref($min) and return warn("$maxk=$min has multiple values\n"); + ($min =~ /[^0-9]/) and return warn("$maxk=$min not numeric\n"); my $first_ids; do { $mset = eval { $srch->mset($mo->{qstr}, $mo) }; @@ -192,29 +188,26 @@ sub query_one_mset { # for --threads and l2m w/o sort $mset->get_matches_estimated); wait_startq($lei); # wait for keyword updates my $ids = $srch->mset_to_artnums($mset, $mo); - @$ids = grep { $_ > $stop_at } @$ids if defined($stop_at); my $i = 0; if ($threads) { # copy $ids if $lss since over->expand_thread # shifts @{$ctx->{ids}} $first_ids = [ @$ids ] if $lss; - my $ctx = { ids => $ids }; - my %n2item = map { ($ids->[$i++], $_) } $mset->items; - while ($over->expand_thread($ctx)) { - for my $n (@{$ctx->{xids}}) { + my $ctx = { ids => $ids, min => $min }; + my %n2item = map { $ids->[$i++] => $_ } $mset->items; + while ($over->expand_thread($ctx)) { # fills {xids} + for my $n (@{delete $ctx->{xids}}) { my $smsg = $over->get_art($n) or next; - my $mitem = delete $n2item{$n}; + my $mi = delete $n2item{$n}; next if $smsg->{bytes} == 0; - if ($mitem && $can_kw) { - mitem_kw($srch, $smsg, $mitem, - $fl); - } elsif ($mitem && $fl) { + if ($mi && $can_kw) { + mitem_kw($srch, $smsg, $mi, $fl) + } elsif ($mi && $fl) { # call ->xsmsg_vmd, later $smsg->{lei_q_tt_flagged} = 1; } - $each_smsg->($smsg, $mitem); + $each_smsg->($smsg, $mi); } - @{$ctx->{xids}} = (); } } else { $first_ids = $ids; @@ -230,7 +223,6 @@ sub query_one_mset { # for --threads and l2m w/o sort } while (_mset_more($mset, $mo)); _check_mset_limit($lei, $dir, $mset); if ($lss && scalar(@$first_ids)) { - undef $stop_at; my $max = $first_ids->[0]; 
$lss->cfg_set($maxk, $max); undef $lss; diff --git a/lib/PublicInbox/Over.pm b/lib/PublicInbox/Over.pm index 82034b30..e3a8adb1 100644 --- a/lib/PublicInbox/Over.pm +++ b/lib/PublicInbox/Over.pm @@ -12,6 +12,7 @@ use DBD::SQLite; use PublicInbox::Smsg; use Compress::Zlib qw(uncompress); use constant DEFAULT_LIMIT => 1000; +use List::Util (); # for max sub dbh_new { my ($self, $rw) = @_; @@ -198,10 +199,12 @@ ORDER BY $sort_col DESC } # strict `tid' matches, only, for thread-expanded mbox.gz search results -# and future CLI interface +# and lei # returns true if we have IDs, undef if not sub expand_thread { my ($self, $ctx) = @_; + # previous maxuid for LeiSavedSearch is our min: + my $lss_min = $ctx->{min} // 0; my $dbh = dbh($self); do { defined(my $num = $ctx->{ids}->[0]) or return; @@ -214,7 +217,7 @@ SELECT num FROM over WHERE tid = ? AND num > ? ORDER BY num ASC LIMIT 1000 my $xids = $dbh->selectcol_arrayref($sql, undef, $tid, - $ctx->{prev} // 0); + List::Util::max($ctx->{prev} // 0, $lss_min)); if (scalar(@$xids)) { $ctx->{prev} = $xids->[-1]; $ctx->{xids} = $xids; diff --git a/t/lei-q-save.t b/t/lei-q-save.t index 1d9d5a51..53311696 100644 --- a/t/lei-q-save.t +++ b/t/lei-q-save.t @@ -15,6 +15,7 @@ $doc3->header_set('Date', PublicInbox::Smsg::date({ds => time - (86400 * 4)})); my $cat_env = { VISUAL => 'cat', EDITOR => 'cat' }; my $pre_existing = <<'EOF'; From x Mon Sep 17 00:00:00 2001 +From: Message-ID: Subject: pre-existing Date: Sat, 02 Oct 2010 00:00:00 +0000 @@ -286,5 +287,23 @@ test_lei(sub { is(eml_load($new[0])->header('Subject'), 'do not ever call, again', 'up retrieved correct message'); + # --thread expansion + $d = "$home/thread-expand"; + lei_ok(qw(q --no-external m:import-before@example.com -t -o), $d); + @orig = glob("$d/{new,cur}/*"); + is(scalar(@orig), 1, 'one result so far'); + lei_ok [ qw(import -Feml) ], undef, { 0 => \<<'EOM' }; +Date: Sun, 02 Oct 2023 00:00:00 +0000 +From: +In-Reply-To: +Message-ID: +Subject: reply1 +EOM + + lei_ok 
qw(up), $d; + @new = glob("$d/{new,cur}/*"); + is(scalar(@new), 2, 'got new message'); + is_xdeeply([grep { $_ eq $orig[0] } @new], \@orig, + 'original message preserved on up w/ threads'); }); done_testing;