From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH] lei up: fix missing -t/--threads matches w/ saved search
Date: Sun, 1 Oct 2023 22:29:07 +0000 [thread overview]
Message-ID: <20231001222907.2271753-1-e@80x24.org> (raw)
We must not filter seen docids out of the mset itself; the filter
must only be applied to the result of over->expand_thread.
---
lib/PublicInbox/LeiXSearch.pm | 34 +++++++++++++---------------------
lib/PublicInbox/Over.pm | 7 +++++--
t/lei-q-save.t | 19 +++++++++++++++++++
3 files changed, 37 insertions(+), 23 deletions(-)
diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm
index 7f4911b3..5f105567 100644
--- a/lib/PublicInbox/LeiXSearch.pm
+++ b/lib/PublicInbox/LeiXSearch.pm
@@ -176,14 +176,10 @@ sub query_one_mset { # for --threads and l2m w/o sort
my $threads = $lei->{opt}->{threads} // 0;
my $fl = $threads > 1 ? 1 : undef;
my $lss = $lei->{lss};
- my $maxk = "external.$dir.maxuid";
- my $stop_at = $lss ? $lss->{-cfg}->{$maxk} : undef;
- if (defined $stop_at) {
- ref($stop_at) and
- return warn("$maxk=$stop_at has multiple values\n");
- ($stop_at =~ /[^0-9]/) and
- return warn("$maxk=$stop_at not numeric\n");
- }
+ my $maxk = "external.$dir.maxuid"; # max of previous, so our min
+ my $min = $lss ? ($lss->{-cfg}->{$maxk} // 0) : 0;
+ ref($min) and return warn("$maxk=$min has multiple values\n");
+ ($min =~ /[^0-9]/) and return warn("$maxk=$min not numeric\n");
my $first_ids;
do {
$mset = eval { $srch->mset($mo->{qstr}, $mo) };
@@ -192,29 +188,26 @@ sub query_one_mset { # for --threads and l2m w/o sort
$mset->get_matches_estimated);
wait_startq($lei); # wait for keyword updates
my $ids = $srch->mset_to_artnums($mset, $mo);
- @$ids = grep { $_ > $stop_at } @$ids if defined($stop_at);
my $i = 0;
if ($threads) {
# copy $ids if $lss since over->expand_thread
# shifts @{$ctx->{ids}}
$first_ids = [ @$ids ] if $lss;
- my $ctx = { ids => $ids };
- my %n2item = map { ($ids->[$i++], $_) } $mset->items;
- while ($over->expand_thread($ctx)) {
- for my $n (@{$ctx->{xids}}) {
+ my $ctx = { ids => $ids, min => $min };
+ my %n2item = map { $ids->[$i++] => $_ } $mset->items;
+ while ($over->expand_thread($ctx)) { # fills {xids}
+ for my $n (@{delete $ctx->{xids}}) {
my $smsg = $over->get_art($n) or next;
- my $mitem = delete $n2item{$n};
+ my $mi = delete $n2item{$n};
next if $smsg->{bytes} == 0;
- if ($mitem && $can_kw) {
- mitem_kw($srch, $smsg, $mitem,
- $fl);
- } elsif ($mitem && $fl) {
+ if ($mi && $can_kw) {
+ mitem_kw($srch, $smsg, $mi, $fl)
+ } elsif ($mi && $fl) {
# call ->xsmsg_vmd, later
$smsg->{lei_q_tt_flagged} = 1;
}
- $each_smsg->($smsg, $mitem);
+ $each_smsg->($smsg, $mi);
}
- @{$ctx->{xids}} = ();
}
} else {
$first_ids = $ids;
@@ -230,7 +223,6 @@ sub query_one_mset { # for --threads and l2m w/o sort
} while (_mset_more($mset, $mo));
_check_mset_limit($lei, $dir, $mset);
if ($lss && scalar(@$first_ids)) {
- undef $stop_at;
my $max = $first_ids->[0];
$lss->cfg_set($maxk, $max);
undef $lss;
diff --git a/lib/PublicInbox/Over.pm b/lib/PublicInbox/Over.pm
index 82034b30..e3a8adb1 100644
--- a/lib/PublicInbox/Over.pm
+++ b/lib/PublicInbox/Over.pm
@@ -12,6 +12,7 @@ use DBD::SQLite;
use PublicInbox::Smsg;
use Compress::Zlib qw(uncompress);
use constant DEFAULT_LIMIT => 1000;
+use List::Util (); # for max
sub dbh_new {
my ($self, $rw) = @_;
@@ -198,10 +199,12 @@ ORDER BY $sort_col DESC
}
# strict `tid' matches, only, for thread-expanded mbox.gz search results
-# and future CLI interface
+# and lei
# returns true if we have IDs, undef if not
sub expand_thread {
my ($self, $ctx) = @_;
+ # previous maxuid for LeiSavedSearch is our min:
+ my $lss_min = $ctx->{min} // 0;
my $dbh = dbh($self);
do {
defined(my $num = $ctx->{ids}->[0]) or return;
@@ -214,7 +217,7 @@ SELECT num FROM over WHERE tid = ? AND num > ?
ORDER BY num ASC LIMIT 1000
my $xids = $dbh->selectcol_arrayref($sql, undef, $tid,
- $ctx->{prev} // 0);
+ List::Util::max($ctx->{prev} // 0, $lss_min));
if (scalar(@$xids)) {
$ctx->{prev} = $xids->[-1];
$ctx->{xids} = $xids;
diff --git a/t/lei-q-save.t b/t/lei-q-save.t
index 1d9d5a51..53311696 100644
--- a/t/lei-q-save.t
+++ b/t/lei-q-save.t
@@ -15,6 +15,7 @@ $doc3->header_set('Date', PublicInbox::Smsg::date({ds => time - (86400 * 4)}));
my $cat_env = { VISUAL => 'cat', EDITOR => 'cat' };
my $pre_existing = <<'EOF';
From x Mon Sep 17 00:00:00 2001
+From: <x@example.com>
Message-ID: <import-before@example.com>
Subject: pre-existing
Date: Sat, 02 Oct 2010 00:00:00 +0000
@@ -286,5 +287,23 @@ test_lei(sub {
is(eml_load($new[0])->header('Subject'), 'do not ever call, again',
'up retrieved correct message');
+ # --thread expansion
+ $d = "$home/thread-expand";
+ lei_ok(qw(q --no-external m:import-before@example.com -t -o), $d);
+ @orig = glob("$d/{new,cur}/*");
+ is(scalar(@orig), 1, 'one result so far');
+ lei_ok [ qw(import -Feml) ], undef, { 0 => \<<'EOM' };
+Date: Sun, 02 Oct 2023 00:00:00 +0000
+From: <x@example.com>
+In-Reply-To: <import-before@example.com>
+Message-ID: <reply1@example.com>
+Subject: reply1
+EOM
+
+ lei_ok qw(up), $d;
+ @new = glob("$d/{new,cur}/*");
+ is(scalar(@new), 2, 'got new message');
+ is_xdeeply([grep { $_ eq $orig[0] } @new], \@orig,
+ 'original message preserved on up w/ threads');
});
done_testing;
reply other threads:[~2023-10-01 22:29 UTC|newest]
Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: https://public-inbox.org/README
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20231001222907.2271753-1-e@80x24.org \
--to=e@80x24.org \
--cc=meta@public-inbox.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).