unofficial mirror of meta@public-inbox.org
 help / color / mirror / Atom feed
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH] lei up: fix missing -t/--threads matches w/ saved search
Date: Sun,  1 Oct 2023 22:29:07 +0000	[thread overview]
Message-ID: <20231001222907.2271753-1-e@80x24.org> (raw)

We must not filter seen docids out of the mset itself; the filter
must only be applied to the result of over->expand_thread.
---
 lib/PublicInbox/LeiXSearch.pm | 34 +++++++++++++---------------------
 lib/PublicInbox/Over.pm       |  7 +++++--
 t/lei-q-save.t                | 19 +++++++++++++++++++
 3 files changed, 37 insertions(+), 23 deletions(-)

diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm
index 7f4911b3..5f105567 100644
--- a/lib/PublicInbox/LeiXSearch.pm
+++ b/lib/PublicInbox/LeiXSearch.pm
@@ -176,14 +176,10 @@ sub query_one_mset { # for --threads and l2m w/o sort
 	my $threads = $lei->{opt}->{threads} // 0;
 	my $fl = $threads > 1 ? 1 : undef;
 	my $lss = $lei->{lss};
-	my $maxk = "external.$dir.maxuid";
-	my $stop_at = $lss ? $lss->{-cfg}->{$maxk} : undef;
-	if (defined $stop_at) {
-		ref($stop_at) and
-			return warn("$maxk=$stop_at has multiple values\n");
-		($stop_at =~ /[^0-9]/) and
-			return warn("$maxk=$stop_at not numeric\n");
-	}
+	my $maxk = "external.$dir.maxuid"; # max of previous, so our min
+	my $min = $lss ? ($lss->{-cfg}->{$maxk} // 0) : 0;
+	ref($min) and return warn("$maxk=$min has multiple values\n");
+	($min =~ /[^0-9]/) and return warn("$maxk=$min not numeric\n");
 	my $first_ids;
 	do {
 		$mset = eval { $srch->mset($mo->{qstr}, $mo) };
@@ -192,29 +188,26 @@ sub query_one_mset { # for --threads and l2m w/o sort
 				$mset->get_matches_estimated);
 		wait_startq($lei); # wait for keyword updates
 		my $ids = $srch->mset_to_artnums($mset, $mo);
-		@$ids = grep { $_ > $stop_at } @$ids if defined($stop_at);
 		my $i = 0;
 		if ($threads) {
 			# copy $ids if $lss since over->expand_thread
 			# shifts @{$ctx->{ids}}
 			$first_ids = [ @$ids ] if $lss;
-			my $ctx = { ids => $ids };
-			my %n2item = map { ($ids->[$i++], $_) } $mset->items;
-			while ($over->expand_thread($ctx)) {
-				for my $n (@{$ctx->{xids}}) {
+			my $ctx = { ids => $ids, min => $min };
+			my %n2item = map { $ids->[$i++] => $_ } $mset->items;
+			while ($over->expand_thread($ctx)) { # fills {xids}
+				for my $n (@{delete $ctx->{xids}}) {
 					my $smsg = $over->get_art($n) or next;
-					my $mitem = delete $n2item{$n};
+					my $mi = delete $n2item{$n};
 					next if $smsg->{bytes} == 0;
-					if ($mitem && $can_kw) {
-						mitem_kw($srch, $smsg, $mitem,
-							$fl);
-					} elsif ($mitem && $fl) {
+					if ($mi && $can_kw) {
+						mitem_kw($srch, $smsg, $mi, $fl)
+					} elsif ($mi && $fl) {
 						# call ->xsmsg_vmd, later
 						$smsg->{lei_q_tt_flagged} = 1;
 					}
-					$each_smsg->($smsg, $mitem);
+					$each_smsg->($smsg, $mi);
 				}
-				@{$ctx->{xids}} = ();
 			}
 		} else {
 			$first_ids = $ids;
@@ -230,7 +223,6 @@ sub query_one_mset { # for --threads and l2m w/o sort
 	} while (_mset_more($mset, $mo));
 	_check_mset_limit($lei, $dir, $mset);
 	if ($lss && scalar(@$first_ids)) {
-		undef $stop_at;
 		my $max = $first_ids->[0];
 		$lss->cfg_set($maxk, $max);
 		undef $lss;
diff --git a/lib/PublicInbox/Over.pm b/lib/PublicInbox/Over.pm
index 82034b30..e3a8adb1 100644
--- a/lib/PublicInbox/Over.pm
+++ b/lib/PublicInbox/Over.pm
@@ -12,6 +12,7 @@ use DBD::SQLite;
 use PublicInbox::Smsg;
 use Compress::Zlib qw(uncompress);
 use constant DEFAULT_LIMIT => 1000;
+use List::Util (); # for max
 
 sub dbh_new {
 	my ($self, $rw) = @_;
@@ -198,10 +199,12 @@ ORDER BY $sort_col DESC
 }
 
 # strict `tid' matches, only, for thread-expanded mbox.gz search results
-# and future CLI interface
+# and lei
 # returns true if we have IDs, undef if not
 sub expand_thread {
 	my ($self, $ctx) = @_;
+	# previous maxuid for LeiSavedSearch is our min:
+	my $lss_min = $ctx->{min} // 0;
 	my $dbh = dbh($self);
 	do {
 		defined(my $num = $ctx->{ids}->[0]) or return;
@@ -214,7 +217,7 @@ SELECT num FROM over WHERE tid = ? AND num > ?
 ORDER BY num ASC LIMIT 1000
 
 			my $xids = $dbh->selectcol_arrayref($sql, undef, $tid,
-							$ctx->{prev} // 0);
+				List::Util::max($ctx->{prev} // 0, $lss_min));
 			if (scalar(@$xids)) {
 				$ctx->{prev} = $xids->[-1];
 				$ctx->{xids} = $xids;
diff --git a/t/lei-q-save.t b/t/lei-q-save.t
index 1d9d5a51..53311696 100644
--- a/t/lei-q-save.t
+++ b/t/lei-q-save.t
@@ -15,6 +15,7 @@ $doc3->header_set('Date', PublicInbox::Smsg::date({ds => time - (86400 * 4)}));
 my $cat_env = { VISUAL => 'cat', EDITOR => 'cat' };
 my $pre_existing = <<'EOF';
 From x Mon Sep 17 00:00:00 2001
+From: <x@example.com>
 Message-ID: <import-before@example.com>
 Subject: pre-existing
 Date: Sat, 02 Oct 2010 00:00:00 +0000
@@ -286,5 +287,23 @@ test_lei(sub {
 	is(eml_load($new[0])->header('Subject'), 'do not ever call, again',
 		'up retrieved correct message');
 
+	# --thread expansion
+	$d = "$home/thread-expand";
+	lei_ok(qw(q --no-external m:import-before@example.com -t -o), $d);
+	@orig = glob("$d/{new,cur}/*");
+	is(scalar(@orig), 1, 'one result so far');
+	lei_ok [ qw(import -Feml) ], undef, { 0 => \<<'EOM' };
+Date: Sun, 02 Oct 2023 00:00:00 +0000
+From: <x@example.com>
+In-Reply-To: <import-before@example.com>
+Message-ID: <reply1@example.com>
+Subject: reply1
+EOM
+
+	lei_ok qw(up), $d;
+	@new = glob("$d/{new,cur}/*");
+	is(scalar(@new), 2, 'got new message');
+	is_xdeeply([grep { $_ eq $orig[0] } @new], \@orig,
+		'original message preserved on up w/ threads');
 });
 done_testing;

                 reply	other threads:[~2023-10-01 22:29 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20231001222907.2271753-1-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).