From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [RFC] www: mbox: reuse Xapian::Enquire object
Date: Thu, 17 Aug 2023 21:54:46 +0000 [thread overview]
Message-ID: <20230817215446.1692719-1-e@80x24.org> (raw)
I'm not sure if this helps from a correctness perspective[1].
I'm hoping it can work better with MVCC in avoiding
duplicate/skipped documents in the face of concurrent
modifications. Nor does it noticeably save CPU cycles, despite
avoiding the parser and Enquire object setup overhead.
[1] also posted on xapian-discuss about it <20230817212826.M47893@dcvr>
This is a tangent from the ongoing effort to get -cindex --associate
to work acceptably...
---
lib/PublicInbox/Isearch.pm | 2 ++
lib/PublicInbox/Mbox.pm | 7 +++----
lib/PublicInbox/Search.pm | 8 +++++++-
3 files changed, 12 insertions(+), 5 deletions(-)
diff --git a/lib/PublicInbox/Isearch.pm b/lib/PublicInbox/Isearch.pm
index 5cbc36fd..38c80770 100644
--- a/lib/PublicInbox/Isearch.pm
+++ b/lib/PublicInbox/Isearch.pm
@@ -119,6 +119,8 @@ sub mset_to_smsg {
wantarray ? ($mset->get_matches_estimated, \@msgs) : \@msgs;
}
+sub run_enquire { $_[0]->{es}->run_enquire($_[1], $_[2]) }
+
sub has_threadid { 1 }
sub help { $_[0]->{es}->help }
diff --git a/lib/PublicInbox/Mbox.pm b/lib/PublicInbox/Mbox.pm
index bf61bb0e..258bb3e6 100644
--- a/lib/PublicInbox/Mbox.pm
+++ b/lib/PublicInbox/Mbox.pm
@@ -187,7 +187,7 @@ sub results_cb {
}
# refill result set, deprioritize since there's many results
my $srch = $ctx->{ibx}->isrch or return $ctx->gone('search');
- my $mset = $srch->mset($ctx->{query}, $ctx->{qopts});
+ my $mset = $srch->run_enquire($ctx->{enquire}, $ctx->{qopts});
my $size = $mset->size or return;
$ctx->{qopts}->{offset} += $size;
$ctx->{ids} = $srch->mset_to_artnums($mset, $ctx->{qopts});
@@ -210,7 +210,7 @@ sub results_thread_cb {
# refill result set, deprioritize since there's many results
my $srch = $ctx->{ibx}->isrch or return $ctx->gone('search');
- my $mset = $srch->mset($ctx->{query}, $ctx->{qopts});
+ my $mset = $srch->run_enquire($ctx->{enquire}, $ctx->{qopts});
my $size = $mset->size or return;
$ctx->{qopts}->{offset} += $size;
$ctx->{ids} = $srch->mset_to_artnums($mset, $ctx->{qopts});
@@ -240,11 +240,10 @@ sub mbox_all {
}
$qopts->{threads} = 1 if $q->{t};
$srch->query_approxidate($ctx->{ibx}->git, $q_string);
- my $mset = $srch->mset($q_string, $qopts);
+ (my $mset, $ctx->{enquire}) = $srch->mset($q_string, $qopts);
$qopts->{offset} = $mset->size or
return [404, [qw(Content-Type text/plain)],
["No results found\n"]];
- $ctx->{query} = $q_string;
$ctx->{ids} = $srch->mset_to_artnums($mset, $qopts);
require PublicInbox::MboxGz;
my $fn;
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index b2de3450..5d19ed9f 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -414,6 +414,11 @@ sub do_enquire { # shared with CodeSearch
# on `-tt' ({threads} > 1) which sets the Flagged|Important keyword
(($opt->{threads} // 0) == 1 && has_threadid($self)) and
$enq->set_collapse_key(THREADID);
+ run_enquire($self, $enq, $opt);
+}
+
+sub run_enquire {
+ my ($self, $enq, $opt) = @_;
retry_reopen($self, \&enquire_once, $enq,
$opt->{offset} || 0, $opt->{limit} || 50);
}
@@ -450,7 +455,8 @@ sub has_threadid ($) {
sub enquire_once { # retry_reopen callback
my (undef, $enq, $offset, $limit) = @_;
- $enq->get_mset($offset, $limit);
+ my $mset = $enq->get_mset($offset, $limit);
+ wantarray ? ($mset, $enq) : $mset;
}
sub mset_to_smsg {
reply other threads:[~2023-08-17 21:54 UTC|newest]
Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: https://public-inbox.org/README
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230817215446.1692719-1-e@80x24.org \
--to=e@80x24.org \
--cc=meta@public-inbox.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).