From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.2 required=3.0 tests=ALL_TRUSTED,AWL,BAYES_00, DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF shortcircuit=no autolearn=ham autolearn_force=no version=3.4.6 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 297321F406 for ; Thu, 17 Aug 2023 21:54:46 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=80x24.org; s=selector1; t=1692309286; bh=h2A4l2HtGmOibON2O+pOwbEpbbM5Zd7FF8WIaDoQ4b0=; h=From:To:Subject:Date:From; b=aQIvu/+cahMzYSms1eRjJESVAXRx2MRgPgZAilK9rkMxkUS6xk0VQdAnfK8HPuqQv J7w4A54D0J51Xc3T16Lm4dY/Ma8LE9JV6uEr/VeplRnE8Q0iKMonahkO+v23HXIrX4 L2wtKZmXWyH+wIX6k0W2sPoaBvb7zkD/7eChqThg= From: Eric Wong To: meta@public-inbox.org Subject: [RFC] www: mbox: reuse Xapian::Enquire object Date: Thu, 17 Aug 2023 21:54:46 +0000 Message-ID: <20230817215446.1692719-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: I'm not sure if this helps from a correctness perspective[1]. I'm hoping it can work better with MVCC in avoiding duplicate/skipped documents in the face of concurrent modifications. It doesn't noticeably save CPU cycles by avoiding parser and Enquire object setup overhead, either. [1] I also posted about this on xapian-discuss: <20230817212826.M47893@dcvr> This is a tangent from the ongoing effort to get -cindex --associate to work acceptably... --- lib/PublicInbox/Isearch.pm | 2 ++ lib/PublicInbox/Mbox.pm | 7 +++---- lib/PublicInbox/Search.pm | 8 +++++++- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/lib/PublicInbox/Isearch.pm b/lib/PublicInbox/Isearch.pm index 5cbc36fd..38c80770 100644 --- a/lib/PublicInbox/Isearch.pm +++ b/lib/PublicInbox/Isearch.pm @@ -119,6 +119,8 @@ sub mset_to_smsg { wantarray ? 
($mset->get_matches_estimated, \@msgs) : \@msgs; } +sub run_enquire { $_[0]->{es}->run_enquire($_[1], $_[2]) } + sub has_threadid { 1 } sub help { $_[0]->{es}->help } diff --git a/lib/PublicInbox/Mbox.pm b/lib/PublicInbox/Mbox.pm index bf61bb0e..258bb3e6 100644 --- a/lib/PublicInbox/Mbox.pm +++ b/lib/PublicInbox/Mbox.pm @@ -187,7 +187,7 @@ sub results_cb { } # refill result set, deprioritize since there's many results my $srch = $ctx->{ibx}->isrch or return $ctx->gone('search'); - my $mset = $srch->mset($ctx->{query}, $ctx->{qopts}); + my $mset = $srch->run_enquire($ctx->{enquire}, $ctx->{qopts}); my $size = $mset->size or return; $ctx->{qopts}->{offset} += $size; $ctx->{ids} = $srch->mset_to_artnums($mset, $ctx->{qopts}); @@ -210,7 +210,7 @@ sub results_thread_cb { # refill result set, deprioritize since there's many results my $srch = $ctx->{ibx}->isrch or return $ctx->gone('search'); - my $mset = $srch->mset($ctx->{query}, $ctx->{qopts}); + my $mset = $srch->run_enquire($ctx->{enquire}, $ctx->{qopts}); my $size = $mset->size or return; $ctx->{qopts}->{offset} += $size; $ctx->{ids} = $srch->mset_to_artnums($mset, $ctx->{qopts}); @@ -240,11 +240,10 @@ sub mbox_all { } $qopts->{threads} = 1 if $q->{t}; $srch->query_approxidate($ctx->{ibx}->git, $q_string); - my $mset = $srch->mset($q_string, $qopts); + (my $mset, $ctx->{enquire}) = $srch->mset($q_string, $qopts); $qopts->{offset} = $mset->size or return [404, [qw(Content-Type text/plain)], ["No results found\n"]]; - $ctx->{query} = $q_string; $ctx->{ids} = $srch->mset_to_artnums($mset, $qopts); require PublicInbox::MboxGz; my $fn; diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index b2de3450..5d19ed9f 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -414,6 +414,11 @@ sub do_enquire { # shared with CodeSearch # on `-tt' ({threads} > 1) which sets the Flagged|Important keyword (($opt->{threads} // 0) == 1 && has_threadid($self)) and $enq->set_collapse_key(THREADID); + 
run_enquire($self, $enq, $opt); +} + +sub run_enquire { + my ($self, $enq, $opt) = @_; retry_reopen($self, \&enquire_once, $enq, $opt->{offset} || 0, $opt->{limit} || 50); } @@ -450,7 +455,8 @@ sub has_threadid ($) { sub enquire_once { # retry_reopen callback my (undef, $enq, $offset, $limit) = @_; - $enq->get_mset($offset, $limit); + my $mset = $enq->get_mset($offset, $limit); + wantarray ? ($mset, $enq) : $mset; } sub mset_to_smsg {