From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 6FD941FC97 for ; Fri, 27 Nov 2020 09:52:55 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 06/12] miscsearch: implement ->newsgroup_matches Date: Fri, 27 Nov 2020 09:52:48 +0000 Message-Id: <20201127095254.21624-7-e@80x24.org> In-Reply-To: <20201127095254.21624-1-e@80x24.org> References: <20201127095254.21624-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: This may be used to speed up newsgroup searches down-the-line, but the grep perlop isn't too shabby, at the moment. --- lib/PublicInbox/MiscSearch.pm | 40 +++++++++++++++++++++++++++++++++++ t/extsearch.t | 4 ++++ 2 files changed, 44 insertions(+) diff --git a/lib/PublicInbox/MiscSearch.pm b/lib/PublicInbox/MiscSearch.pm index 48ef6914..f2e31443 100644 --- a/lib/PublicInbox/MiscSearch.pm +++ b/lib/PublicInbox/MiscSearch.pm @@ -76,6 +76,46 @@ sub mset { retry_reopen($self, \&misc_enquire_once, $qr, $opt); } +sub ibx_matches_once { # retry_reopen callback + my ($self, $qr, $by_newsgroup) = @_; + # double in case no newsgroups are configured: + my $limit = scalar(keys %$by_newsgroup) * 2; + my $opt = { limit => $limit, offset => 0, relevance => -1 }; + my $ret = {}; # newsgroup => $ibx of matches + while (1) { + my $mset = misc_enquire_once($self, $qr, $opt); + for my $mi ($mset->items) { + my $doc = $mi->get_document; + my $end = $doc->termlist_end; + my $cur = $doc->termlist_begin; + $cur->skip_to('Q'); + if ($cur != $end) { + my $ng = $cur->get_termname; # eidx_key + $ng =~ s/\AQ// or warn "BUG: no `Q': $ng"; + if (my $ibx = $by_newsgroup->{$ng}) { + $ret->{$ng} = $ibx; + } + } else { + warn <<EOF; +BUG: docid=${\$mi->get_docid} has 
no `Q' (eidx_key) term +EOF + } + } + my $nr = $mset->size; + return $ret if $nr < $limit; + $opt->{offset} += $nr; + } +} + +# returns a newsgroup => PublicInbox::Inbox mapping +sub newsgroup_matches { + my ($self, $qs, $pi_cfg) = @_; + my $qp = $self->{qp} //= mi_qp_new($self); + $qs .= ' type:inbox'; + my $qr = $qp->parse_query($qs, $PublicInbox::Search::QP_FLAGS); + retry_reopen($self, \&ibx_matches_once, $qr, $pi_cfg->{-by_newsgroup}); +} + sub ibx_data_once { my ($self, $ibx) = @_; my $xdb = $self->{xdb}; diff --git a/t/extsearch.t b/t/extsearch.t index 0045294b..85cdf74a 100644 --- a/t/extsearch.t +++ b/t/extsearch.t @@ -79,5 +79,9 @@ my @it = $misc->mset('')->items; is(scalar(@it), 2, 'two inboxes'); like($it[0]->get_document->get_data, qr/v2test/, 'docdata matched v2'); like($it[1]->get_document->get_data, qr/v1test/, 'docdata matched v1'); +my $pi_cfg = PublicInbox::Config->new; +$pi_cfg->fill_all; +my $ret = $misc->newsgroup_matches('', $pi_cfg); +is_deeply($pi_cfg->{-by_newsgroup}, $ret, '->newsgroup_matches'); done_testing;