From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id DB3941F8C7 for ; Wed, 23 Jun 2021 11:14:22 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 1/3] search: make xap_terms easier-to-use and use it more Date: Wed, 23 Jun 2021 07:14:20 -0400 Message-Id: <20210623111422.30182-2-e@80x24.org> In-Reply-To: <20210623111422.30182-1-e@80x24.org> References: <20210623111422.30182-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: This allows us to simplify callers throughout, and exceptions can no longer be silently hidden. MiscSearch now uses xap_terms for looking up eidx_key terms for a code reduction. We also simplify LeiStore->_msg_kw for runtime use by moving the MsetIterator handling into the t/lei_store.t test case. --- lib/PublicInbox/LeiSearch.pm | 16 +++++++--------- lib/PublicInbox/LeiXSearch.pm | 4 ++-- lib/PublicInbox/MiscSearch.pm | 23 +++++++---------------- lib/PublicInbox/Search.pm | 22 +++++++++------------- lib/PublicInbox/SearchIdx.pm | 5 +++-- t/lei_store.t | 3 ++- 6 files changed, 30 insertions(+), 43 deletions(-) diff --git a/lib/PublicInbox/LeiSearch.pm b/lib/PublicInbox/LeiSearch.pm index d0963e92..06ea6299 100644 --- a/lib/PublicInbox/LeiSearch.pm +++ b/lib/PublicInbox/LeiSearch.pm @@ -19,16 +19,13 @@ sub num2docid ($$) { } sub _msg_kw { # retry_reopen callback - my ($self, $num) = @_; # num_or_mitem - my $xdb = $self->xdb; # set {nshard}; - my $docid = ref($num) ? $num->get_docid : num2docid($self, $num); - my $kw = xap_terms('K', $xdb, $docid); - warn "E: #$docid ($num): $@\n" if $@; - wantarray ? 
sort(keys(%$kw)) : $kw; + my ($self, $num) = @_; + my $xdb = $self->xdb; # set {nshard} for num2docid; + xap_terms('K', $xdb, num2docid($self, $num)); } -sub msg_keywords { - my ($self, $num) = @_; # num_or_mitem +sub msg_keywords { # array or hashref + my ($self, $num) = @_; $self->retry_reopen(\&_msg_kw, $num); } @@ -138,7 +135,8 @@ sub kw_changed { $docids //= []; @$docids = sort { $a <=> $b } values %$xoids; } - my $cur_kw = msg_keywords($self, $docids->[0]); + my $cur_kw = eval { msg_keywords($self, $docids->[0]) }; + die "E: #$docids->[0] keyword lookup failure: $@\n" if $@; # RFC 5550 sec 5.9 on the $Forwarded keyword states: # "Once set, the flag SHOULD NOT be cleared" diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm index beb955bb..cac7fb7d 100644 --- a/lib/PublicInbox/LeiXSearch.pm +++ b/lib/PublicInbox/LeiXSearch.pm @@ -71,11 +71,11 @@ sub _mitem_kw { # retry_reopen callback my $doc = $mitem->get_document; my $kw = xap_terms('K', $doc); $kw->{flagged} = 1 if $flagged; - my $L = xap_terms('L', $doc); + my @L = xap_terms('L', $doc); # we keep the empty {kw} array here to prevent expensive work in # ->xsmsg_vmd, _unbless_smsg will clobber it iff it's empty $smsg->{kw} = [ sort keys %$kw ]; - $smsg->{L} = [ sort keys %$L ] if scalar(keys %$L); + $smsg->{L} = \@L if scalar(@L); } sub mitem_kw ($$$;$) { diff --git a/lib/PublicInbox/MiscSearch.pm b/lib/PublicInbox/MiscSearch.pm index ead9a278..4e010453 100644 --- a/lib/PublicInbox/MiscSearch.pm +++ b/lib/PublicInbox/MiscSearch.pm @@ -5,7 +5,7 @@ package PublicInbox::MiscSearch; use strict; use v5.10.1; -use PublicInbox::Search qw(retry_reopen int_val); +use PublicInbox::Search qw(retry_reopen int_val xap_terms); my $json; # Xapian value columns: @@ -90,15 +90,10 @@ sub ibx_matches_once { # retry_reopen callback while (1) { my $mset = misc_enquire_once($self, $qr, $opt); for my $mi ($mset->items) { - my $doc = $mi->get_document; - my $end = $doc->termlist_end; - my $cur = 
$doc->termlist_begin; - $cur->skip_to('Q'); - if ($cur != $end) { - my $ng = $cur->get_termname; # eidx_key - $ng =~ s/\AQ// or warn "BUG: no `Q': $ng"; - if (my $ibx = $by_newsgroup->{$ng}) { - $ret->{$ng} = $ibx; + my ($eidx_key) = xap_terms('Q', $mi->get_document); + if (defined($eidx_key)) { + if (my $ibx = $by_newsgroup->{$eidx_key}) { + $ret->{$eidx_key} = $ibx; } } else { warn <termlist_end; - my $cur = $doc->termlist_begin; - $cur->skip_to('Q'); - return if $cur == $end; - my $eidx_key = $cur->get_termname; - $eidx_key =~ s/\AQ// or return; # expired + my ($eidx_key) = xap_terms('Q', $doc); + return unless defined($eidx_key); # expired my $ce = $cache->{$eidx_key} = {}; $ce->{uidvalidity} = int_val($doc, $UIDVALIDITY); $ce->{-modified} = int_val($doc, $MODIFIED); diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index 59a5a3b0..7e19e616 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -557,19 +557,15 @@ sub get_pct ($) { # mset item sub xap_terms ($$;@) { my ($pfx, $xdb_or_doc, @docid) = @_; # @docid may be empty () my %ret; - eval { - my $end = $xdb_or_doc->termlist_end(@docid); - my $cur = $xdb_or_doc->termlist_begin(@docid); - for (; $cur != $end; $cur++) { - $cur->skip_to($pfx); - last if $cur == $end; - my $tn = $cur->get_termname; - if (index($tn, $pfx) == 0) { - $ret{substr($tn, length($pfx))} = undef; - } - } - }; - \%ret; + my $end = $xdb_or_doc->termlist_end(@docid); + my $cur = $xdb_or_doc->termlist_begin(@docid); + for (; $cur != $end; $cur++) { + $cur->skip_to($pfx); + last if $cur == $end; + my $tn = $cur->get_termname; + $ret{substr($tn, length($pfx))} = undef if !index($tn, $pfx); + } + wantarray ? sort(keys(%ret)) : \%ret; } 1; diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index f553eda6..65764cc8 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -435,8 +435,9 @@ sub add_xapian ($$$$) { if (my $old = $merge_vmd ? 
_get_doc($self, $smsg->{num}) : undef) { my @x = @VMD_MAP; while (my ($field, $pfx) = splice(@x, 0, 2)) { - my $vals = xap_terms($pfx, $old); - $doc->add_boolean_term($pfx.$_) for keys %$vals; + for my $term (xap_terms($pfx, $old)) { + $doc->add_boolean_term($pfx.$term); + } } } $self->{xdb}->replace_document($smsg->{num}, $doc); diff --git a/t/lei_store.t b/t/lei_store.t index db94f6da..73b5c74d 100644 --- a/t/lei_store.t +++ b/t/lei_store.t @@ -31,7 +31,8 @@ $sto->done; is($mset->size, 1, 'search works'); is_deeply($es->mset_to_artnums($mset), [ $msgs->[0]->{num} ], 'mset_to_artnums'); - my @kw = $es->msg_keywords(($mset->items)[0]); + my $mi = ($mset->items)[0]; + my @kw = PublicInbox::Search::xap_terms('K', $mi->get_document); is_deeply(\@kw, [], 'no flags'); }