From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 9482B1F5AE for ; Fri, 28 May 2021 19:47:23 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH] lei: retry_reopen on read-only Xapian access Date: Fri, 28 May 2021 19:47:23 +0000 Message-Id: <20210528194723.24011-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: Xapian DBs may be modified by a parallel process while we're reading it, and Xapian's MVCC model places the burden on readers to retry operations. We'll also have retry_reopen croak instead of die on errors, which ought to help us track down some "Document not found" errors I've occasionally seen when using "lei ". --- lib/PublicInbox/LeiSearch.pm | 33 ++++++++++++++++++++++++--------- lib/PublicInbox/LeiXSearch.pm | 21 ++++++++++++++------- lib/PublicInbox/Search.pm | 7 ++++--- 3 files changed, 42 insertions(+), 19 deletions(-) diff --git a/lib/PublicInbox/LeiSearch.pm b/lib/PublicInbox/LeiSearch.pm index 9297d060..b09d1e45 100644 --- a/lib/PublicInbox/LeiSearch.pm +++ b/lib/PublicInbox/LeiSearch.pm @@ -18,7 +18,7 @@ sub num2docid ($$) { ($num - 1) * $nshard + $num % $nshard + 1; } -sub msg_keywords { +sub _msg_kw { # retry_reopen callback my ($self, $num) = @_; # num_or_mitem my $xdb = $self->xdb; # set {nshard}; my $docid = ref($num) ? $num->get_docid : num2docid($self, $num); @@ -27,13 +27,16 @@ sub msg_keywords { wantarray ? sort(keys(%$kw)) : $kw; } -# returns undef if blob is unknown -sub oid_keywords { - my ($self, $oidhex) = @_; - my @num = $self->over->blob_exists($oidhex) or return; +sub msg_keywords { + my ($self, $num) = @_; # num_or_mitem + $self->retry_reopen(\&_msg_kw, $num); +} + +sub _oid_kw { # retry_reopen callback + my ($self, $nums) = @_; my $xdb = $self->xdb; # set {nshard}; my %kw; - for my $num (@num) { # there should only be one... + for my $num (@$nums) { # there should only be one... my $doc = $xdb->get_document(num2docid($self, $num)); my $x = xap_terms('K', $doc); %kw = (%kw, %$x); @@ -41,10 +44,15 @@ sub oid_keywords { \%kw; } -# lookup keywords+labels for external messages -sub xsmsg_vmd { +# returns undef if blob is unknown +sub oid_keywords { + my ($self, $oidhex) = @_; + my @num = $self->over->blob_exists($oidhex) or return; + $self->retry_reopen(\&_oid_kw, \@num); +} + +sub _xsmsg_vmd { # retry_reopen my ($self, $smsg, $want_label) = @_; - return if $smsg->{kw}; my $xdb = $self->xdb; # set {nshard}; my (%kw, %L, $doc, $x); $kw{flagged} = 1 if delete($smsg->{lei_q_tt_flagged}); @@ -62,6 +70,13 @@ sub xsmsg_vmd { $smsg->{L} = [ sort keys %L ] if scalar(keys(%L)); } +# lookup keywords+labels for external messages +sub xsmsg_vmd { + my ($self, $smsg, $want_label) = @_; + return if $smsg->{kw}; # already set by LeiXSearch->mitem_kw + $self->retry_reopen(\&_xsmsg_vmd, $smsg, $want_label); +} + # when a message has no Message-IDs at all, this is needed for # unsent Draft messages, at least sub content_key ($) { diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm index 760f9718..2e548a7a 100644 --- a/lib/PublicInbox/LeiXSearch.pm +++ b/lib/PublicInbox/LeiXSearch.pm @@ -67,17 +67,23 @@ sub remotes { @{$_[0]->{remotes} // []} } # called by PublicInbox::Search::xdb (usually via ->mset) sub xdb_shards_flat { @{$_[0]->{shards_flat} // []} } -sub mitem_kw ($$;$) { - my ($smsg, $mitem, $flagged) = @_; - my $kw = xap_terms('K', my $doc = $mitem->get_document); +sub _mitem_kw { # retry_reopen callback + my ($srch, $smsg, $mitem, $flagged) = @_; + my $doc = $mitem->get_document; + my $kw = xap_terms('K', $doc); $kw->{flagged} = 1 if $flagged; + my $L = xap_terms('L', $doc); # we keep the empty {kw} array here to prevent expensive work in # ->xsmsg_vmd, _unbless_smsg will clobber it iff it's empty $smsg->{kw} = [ sort keys %$kw ]; - my $L = xap_terms('L', $doc); $smsg->{L} = [ sort keys %$L ] if scalar(keys %$L); } +sub mitem_kw ($$$;$) { + my ($srch, $smsg, $mitem, $flagged) = @_; + $srch->retry_reopen(\&_mitem_kw, $smsg, $mitem, $flagged); +} + # like over->get_art sub smsg_for { my ($self, $mitem) = @_; @@ -90,7 +96,7 @@ sub smsg_for { my $smsg = $ibx->over->get_art($num); return if $smsg->{bytes} == 0; # external message if ($ibx->can('msg_keywords')) { - mitem_kw($smsg, $mitem); + mitem_kw($self, $smsg, $mitem); } $smsg; } @@ -194,7 +200,8 @@ sub query_one_mset { # for --threads and l2m w/o sort my $mitem = delete $n2item{$n}; next if $smsg->{bytes} == 0; if ($mitem && $can_kw) { - mitem_kw($smsg, $mitem, $fl); + mitem_kw($srch, $smsg, $mitem, + $fl); } elsif ($mitem && $fl) { # call ->xsmsg_vmd, later $smsg->{lei_q_tt_flagged} = 1; @@ -210,7 +217,7 @@ sub query_one_mset { # for --threads and l2m w/o sort my $mitem = $items[$i++]; my $smsg = $over->get_art($n) or next; next if $smsg->{bytes} == 0; - mitem_kw($smsg, $mitem, $fl) if $can_kw; + mitem_kw($srch, $smsg, $mitem, $fl) if $can_kw; $each_smsg->($smsg, $mitem); } } diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index fbcff2c3..59a5a3b0 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -9,6 +9,7 @@ use parent qw(Exporter); our @EXPORT_OK = qw(retry_reopen int_val get_pct xap_terms); use List::Util qw(max); use POSIX qw(strftime); +use Carp (); # values for searching, changing the numeric value breaks # compatibility with old indices (so don't change them it) @@ -405,16 +406,16 @@ sub retry_reopen { # Exception: The revision being read has been discarded - # you should call Xapian::Database::reopen() if (ref($@) =~ /\bDatabaseModifiedError\b/) { - warn "reopen try #$i on $@\n"; + warn "# reopen try #$i on $@\n"; reopen($self); } else { # let caller decide how to spew, because ExtMsg queries # get wonky and trigger: # "something terrible happened at .../Xapian/Enquire.pm" - die; + Carp::croak($@); } } - die "Too many Xapian database modifications in progress\n"; + Carp::croak("Too many Xapian database modifications in progress\n"); } sub _do_enquire {