* [PATCH] lei: retry_reopen on read-only Xapian access
@ 2021-05-28 19:47 Eric Wong
0 siblings, 0 replies; only message in thread
From: Eric Wong @ 2021-05-28 19:47 UTC (permalink / raw)
To: meta
Xapian DBs may be modified by a parallel process while we're
reading it, and Xapian's MVCC model places the burden on readers
to retry operations.
We'll also have retry_reopen croak instead of die on errors,
which ought to help us track down some "Document not found"
errors I've occasionally seen when using "lei <q|up>".
---
lib/PublicInbox/LeiSearch.pm | 33 ++++++++++++++++++++++++---------
lib/PublicInbox/LeiXSearch.pm | 21 ++++++++++++++-------
lib/PublicInbox/Search.pm | 7 ++++---
3 files changed, 42 insertions(+), 19 deletions(-)
diff --git a/lib/PublicInbox/LeiSearch.pm b/lib/PublicInbox/LeiSearch.pm
index 9297d060..b09d1e45 100644
--- a/lib/PublicInbox/LeiSearch.pm
+++ b/lib/PublicInbox/LeiSearch.pm
@@ -18,7 +18,7 @@ sub num2docid ($$) {
($num - 1) * $nshard + $num % $nshard + 1;
}
-sub msg_keywords {
+sub _msg_kw { # retry_reopen callback
my ($self, $num) = @_; # num_or_mitem
my $xdb = $self->xdb; # set {nshard};
my $docid = ref($num) ? $num->get_docid : num2docid($self, $num);
@@ -27,13 +27,16 @@ sub msg_keywords {
wantarray ? sort(keys(%$kw)) : $kw;
}
-# returns undef if blob is unknown
-sub oid_keywords {
- my ($self, $oidhex) = @_;
- my @num = $self->over->blob_exists($oidhex) or return;
+sub msg_keywords {
+ my ($self, $num) = @_; # num_or_mitem
+ $self->retry_reopen(\&_msg_kw, $num);
+}
+
+sub _oid_kw { # retry_reopen callback
+ my ($self, $nums) = @_;
my $xdb = $self->xdb; # set {nshard};
my %kw;
- for my $num (@num) { # there should only be one...
+ for my $num (@$nums) { # there should only be one...
my $doc = $xdb->get_document(num2docid($self, $num));
my $x = xap_terms('K', $doc);
%kw = (%kw, %$x);
@@ -41,10 +44,15 @@ sub oid_keywords {
\%kw;
}
-# lookup keywords+labels for external messages
-sub xsmsg_vmd {
+# returns undef if blob is unknown
+sub oid_keywords {
+ my ($self, $oidhex) = @_;
+ my @num = $self->over->blob_exists($oidhex) or return;
+ $self->retry_reopen(\&_oid_kw, \@num);
+}
+
+sub _xsmsg_vmd { # retry_reopen
my ($self, $smsg, $want_label) = @_;
- return if $smsg->{kw};
my $xdb = $self->xdb; # set {nshard};
my (%kw, %L, $doc, $x);
$kw{flagged} = 1 if delete($smsg->{lei_q_tt_flagged});
@@ -62,6 +70,13 @@ sub xsmsg_vmd {
$smsg->{L} = [ sort keys %L ] if scalar(keys(%L));
}
+# lookup keywords+labels for external messages
+sub xsmsg_vmd {
+ my ($self, $smsg, $want_label) = @_;
+ return if $smsg->{kw}; # already set by LeiXSearch->mitem_kw
+ $self->retry_reopen(\&_xsmsg_vmd, $smsg, $want_label);
+}
+
# when a message has no Message-IDs at all, this is needed for
# unsent Draft messages, at least
sub content_key ($) {
diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm
index 760f9718..2e548a7a 100644
--- a/lib/PublicInbox/LeiXSearch.pm
+++ b/lib/PublicInbox/LeiXSearch.pm
@@ -67,17 +67,23 @@ sub remotes { @{$_[0]->{remotes} // []} }
# called by PublicInbox::Search::xdb (usually via ->mset)
sub xdb_shards_flat { @{$_[0]->{shards_flat} // []} }
-sub mitem_kw ($$;$) {
- my ($smsg, $mitem, $flagged) = @_;
- my $kw = xap_terms('K', my $doc = $mitem->get_document);
+sub _mitem_kw { # retry_reopen callback
+ my ($srch, $smsg, $mitem, $flagged) = @_;
+ my $doc = $mitem->get_document;
+ my $kw = xap_terms('K', $doc);
$kw->{flagged} = 1 if $flagged;
+ my $L = xap_terms('L', $doc);
# we keep the empty {kw} array here to prevent expensive work in
# ->xsmsg_vmd, _unbless_smsg will clobber it iff it's empty
$smsg->{kw} = [ sort keys %$kw ];
- my $L = xap_terms('L', $doc);
$smsg->{L} = [ sort keys %$L ] if scalar(keys %$L);
}
+sub mitem_kw ($$$;$) {
+ my ($srch, $smsg, $mitem, $flagged) = @_;
+ $srch->retry_reopen(\&_mitem_kw, $smsg, $mitem, $flagged);
+}
+
# like over->get_art
sub smsg_for {
my ($self, $mitem) = @_;
@@ -90,7 +96,7 @@ sub smsg_for {
my $smsg = $ibx->over->get_art($num);
return if $smsg->{bytes} == 0; # external message
if ($ibx->can('msg_keywords')) {
- mitem_kw($smsg, $mitem);
+ mitem_kw($self, $smsg, $mitem);
}
$smsg;
}
@@ -194,7 +200,8 @@ sub query_one_mset { # for --threads and l2m w/o sort
my $mitem = delete $n2item{$n};
next if $smsg->{bytes} == 0;
if ($mitem && $can_kw) {
- mitem_kw($smsg, $mitem, $fl);
+ mitem_kw($srch, $smsg, $mitem,
+ $fl);
} elsif ($mitem && $fl) {
# call ->xsmsg_vmd, later
$smsg->{lei_q_tt_flagged} = 1;
@@ -210,7 +217,7 @@ sub query_one_mset { # for --threads and l2m w/o sort
my $mitem = $items[$i++];
my $smsg = $over->get_art($n) or next;
next if $smsg->{bytes} == 0;
- mitem_kw($smsg, $mitem, $fl) if $can_kw;
+ mitem_kw($srch, $smsg, $mitem, $fl) if $can_kw;
$each_smsg->($smsg, $mitem);
}
}
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index fbcff2c3..59a5a3b0 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -9,6 +9,7 @@ use parent qw(Exporter);
our @EXPORT_OK = qw(retry_reopen int_val get_pct xap_terms);
use List::Util qw(max);
use POSIX qw(strftime);
+use Carp ();
# values for searching, changing the numeric value breaks
# compatibility with old indices (so don't change them it)
@@ -405,16 +406,16 @@ sub retry_reopen {
# Exception: The revision being read has been discarded -
# you should call Xapian::Database::reopen()
if (ref($@) =~ /\bDatabaseModifiedError\b/) {
- warn "reopen try #$i on $@\n";
+ warn "# reopen try #$i on $@\n";
reopen($self);
} else {
# let caller decide how to spew, because ExtMsg queries
# get wonky and trigger:
# "something terrible happened at .../Xapian/Enquire.pm"
- die;
+ Carp::croak($@);
}
}
- die "Too many Xapian database modifications in progress\n";
+ Carp::croak("Too many Xapian database modifications in progress\n");
}
sub _do_enquire {
^ permalink raw reply related [flat|nested] only message in thread
only message in thread, other threads:[~2021-05-28 19:47 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-05-28 19:47 [PATCH] lei: retry_reopen on read-only Xapian access Eric Wong
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).