unofficial mirror of meta@public-inbox.org
 help / color / mirror / Atom feed
* [PATCH] lei: retry_reopen on read-only Xapian access
@ 2021-05-28 19:47 Eric Wong
  0 siblings, 0 replies; only message in thread
From: Eric Wong @ 2021-05-28 19:47 UTC (permalink / raw)
  To: meta

Xapian DBs may be modified by a parallel process while we're
reading them, and Xapian's MVCC model places the burden on readers
to retry operations.

We'll also have retry_reopen croak instead of die on errors,
which ought to help us track down some "Document not found"
errors I've occasionally seen when using "lei <q|up>".
---
 lib/PublicInbox/LeiSearch.pm  | 33 ++++++++++++++++++++++++---------
 lib/PublicInbox/LeiXSearch.pm | 21 ++++++++++++++-------
 lib/PublicInbox/Search.pm     |  7 ++++---
 3 files changed, 42 insertions(+), 19 deletions(-)

diff --git a/lib/PublicInbox/LeiSearch.pm b/lib/PublicInbox/LeiSearch.pm
index 9297d060..b09d1e45 100644
--- a/lib/PublicInbox/LeiSearch.pm
+++ b/lib/PublicInbox/LeiSearch.pm
@@ -18,7 +18,7 @@ sub num2docid ($$) {
 	($num - 1) * $nshard + $num % $nshard + 1;
 }
 
-sub msg_keywords {
+sub _msg_kw { # retry_reopen callback
 	my ($self, $num) = @_; # num_or_mitem
 	my $xdb = $self->xdb; # set {nshard};
 	my $docid = ref($num) ? $num->get_docid : num2docid($self, $num);
@@ -27,13 +27,16 @@ sub msg_keywords {
 	wantarray ? sort(keys(%$kw)) : $kw;
 }
 
-# returns undef if blob is unknown
-sub oid_keywords {
-	my ($self, $oidhex) = @_;
-	my @num = $self->over->blob_exists($oidhex) or return;
+sub msg_keywords {
+	my ($self, $num) = @_; # num_or_mitem
+	$self->retry_reopen(\&_msg_kw, $num);
+}
+
+sub _oid_kw { # retry_reopen callback
+	my ($self, $nums) = @_;
 	my $xdb = $self->xdb; # set {nshard};
 	my %kw;
-	for my $num (@num) { # there should only be one...
+	for my $num (@$nums) { # there should only be one...
 		my $doc = $xdb->get_document(num2docid($self, $num));
 		my $x = xap_terms('K', $doc);
 		%kw = (%kw, %$x);
@@ -41,10 +44,15 @@ sub oid_keywords {
 	\%kw;
 }
 
-# lookup keywords+labels for external messages
-sub xsmsg_vmd {
+# returns undef if blob is unknown
+sub oid_keywords {
+	my ($self, $oidhex) = @_;
+	my @num = $self->over->blob_exists($oidhex) or return;
+	$self->retry_reopen(\&_oid_kw, \@num);
+}
+
+sub _xsmsg_vmd { # retry_reopen
 	my ($self, $smsg, $want_label) = @_;
-	return if $smsg->{kw};
 	my $xdb = $self->xdb; # set {nshard};
 	my (%kw, %L, $doc, $x);
 	$kw{flagged} = 1 if delete($smsg->{lei_q_tt_flagged});
@@ -62,6 +70,13 @@ sub xsmsg_vmd {
 	$smsg->{L} = [ sort keys %L ] if scalar(keys(%L));
 }
 
+# lookup keywords+labels for external messages
+sub xsmsg_vmd {
+	my ($self, $smsg, $want_label) = @_;
+	return if $smsg->{kw}; # already set by LeiXSearch->mitem_kw
+	$self->retry_reopen(\&_xsmsg_vmd, $smsg, $want_label);
+}
+
 # when a message has no Message-IDs at all, this is needed for
 # unsent Draft messages, at least
 sub content_key ($) {
diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm
index 760f9718..2e548a7a 100644
--- a/lib/PublicInbox/LeiXSearch.pm
+++ b/lib/PublicInbox/LeiXSearch.pm
@@ -67,17 +67,23 @@ sub remotes { @{$_[0]->{remotes} // []} }
 # called by PublicInbox::Search::xdb (usually via ->mset)
 sub xdb_shards_flat { @{$_[0]->{shards_flat} // []} }
 
-sub mitem_kw ($$;$) {
-	my ($smsg, $mitem, $flagged) = @_;
-	my $kw = xap_terms('K', my $doc = $mitem->get_document);
+sub _mitem_kw { # retry_reopen callback
+	my ($srch, $smsg, $mitem, $flagged) = @_;
+	my $doc = $mitem->get_document;
+	my $kw = xap_terms('K', $doc);
 	$kw->{flagged} = 1 if $flagged;
+	my $L = xap_terms('L', $doc);
 	# we keep the empty {kw} array here to prevent expensive work in
 	# ->xsmsg_vmd, _unbless_smsg will clobber it iff it's empty
 	$smsg->{kw} = [ sort keys %$kw ];
-	my $L = xap_terms('L', $doc);
 	$smsg->{L} = [ sort keys %$L ] if scalar(keys %$L);
 }
 
+sub mitem_kw ($$$;$) {
+	my ($srch, $smsg, $mitem, $flagged) = @_;
+	$srch->retry_reopen(\&_mitem_kw, $smsg, $mitem, $flagged);
+}
+
 # like over->get_art
 sub smsg_for {
 	my ($self, $mitem) = @_;
@@ -90,7 +96,7 @@ sub smsg_for {
 	my $smsg = $ibx->over->get_art($num);
 	return if $smsg->{bytes} == 0; # external message
 	if ($ibx->can('msg_keywords')) {
-		mitem_kw($smsg, $mitem);
+		mitem_kw($self, $smsg, $mitem);
 	}
 	$smsg;
 }
@@ -194,7 +200,8 @@ sub query_one_mset { # for --threads and l2m w/o sort
 					my $mitem = delete $n2item{$n};
 					next if $smsg->{bytes} == 0;
 					if ($mitem && $can_kw) {
-						mitem_kw($smsg, $mitem, $fl);
+						mitem_kw($srch, $smsg, $mitem,
+							$fl);
 					} elsif ($mitem && $fl) {
 						# call ->xsmsg_vmd, later
 						$smsg->{lei_q_tt_flagged} = 1;
@@ -210,7 +217,7 @@ sub query_one_mset { # for --threads and l2m w/o sort
 				my $mitem = $items[$i++];
 				my $smsg = $over->get_art($n) or next;
 				next if $smsg->{bytes} == 0;
-				mitem_kw($smsg, $mitem, $fl) if $can_kw;
+				mitem_kw($srch, $smsg, $mitem, $fl) if $can_kw;
 				$each_smsg->($smsg, $mitem);
 			}
 		}
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index fbcff2c3..59a5a3b0 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -9,6 +9,7 @@ use parent qw(Exporter);
 our @EXPORT_OK = qw(retry_reopen int_val get_pct xap_terms);
 use List::Util qw(max);
 use POSIX qw(strftime);
+use Carp ();
 
 # values for searching, changing the numeric value breaks
 # compatibility with old indices (so don't change them it)
@@ -405,16 +406,16 @@ sub retry_reopen {
 		# Exception: The revision being read has been discarded -
 		# you should call Xapian::Database::reopen()
 		if (ref($@) =~ /\bDatabaseModifiedError\b/) {
-			warn "reopen try #$i on $@\n";
+			warn "# reopen try #$i on $@\n";
 			reopen($self);
 		} else {
 			# let caller decide how to spew, because ExtMsg queries
 			# get wonky and trigger:
 			# "something terrible happened at .../Xapian/Enquire.pm"
-			die;
+			Carp::croak($@);
 		}
 	}
-	die "Too many Xapian database modifications in progress\n";
+	Carp::croak("Too many Xapian database modifications in progress\n");
 }
 
 sub _do_enquire {

^ permalink raw reply related	[flat|nested] only message in thread

only message in thread, other threads:[~2021-05-28 19:47 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-05-28 19:47 [PATCH] lei: retry_reopen on read-only Xapian access Eric Wong

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).