From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 10/9] extsearchidx: lock eidxq on full --reindex
Date: Wed, 16 Dec 2020 23:04:53 +0000 [thread overview]
Message-ID: <20201216230453.GA12030@dcvr> (raw)
In-Reply-To: <20201216064038.GA4161@dcvr>
Eric Wong <e@80x24.org> wrote:
> Putting eidxq in over.sqlite3 was a bad idea when multiple
> -extindex processes may run :x Nothing fatal or leading to
> index corruption, just some stuff delayed or work needlessly
> repeated.
It's still nice to be able to resume interrupted runs, however.
Final patch in this series from burning SSD hell, I hope...
------------8<------------
Subject: [PATCH] extsearchidx: lock eidxq on full --reindex
Incremental indexing can use the `eidxq' reindexing queue for
handling deletes and resuming interrupted indexing. Ensure
those incremental -extindex invocations do not steal (and
prematurely perform) work that an "-extindex --reindex"
invocation is handling.
---
lib/PublicInbox/ExtSearchIdx.pm | 86 +++++++++++++++++++++++++++++++++
1 file changed, 86 insertions(+)
diff --git a/lib/PublicInbox/ExtSearchIdx.pm b/lib/PublicInbox/ExtSearchIdx.pm
index b5024823..f492734a 100644
--- a/lib/PublicInbox/ExtSearchIdx.pm
+++ b/lib/PublicInbox/ExtSearchIdx.pm
@@ -18,6 +18,8 @@ use strict;
use v5.10.1;
use parent qw(PublicInbox::ExtSearch PublicInbox::Lock);
use Carp qw(croak carp);
+use Sys::Hostname qw(hostname);
+use POSIX qw(strftime);
use PublicInbox::Search;
use PublicInbox::SearchIdx qw(crlf_adjust prepare_stack is_ancestor
is_bad_blob);
@@ -524,9 +526,86 @@ sub checkpoint_due ($) {
${$sync->{need_checkpoint}} || (now() > $sync->{next_check});
}
+sub host_ident () { # returns hostname, a dash, then a machine identifier
+ # I've copied FS images and only changed the hostname before,
+ # so prepend hostname. Use `state' since a BOFH can change
+ # these while this process is running and we always want to be
+ # able to release locks taken by this process.
+ state $retval = hostname . '-' . do { # `state': initialized once per process
+ my $m; # machine-id(5) is systemd
+ if (open(my $fh, '<', '/etc/machine-id')) { $m = <$fh> } # first line only
+ # hostid(1) is in GNU coreutils, kern.hostid is FreeBSD
+ chomp($m ||= `hostid` || `sysctl -n kern.hostid`); # commands run only if $m is false
+ $m; # value of the do-block
+ };
+}
+
+sub eidxq_release { # returns 1 if we cleared our own lock, false otherwise
+ my ($self) = @_;
+ my $expect = delete($self->{-eidxq_locked}) or return; # we hold no lock
+ my ($owner_pid, undef) = split(/-/, $expect); # value begins with the taker's PID
+ return if $owner_pid != $$; # shards may fork
+ my $oidx = $self->{oidx};
+ $oidx->begin_lazy;
+ my $cur = $oidx->eidx_meta('eidxq_lock') // ''; # missing value treated as ''
+ if ($cur eq $expect) { # still ours
+ $oidx->eidx_meta('eidxq_lock', ''); # cleared by storing an empty string
+ return 1; # not committed here; DESTROY commits via commit_lazy
+ } elsif ($cur ne '') {
+ warn "E: eidxq_lock($expect) stolen by $cur\n";
+ } else {
+ warn "E: eidxq_lock($expect) released by another process\n";
+ }
+ undef; # false: the stored lock was not ours to release
+}
+
+sub DESTROY { # drop our eidxq_lock (if held) when the object is destroyed
+ my ($self) = @_;
+ eidxq_release($self) and $self->{oidx}->commit_lazy; # commit only if we cleared it
+}
+
+sub _eidxq_take ($) { # claims eidxq_lock without checking it; returns the lock value
+ my ($self) = @_;
+ my $val = "$$-${\time}-$>-".host_ident; # PID-epoch-EUID-host_ident
+ $self->{oidx}->eidx_meta('eidxq_lock', $val);
+ $self->{-eidxq_locked} = $val; # kept so eidxq_release can verify ownership
+}
+
+sub eidxq_lock_acquire ($) { # returns the lock value if we hold it, false otherwise
+ my ($self) = @_;
+ my $oidx = $self->{oidx};
+ $oidx->begin_lazy;
+ my $cur = $oidx->eidx_meta('eidxq_lock') || return _eidxq_take($self); # unlocked: take it
+ if (my $locked = $self->{-eidxq_locked}) { # be lazy: we may already hold it
+ return $locked if $locked eq $cur;
+ }
+ my ($pid, $time, $euid, $ident) = split(/-/, $cur, 4); # limit 4: $ident keeps its dashes
+ my $t = strftime('%Y-%m-%d %k:%M:%S', gmtime($time)); # UTC; NOTE(review): %k is not a C/POSIX specifier, %H is portable -- confirm
+ if ($euid == $> && $ident eq host_ident) { # same EUID and host: the PID can be probed
+ if (kill(0, $pid)) { # signal 0 only tests whether the process can be signalled
+ warn <<EOM; return;
+I: PID:$pid (re)indexing Xapian since $t, it will continue our work
+EOM
+ }
+ if ($!{ESRCH}) { # no such process: the owner is gone
+ warn "I: eidxq_lock is stale ($cur), clobbering\n";
+ return _eidxq_take($self);
+ }
+ warn "E: kill(0, $pid) failed: $!\n"; # fall-through:
+ }
+ my $fn = $oidx->dbh->sqlite_db_filename; # used in the manual-unlock hint below
+ warn <<EOF;
+W: PID:$pid, UID:$euid on $ident is indexing Xapian since $t
+W: If this is unexpected, delete `eidxq_lock' from the `eidx_meta' table:
+W: sqlite3 $fn 'DELETE FROM eidx_meta WHERE key = "eidxq_lock"'
+EOF
+ undef; # not acquired
+}
+
sub eidxq_process ($$) { # for reindexing
my ($self, $sync) = @_;
+ return unless eidxq_lock_acquire($self);
my $dbh = $self->{oidx}->dbh;
my $tot = $dbh->selectrow_array('SELECT COUNT(*) FROM eidxq') or return;
${$sync->{nr}} = 0;
@@ -719,6 +798,12 @@ sub _reindex_inbox ($$$) {
sub eidx_reindex {
my ($self, $sync) = @_;
+ # acquire eidxq_lock early because full reindex takes forever
+ # and incremental -extindex processes can run during our checkpoints
+ if (!eidxq_lock_acquire($self)) {
+ warn "E: aborting --reindex\n";
+ return;
+ }
for my $ibx (@{$self->{ibx_list}}) {
_reindex_inbox($self, $sync, $ibx);
last if $sync->{quit};
@@ -769,6 +854,7 @@ sub eidx_sync { # main entry point
$self->{oidx}->rethread_done($opt) unless $sync->{quit};
eidxq_process($self, $sync) unless $sync->{quit};
+ eidxq_release($self);
PublicInbox::V2Writable::done($self);
}
prev parent reply other threads:[~2020-12-16 23:04 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-12-11 3:37 [PATCH] extindex: preliminary --reindex support Eric Wong
2020-12-12 19:53 ` [PATCH 2/1] extindex: reindex: drop stale rows from over.sqlite3 Eric Wong
2020-12-15 2:02 ` [PATCH 0/9] extindex: --reindex support Eric Wong
2020-12-15 2:02 ` [PATCH 1/9] extindex: preliminary " Eric Wong
2020-12-15 2:02 ` [PATCH 2/9] extindex: delete stale messages from over.sqlite3 Eric Wong
2020-12-15 2:02 ` [PATCH 3/9] over: sort xref3 by xnum if ibx_id repeats Eric Wong
2020-12-15 2:02 ` [PATCH 4/9] extindex: support --rethread and content bifurcation Eric Wong
2020-12-15 2:02 ` [PATCH 5/9] extsearchidx: reindex works on Xapian, too Eric Wong
2020-12-15 2:02 ` [PATCH 6/9] extsearchidx: checkpoint releases locks Eric Wong
2020-12-15 2:02 ` [PATCH 7/9] extsearchidx: simplify reindex code paths Eric Wong
2020-12-15 2:02 ` [PATCH 8/9] extsearchidx: reindex releases over.sqlite3 handles properly Eric Wong
2020-12-15 2:02 ` [PATCH 9/9] searchidxshard: simplify newline elimination Eric Wong
2020-12-16 6:40 ` [PATCH 0/9] extindex: --reindex support Eric Wong
2020-12-16 23:04 ` Eric Wong [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: https://public-inbox.org/README
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20201216230453.GA12030@dcvr \
--to=e@80x24.org \
--cc=meta@public-inbox.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).