From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id C3BEE20603 for ; Thu, 23 May 2019 09:37:08 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 11/26] xcpdb: implement using Perl bindings Date: Thu, 23 May 2019 09:36:49 +0000 Message-Id: <20190523093704.18367-12-e@80x24.org> In-Reply-To: <20190523093704.18367-1-e@80x24.org> References: <20190523093704.18367-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: By avoiding copydatabase(1) entirely, we can make further changes to avoid locking the entire inbox for a long operation and switch to fine-grained locking. --- lib/PublicInbox/Xapcmd.pm | 77 +++++++++++++++++++++++++++++++++++++-- script/public-inbox-xcpdb | 2 +- 2 files changed, 75 insertions(+), 4 deletions(-) diff --git a/lib/PublicInbox/Xapcmd.pm b/lib/PublicInbox/Xapcmd.pm index 81e2f10..ca74ea0 100644 --- a/lib/PublicInbox/Xapcmd.pm +++ b/lib/PublicInbox/Xapcmd.pm @@ -24,15 +24,36 @@ sub commit_changes ($$$) { remove_tree("$old/old") or die "failed to remove $old/old: $!\n"; } +sub xspawn { + my ($cmd, $env, $opt) = @_; + if (ref($cmd->[0]) eq 'CODE') { + my $cb = shift(@$cmd); # $cb = cpdb() + defined(my $pid = fork) or die "fork: $!"; + return $pid if $pid > 0; + eval { $cb->($cmd, $env, $opt) }; + die $@ if $@; + exit 0; + } else { + spawn($cmd, $env, $opt); + } +} + sub run { my ($ibx, $cmd, $env, $opt) = @_; $opt ||= {}; my $dir = $ibx->{mainrepo} or die "no mainrepo in inbox\n"; - which($cmd->[0]) or die "$cmd->[0] not found in PATH\n"; + my $exe = $cmd->[0]; + my $pfx = $exe; + if (ref($exe) eq 'CODE') { + $pfx = 'CODE'; + require Search::Xapian::WritableDatabase; + } else { + which($exe) 
or die "$exe not found in PATH\n"; + } $ibx->umask_prepare; my $old = $ibx->search->xdir(1); -d $old or die "$old does not exist\n"; - my $new = tempdir($cmd->[0].'-XXXXXXXX', DIR => $dir); + my $new = tempdir("$pfx-XXXXXXXX", DIR => $dir); my $v = $ibx->{version} || 1; my @cmds; if ($v == 1) { @@ -58,7 +79,7 @@ sub run { while (@cmds) { while (scalar(keys(%pids)) < $max && scalar(@cmds)) { my $x = shift @cmds; - $pids{spawn($x, $env, $opt)} = $x; + $pids{xspawn($x, $env, $opt)} = $x; } while (scalar keys %pids) { @@ -71,4 +92,54 @@ sub run { }); } +sub cpdb_retryable ($$) { + my ($src, $err) = @_; + if (ref($err) eq 'Search::Xapian::DatabaseModifiedError') { + warn "$err, reopening and retrying\n"; + $src->reopen; + return 1; + } + die $err if $err; + 0; +} + +sub cpdb { + my ($args, $env, $opt) = @_; + my ($old, $new) = @$args; + my $src = Search::Xapian::Database->new($old); + + # like copydatabase(1), be sure we don't overwrite anything in case + # of other bugs: + my $creat = Search::Xapian::DB_CREATE(); + my $dst = Search::Xapian::WritableDatabase->new($new, $creat); + my ($it, $end); + + do { + eval { + # update the only metadata key for v1: + my $lc = $src->get_metadata('last_commit'); + $dst->set_metadata('last_commit', $lc) if $lc; + + $it = $src->postlist_begin(''); + $end = $src->postlist_end(''); + }; + } while (cpdb_retryable($src, $@)); + + do { + eval { + while ($it != $end) { + my $docid = $it->get_docid; + my $doc = $src->get_document($docid); + $dst->replace_document($docid, $doc); + $it->inc; + } + + # unlike copydatabase(1), we don't copy spelling + # and synonym data (or other user metadata) since + # the Perl APIs don't expose iterators for them + # (and public-inbox does not use those features) + }; + } while (cpdb_retryable($src, $@)); +} + 1; diff --git a/script/public-inbox-xcpdb b/script/public-inbox-xcpdb index cbf9f55..d494991 100755 --- a/script/public-inbox-xcpdb +++ b/script/public-inbox-xcpdb @@ -8,7 +8,7 @@ use PublicInbox::Admin; 
PublicInbox::Admin::require_or_die('-search'); my $usage = "Usage: public-inbox-xcpdb INBOX_DIR\n"; my @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV) or die $usage; -my $cmd = [qw(copydatabase --no-renumber)]; +my $cmd = [ \&PublicInbox::Xapcmd::cpdb ]; open my $null, '>', '/dev/null' or die "failed to open /dev/null: $!\n"; my $rdr = { 1 => fileno($null) }; foreach (@ibxs) { -- EW