From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id D94271FA12 for ; Wed, 12 Aug 2020 09:17:19 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 5/6] xcpdb: wire up new index options and --help Date: Wed, 12 Aug 2020 09:17:18 +0000 Message-Id: <20200812091719.13739-6-e@yhbt.net> In-Reply-To: <20200812091719.13739-1-e@yhbt.net> References: <20200812091719.13739-1-e@yhbt.net> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: --sequential-shard also disables the copy parallelism (--jobs), so it can be useful for systems unable to handle parallel random I/O but still want many shards. There was a missing "use strict", too, which is fixed. --- Documentation/public-inbox-xcpdb.pod | 19 +++++++- lib/PublicInbox/Xapcmd.pm | 3 +- script/public-inbox-xcpdb | 66 +++++++++++++++++++++++----- 3 files changed, 75 insertions(+), 13 deletions(-) diff --git a/Documentation/public-inbox-xcpdb.pod b/Documentation/public-inbox-xcpdb.pod index 2ed4c5821..62a28c0a1 100644 --- a/Documentation/public-inbox-xcpdb.pod +++ b/Documentation/public-inbox-xcpdb.pod @@ -19,7 +19,7 @@ L, and L. =over -=item --compact +=item -c, --compact In addition to performing the copy operation, run L on each Xapian shard after copying but before finalizing it. @@ -52,6 +52,23 @@ Disable L and L. Available in public-inbox 1.6.0 (PENDING). +=item --sequential-shard + +Copy each shard sequentially, ignoring C<--jobs>. This also +affects indexing done at the end of a run. + +=item --batch-size=BYTES + +=item --max-size=BYTES + +See L for a description of these options. + +These indexing options indexing at the end of a run. 
+C<public-inbox-xcpdb> may run in parallel with +L<public-inbox-index(1)>, and C<public-inbox-xcpdb> needs to +reindex changes made to the old Xapian DBs by +L<public-inbox-index(1)> while it was running. + =back =head1 ENVIRONMENT diff --git a/lib/PublicInbox/Xapcmd.pm b/lib/PublicInbox/Xapcmd.pm index b6279218c..46548a948 100644 --- a/lib/PublicInbox/Xapcmd.pm +++ b/lib/PublicInbox/Xapcmd.pm @@ -82,7 +82,8 @@ sub commit_changes ($$$$) { $im->{shards} = $n; } } - + my $env = $opt->{-idx_env}; + local %ENV = (%ENV, %$env) if $env; PublicInbox::Admin::index_inbox($ibx, $im, $opt); } } diff --git a/script/public-inbox-xcpdb b/script/public-inbox-xcpdb index 2c91598cb..718a34b77 100755 --- a/script/public-inbox-xcpdb +++ b/script/public-inbox-xcpdb @@ -1,20 +1,64 @@ -#!/usr/bin/perl -w +#!perl -w # Copyright (C) 2019-2020 all contributors # License: AGPL-3.0+ -# xcpdb: Xapian copy database, a wrapper around Xapian's copydatabase(1) +use strict; +use v5.10.1; use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); -use PublicInbox::InboxWritable; -use PublicInbox::Xapcmd; +my $usage = 'Usage: public-inbox-xcpdb [options] INBOX_DIR'; +my $help = < -1, compact => 0, fsync => 1 }; +GetOptions($opt, qw( + fsync|sync! compact|c reshard|R=i + max_size|max-size=s batch_size|batch-size=s + sequential_shard|seq-shard|sequential-shard + jobs|j=i quiet|q verbose|v + blocksize|b=s no-full|n fuller|F + help|?)) or die "bad command-line args\n$usage"; +if ($opt->{help}) { print $help; exit 0 }; + use PublicInbox::Admin; PublicInbox::Admin::require_or_die('-search'); -my $usage = "Usage: public-inbox-xcpdb [--compact] INBOX_DIR\n"; -my $opt = { fsync => 1 }; -my @opt = (qw(fsync|sync! 
compact reshard|R=i), - @PublicInbox::Xapcmd::COMPACT_OPT); -GetOptions($opt, @opt) or die "bad command-line args\n$usage"; -my @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV) or die $usage; + +require PublicInbox::Config; +my $cfg = PublicInbox::Config->new; +my @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV, undef, $cfg) or + die $usage; +my $idx_env = PublicInbox::Admin::index_prepare($opt, $cfg); + +# we only set XAPIAN_FLUSH_THRESHOLD for index, since cpdb doesn't +# know sizes, only doccounts +$opt->{-idx_env} = $idx_env; + +if ($opt->{sequential_shard} && ($opt->{jobs} // 1) > 1) { + warn "W: --jobs=$opt->{jobs} ignored with --sequential-shard\n"; + $opt->{jobs} = 0; +} + +require PublicInbox::InboxWritable; +require PublicInbox::Xapcmd; foreach (@ibxs) { my $ibx = PublicInbox::InboxWritable->new($_); - # we rely on --no-renumber to keep docids synched to NNTP + # we rely on --no-renumber to keep docids synched for NNTP PublicInbox::Xapcmd::run($ibx, 'cpdb', $opt); }