From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id DC76F1F4B4 for ; Wed, 23 Dec 2020 23:02:55 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH] index: update [extindex "all"] by default, support -E Date: Wed, 23 Dec 2020 23:02:55 +0000 Message-Id: <20201223230255.30404-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: In most cases, this ensures users will only have to opt-in to using -extindex once and won't have to issue extra commands to keep external indices up-to-date when using public-inbox-index. Since we support arbitrary numbers of external indices for ease-of-development, we'll support repeating "-E" ("--update-extindex=") in case users want to test changes in parallel. --- Documentation/public-inbox-index.pod | 19 ++++++++++- script/public-inbox-index | 47 ++++++++++++++++++++++++++-- 2 files changed, 63 insertions(+), 3 deletions(-) diff --git a/Documentation/public-inbox-index.pod b/Documentation/public-inbox-index.pod index 0848e860..2c34aabc 100644 --- a/Documentation/public-inbox-index.pod +++ b/Documentation/public-inbox-index.pod @@ -162,6 +162,23 @@ See L for description and caveats. Available in public-inbox 1.6.0+. +=item --update-extindex=EXTINDEX, -E + +Update the given external index (L<public-inbox-extindex-format(5)>. +Either the configured section name (e.g. C<all>) or a directory name +may be specified. + +Defaults to C<all> if C<[extindex "all"]> is configured, +otherwise no external indices are updated. + +May be specified multiple times in rare cases multiple external +indices are configured. + +=item --no-update-extindex + +Do not update the C<all> external index by default. 
This negates +all uses of C<-E> / C<--update-extindex=> on the command-line. + =back =head1 FILES @@ -297,4 +314,4 @@ License: AGPL-3.0+ L =head1 SEE ALSO -L, L +L, L, L diff --git a/script/public-inbox-index b/script/public-inbox-index index 8a61817c..f10bb5ad 100755 --- a/script/public-inbox-index +++ b/script/public-inbox-index @@ -17,7 +17,7 @@ options: --no-fsync speed up indexing, risk corruption on power outage -L LEVEL `basic', `medium', or `full' (default: full) - -E EIDX update EIDX (e.g. `all') + -E EXTINDEX update extindex (default: `all') --all index all configured inboxes --compact | -c run public-inbox-compact(1) after indexing --sequential-shard index Xapian shards sequentially for slow storage @@ -32,12 +32,16 @@ options: BYTES may use `k', `m', and `g' suffixes (e.g. `10m' for 10 megabytes) See public-inbox-index(1) man page for full documentation. EOF -my $opt = { quiet => -1, compact => 0, max_size => undef, fsync => 1 }; +my $opt = { + quiet => -1, compact => 0, max_size => undef, fsync => 1, + 'update-extindex' => [], # ":s@" optional arg sets '' if no arg given +}; GetOptions($opt, qw(verbose|v+ reindex rethread compact|c+ jobs|j=i prune fsync|sync! 
xapian_only|xapian-only indexlevel|index-level|L=s max_size|max-size=s batch_size|batch-size=s sequential_shard|seq-shard|sequential-shard + no-update-extindex update-extindex|E=s@ skip-docdata all help|h)) or die $help; if ($opt->{help}) { print $help; exit 0 }; @@ -56,7 +60,31 @@ my @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt, $cfg); PublicInbox::Admin::require_or_die('-index'); unless (@ibxs) { print STDERR $help; exit 1 } +my (@eidx_dir, %eidx_seen); +my $update_extindex = $opt->{'update-extindex'}; +if (!scalar(@$update_extindex) && (my $ALL = $cfg->ALL)) { + # extindex and normal inboxes may have different owners + push(@$update_extindex, 'all') if -w $ALL->{topdir}; +} +@$update_extindex = () if $opt->{'no-update-extindex'}; +if (scalar @$update_extindex) { + PublicInbox::Admin::require_or_die('-search'); + require PublicInbox::ExtSearchIdx; +} +for my $ei_name (@$update_extindex) { + my $es = $cfg->lookup_ei($ei_name); + my $topdir; + if (!$es && -d $ei_name) { # allow dirname or config section name + $topdir = $ei_name; + } elsif ($es) { + $topdir = $es->{topdir}; + } else { + die "extindex `$ei_name' not configured or found\n"; + } + $eidx_seen{$topdir} //= push(@eidx_dir, $topdir); +} my $mods = {}; +my @eidx_unconfigured; foreach my $ibx (@ibxs) { # detect_indexlevel may also set $ibx->{-skip_docdata} my $detected = PublicInbox::Admin::detect_indexlevel($ibx); @@ -64,7 +92,14 @@ foreach my $ibx (@ibxs) { $ibx->{indexlevel} //= $opt->{indexlevel} // ($opt->{xapian_only} ? 
'full' : $detected); PublicInbox::Admin::scan_ibx_modules($mods, $ibx); + if (@eidx_dir && $ibx->{-unconfigured}) { + push @eidx_unconfigured, " $ibx->{inboxdir}\n"; + } } +warn <{compact} = 0 if !$mods->{'Search::Xapian'}; @@ -96,4 +131,12 @@ EOL local $copt->{jobs} = 0 if $ibx_opt->{sequential_shard}; PublicInbox::Xapcmd::run($ibx, 'compact', $copt); } + next if $ibx->{-unconfigured}; + last if $ibx_opt->{quit}; + for my $dir (@eidx_dir) { + my $eidx = PublicInbox::ExtSearchIdx->new($dir); + $eidx->attach_inbox($ibx); + $eidx->eidx_sync($ibx_opt); + last if $ibx_opt->{quit}; + } }