From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 2/2] compact: support codesearch indices
Date: Wed, 3 May 2023 03:11:14 +0000 [thread overview]
Message-ID: <20230503031114.3315491-3-e@80x24.org> (raw)
In-Reply-To: <20230503031114.3315491-1-e@80x24.org>
Compacting codesearch indices is much easier to support than xcpdb
since it's 1:1 and doesn't follow a different sharding scheme than
the inboxes and extindices.
---
lib/PublicInbox/Admin.pm | 21 +++++++++++++++++----
lib/PublicInbox/Xapcmd.pm | 21 ++++++++++++++-------
script/public-inbox-compact | 20 +++++++++++---------
t/cindex.t | 7 +++++++
4 files changed, 49 insertions(+), 20 deletions(-)
diff --git a/lib/PublicInbox/Admin.pm b/lib/PublicInbox/Admin.pm
index 96c6652c..72ac9420 100644
--- a/lib/PublicInbox/Admin.pm
+++ b/lib/PublicInbox/Admin.pm
@@ -50,6 +50,7 @@ sub resolve_any_idxdir ($$) {
}
sub resolve_eidxdir ($) { resolve_any_idxdir($_[0], 'ei.lock') }
+sub resolve_cidxdir ($) { resolve_any_idxdir($_[0], 'cidx.lock') }
sub resolve_inboxdir {
my ($cd, $ver) = @_;
@@ -97,12 +98,22 @@ sub resolve_inboxes ($;$$) {
$cfg or die "--all specified, but $cfgfile not readable\n";
@$argv and die "--all specified, but directories specified\n";
}
- my (@old, @ibxs, @eidx);
+ my (@old, @ibxs, @eidx, @cidx);
+ if ($opt->{-cidx_ok}) {
+ require PublicInbox::CodeSearchIdx;
+ @$argv = grep {
+ if (defined(my $d = resolve_cidxdir($_))) {
+ push @cidx, PublicInbox::CodeSearchIdx->new(
+ $d, $opt);
+ undef;
+ } else {
+ 1;
+ }
+ } @$argv;
+ }
if ($opt->{-eidx_ok}) {
require PublicInbox::ExtSearchIdx;
- my $i = -1;
@$argv = grep {
- $i++;
if (defined(my $ei = resolve_eidxdir($_))) {
$ei = PublicInbox::ExtSearchIdx->new($ei, $opt);
push @eidx, $ei;
@@ -124,6 +135,7 @@ sub resolve_inboxes ($;$$) {
warn "W: $ibx->{name} $ibx->{inboxdir}: $!\n";
}
});
+ # TODO: no way to configure cindex in config file, yet
} else { # directories specified on the command-line
my @dirs = @$argv;
push @dirs, '.' if !@dirs && $opt->{-use_cwd};
@@ -164,7 +176,8 @@ sub resolve_inboxes ($;$$) {
die "-V$min_ver inboxes not supported by $0\n\t",
join("\n\t", @old), "\n";
}
- $opt->{-eidx_ok} ? (\@ibxs, \@eidx) : @ibxs;
+ ($opt->{-eidx_ok} || $opt->{-cidx_ok}) ? (\@ibxs, \@eidx, \@cidx)
+ : @ibxs;
}
my @base_mod = ();
diff --git a/lib/PublicInbox/Xapcmd.pm b/lib/PublicInbox/Xapcmd.pm
index 3a4c5622..f3eb8e4e 100644
--- a/lib/PublicInbox/Xapcmd.pm
+++ b/lib/PublicInbox/Xapcmd.pm
@@ -1,7 +1,7 @@
# Copyright (C) all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
package PublicInbox::Xapcmd;
-use strict;
+use v5.12;
use PublicInbox::Spawn qw(which popen_rd);
use PublicInbox::Syscall;
use PublicInbox::Admin qw(setup_signals);
@@ -75,7 +75,7 @@ sub commit_changes ($$$$) {
$tmp = undef;
if (!$opt->{-coarse_lock}) {
$opt->{-skip_lock} = 1;
- $im //= $ibx if $ibx->can('eidx_sync');
+ $im //= $ibx if $ibx->can('eidx_sync') || $ibx->can('cidx_run');
if ($im->can('count_shards')) { # v2w or eidx
my $pr = $opt->{-progress};
my $n = $im->count_shards;
@@ -93,6 +93,8 @@ sub commit_changes ($$$$) {
local %ENV = (%ENV, %$env) if $env;
if ($ibx->can('eidx_sync')) {
$ibx->eidx_sync($opt);
+ } elsif ($ibx->can('cidx_run')) {
+ $ibx->cidx_run($opt);
} else {
PublicInbox::Admin::index_inbox($ibx, $im, $opt);
}
@@ -117,7 +119,8 @@ sub runnable_or_die ($) {
sub prepare_reindex ($$) {
my ($ibx, $opt) = @_;
- if ($ibx->can('eidx_sync')) { # no prep needed for ExtSearchIdx
+ if ($ibx->can('eidx_sync') || $ibx->can('cidx_run')) {
+ # no prep needed for ExtSearchIdx nor CodeSearchIdx
} elsif ($ibx->version == 1) {
my $dir = $ibx->search->xdir(1);
my $xdb = $PublicInbox::Search::X{Database}->new($dir);
@@ -186,7 +189,9 @@ sub prepare_run {
my $tmp = {}; # old shard dir => File::Temp->newdir object or undef
my @queue; # ([old//src,newdir]) - list of args for cpdb() or compact()
my ($old, $misc_ok);
- if ($ibx->can('eidx_sync')) {
+ if ($ibx->can('cidx_run')) {
+ $old = $ibx->xdir(1);
+ } elsif ($ibx->can('eidx_sync')) {
$misc_ok = 1;
$old = $ibx->xdir(1);
} elsif (my $srch = $ibx->search) {
@@ -261,15 +266,17 @@ sub run {
my $cb = \&$task;
PublicInbox::Admin::progress_prepare($opt ||= {});
my $dir;
- for my $fld (qw(inboxdir topdir)) {
+ for my $fld (qw(inboxdir topdir cidx_dir)) {
my $d = $ibx->{$fld} // next;
-d $d or die "$fld=$d does not exist\n";
$dir = $d;
last;
}
- check_compact() if $opt->{compact} && $ibx->search;
+ check_compact() if $opt->{compact} &&
+ ($ibx->can('cidx_run') || $ibx->search);
- if (!$ibx->can('eidx_sync') && !$opt->{-coarse_lock}) {
+ if (!$ibx->can('eidx_sync') && $ibx->can('version') &&
+ !$opt->{-coarse_lock}) {
# per-epoch ranges for v2
# v1:{ from => $OID }, v2:{ from => [ $OID, $OID, $OID ] } }
$opt->{reindex} = { from => $ibx->version == 1 ? '' : [] };
diff --git a/script/public-inbox-compact b/script/public-inbox-compact
index 80d0224b..1062be5a 100755
--- a/script/public-inbox-compact
+++ b/script/public-inbox-compact
@@ -1,12 +1,12 @@
#!perl -w
-# Copyright (C) 2018-2021 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
-use strict;
-use v5.10.1;
+use v5.12;
use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev);
-my $opt = { compact => 1, -coarse_lock => 1, -eidx_ok => 1 };
+my $opt = { compact => 1, -coarse_lock => 1,
+ -eidx_ok => 1, -cidx_ok => 1 };
my $help = <<EOF; # the following should fit w/o scrolling in 80x24 term:
-usage: public-inbox-compact <INBOX_DIR|EXTINDEX_DIR>
+usage: public-inbox-compact <INBOX_DIR|EXTINDEX_DIR|CINDEX_DIR>
Compact Xapian DBs in an inbox
@@ -31,12 +31,14 @@ PublicInbox::Admin::progress_prepare($opt);
require PublicInbox::InboxWritable;
require PublicInbox::Xapcmd;
my $cfg = PublicInbox::Config->new;
-my ($ibxs, $eidxs) = PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt, $cfg);
-unless ($ibxs) { print STDERR $help; exit 1 }
+my ($ibxs, $eidxs, $cidxs) =
+ PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt, $cfg);
+unless (@$ibxs || @$eidxs || @$cidxs) { print STDERR $help; exit 1 }
for my $ibx (@$ibxs) {
$ibx = PublicInbox::InboxWritable->new($ibx);
PublicInbox::Xapcmd::run($ibx, 'compact', $opt);
}
-for my $eidx (@$eidxs) {
- PublicInbox::Xapcmd::run($eidx, 'compact', $opt);
+for my $ibxish (@$eidxs, @$cidxs) {
+ my $restore = $ibxish->can('prep_umask') ? $ibxish->prep_umask : undef;
+ PublicInbox::Xapcmd::run($ibxish, 'compact', $opt);
}
diff --git a/t/cindex.t b/t/cindex.t
index 8b89ebff..b0d6f204 100644
--- a/t/cindex.t
+++ b/t/cindex.t
@@ -76,6 +76,13 @@ ok(!-d "$zp/.git/public-inbox-cindex", 'no cindex in original coderepo');
ok(run_script([qw(-cindex -L medium --dangerous -q -d),
"$tmp/med", $zp, "$tmp/wt0"]), 'cindex external medium');
+
+SKIP: {
+ have_xapian_compact;
+ ok(run_script([qw(-compact -q), "$tmp/ext"]), 'compact on full');
+ ok(run_script([qw(-compact -q), "$tmp/med"]), 'compact on medium');
+}
+
my $no_metadata_set = sub {
my ($i, $extra, $xdb) = @_;
for my $xdb (@$xdb) {
prev parent reply other threads:[~2023-05-03 3:11 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-05-03 3:11 [PATCH 0/2] compaction support for codesearch indices Eric Wong
2023-05-03 3:11 ` [PATCH 1/2] admin: hoist out resolve_any_idxdir from resolve_{inboxdir,eidxdir} Eric Wong
2023-05-03 3:11 ` Eric Wong [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: https://public-inbox.org/README
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230503031114.3315491-3-e@80x24.org \
--to=e@80x24.org \
--cc=meta@public-inbox.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).