unofficial mirror of meta@public-inbox.org
 help / color / mirror / Atom feed
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 2/2] compact: support codesearch indices
Date: Wed,  3 May 2023 03:11:14 +0000	[thread overview]
Message-ID: <20230503031114.3315491-3-e@80x24.org> (raw)
In-Reply-To: <20230503031114.3315491-1-e@80x24.org>

This is much easier to support than xcpdb since it's 1:1 and
follows the same sharding scheme as the inboxes and
extindices.
---
 lib/PublicInbox/Admin.pm    | 21 +++++++++++++++++----
 lib/PublicInbox/Xapcmd.pm   | 21 ++++++++++++++-------
 script/public-inbox-compact | 20 +++++++++++---------
 t/cindex.t                  |  7 +++++++
 4 files changed, 49 insertions(+), 20 deletions(-)

diff --git a/lib/PublicInbox/Admin.pm b/lib/PublicInbox/Admin.pm
index 96c6652c..72ac9420 100644
--- a/lib/PublicInbox/Admin.pm
+++ b/lib/PublicInbox/Admin.pm
@@ -50,6 +50,7 @@ sub resolve_any_idxdir ($$) {
 }
 
 sub resolve_eidxdir ($) { resolve_any_idxdir($_[0], 'ei.lock') }
+sub resolve_cidxdir ($) { resolve_any_idxdir($_[0], 'cidx.lock') }
 
 sub resolve_inboxdir {
 	my ($cd, $ver) = @_;
@@ -97,12 +98,22 @@ sub resolve_inboxes ($;$$) {
 		$cfg or die "--all specified, but $cfgfile not readable\n";
 		@$argv and die "--all specified, but directories specified\n";
 	}
-	my (@old, @ibxs, @eidx);
+	my (@old, @ibxs, @eidx, @cidx);
+	if ($opt->{-cidx_ok}) {
+		require PublicInbox::CodeSearchIdx;
+		@$argv = grep {
+			if (defined(my $d = resolve_cidxdir($_))) {
+				push @cidx, PublicInbox::CodeSearchIdx->new(
+							$d, $opt);
+				undef;
+			} else {
+				1;
+			}
+		} @$argv;
+	}
 	if ($opt->{-eidx_ok}) {
 		require PublicInbox::ExtSearchIdx;
-		my $i = -1;
 		@$argv = grep {
-			$i++;
 			if (defined(my $ei = resolve_eidxdir($_))) {
 				$ei = PublicInbox::ExtSearchIdx->new($ei, $opt);
 				push @eidx, $ei;
@@ -124,6 +135,7 @@ sub resolve_inboxes ($;$$) {
 				warn "W: $ibx->{name} $ibx->{inboxdir}: $!\n";
 			}
 		});
+		# TODO: no way to configure cindex in config file, yet
 	} else { # directories specified on the command-line
 		my @dirs = @$argv;
 		push @dirs, '.' if !@dirs && $opt->{-use_cwd};
@@ -164,7 +176,8 @@ sub resolve_inboxes ($;$$) {
 		die "-V$min_ver inboxes not supported by $0\n\t",
 		    join("\n\t", @old), "\n";
 	}
-	$opt->{-eidx_ok} ? (\@ibxs, \@eidx) : @ibxs;
+	($opt->{-eidx_ok} || $opt->{-cidx_ok}) ? (\@ibxs, \@eidx, \@cidx)
+						: @ibxs;
 }
 
 my @base_mod = ();
diff --git a/lib/PublicInbox/Xapcmd.pm b/lib/PublicInbox/Xapcmd.pm
index 3a4c5622..f3eb8e4e 100644
--- a/lib/PublicInbox/Xapcmd.pm
+++ b/lib/PublicInbox/Xapcmd.pm
@@ -1,7 +1,7 @@
 # Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 package PublicInbox::Xapcmd;
-use strict;
+use v5.12;
 use PublicInbox::Spawn qw(which popen_rd);
 use PublicInbox::Syscall;
 use PublicInbox::Admin qw(setup_signals);
@@ -75,7 +75,7 @@ sub commit_changes ($$$$) {
 	$tmp = undef;
 	if (!$opt->{-coarse_lock}) {
 		$opt->{-skip_lock} = 1;
-		$im //= $ibx if $ibx->can('eidx_sync');
+		$im //= $ibx if $ibx->can('eidx_sync') || $ibx->can('cidx_run');
 		if ($im->can('count_shards')) { # v2w or eidx
 			my $pr = $opt->{-progress};
 			my $n = $im->count_shards;
@@ -93,6 +93,8 @@ sub commit_changes ($$$$) {
 		local %ENV = (%ENV, %$env) if $env;
 		if ($ibx->can('eidx_sync')) {
 			$ibx->eidx_sync($opt);
+		} elsif ($ibx->can('cidx_run')) {
+			$ibx->cidx_run($opt);
 		} else {
 			PublicInbox::Admin::index_inbox($ibx, $im, $opt);
 		}
@@ -117,7 +119,8 @@ sub runnable_or_die ($) {
 
 sub prepare_reindex ($$) {
 	my ($ibx, $opt) = @_;
-	if ($ibx->can('eidx_sync')) { # no prep needed for ExtSearchIdx
+	if ($ibx->can('eidx_sync') || $ibx->can('cidx_run')) {
+		# no prep needed for ExtSearchIdx nor CodeSearchIdx
 	} elsif ($ibx->version == 1) {
 		my $dir = $ibx->search->xdir(1);
 		my $xdb = $PublicInbox::Search::X{Database}->new($dir);
@@ -186,7 +189,9 @@ sub prepare_run {
 	my $tmp = {}; # old shard dir => File::Temp->newdir object or undef
 	my @queue; # ([old//src,newdir]) - list of args for cpdb() or compact()
 	my ($old, $misc_ok);
-	if ($ibx->can('eidx_sync')) {
+	if ($ibx->can('cidx_run')) {
+		$old = $ibx->xdir(1);
+	} elsif ($ibx->can('eidx_sync')) {
 		$misc_ok = 1;
 		$old = $ibx->xdir(1);
 	} elsif (my $srch = $ibx->search) {
@@ -261,15 +266,17 @@ sub run {
 	my $cb = \&$task;
 	PublicInbox::Admin::progress_prepare($opt ||= {});
 	my $dir;
-	for my $fld (qw(inboxdir topdir)) {
+	for my $fld (qw(inboxdir topdir cidx_dir)) {
 		my $d = $ibx->{$fld} // next;
 		-d $d or die "$fld=$d does not exist\n";
 		$dir = $d;
 		last;
 	}
-	check_compact() if $opt->{compact} && $ibx->search;
+	check_compact() if $opt->{compact} &&
+				($ibx->can('cidx_run') || $ibx->search);
 
-	if (!$ibx->can('eidx_sync') && !$opt->{-coarse_lock}) {
+	if (!$ibx->can('eidx_sync') && $ibx->can('version') &&
+					!$opt->{-coarse_lock}) {
 		# per-epoch ranges for v2
 		# v1:{ from => $OID }, v2:{ from => [ $OID, $OID, $OID ] } }
 		$opt->{reindex} = { from => $ibx->version == 1 ? '' : [] };
diff --git a/script/public-inbox-compact b/script/public-inbox-compact
index 80d0224b..1062be5a 100755
--- a/script/public-inbox-compact
+++ b/script/public-inbox-compact
@@ -1,12 +1,12 @@
 #!perl -w
-# Copyright (C) 2018-2021 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
-use strict;
-use v5.10.1;
+use v5.12;
 use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev);
-my $opt = { compact => 1, -coarse_lock => 1, -eidx_ok => 1 };
+my $opt = { compact => 1, -coarse_lock => 1,
+	-eidx_ok => 1, -cidx_ok => 1 };
 my $help = <<EOF; # the following should fit w/o scrolling in 80x24 term:
-usage: public-inbox-compact <INBOX_DIR|EXTINDEX_DIR>
+usage: public-inbox-compact <INBOX_DIR|EXTINDEX_DIR|CINDEX_DIR>
 
   Compact Xapian DBs in an inbox
 
@@ -31,12 +31,14 @@ PublicInbox::Admin::progress_prepare($opt);
 require PublicInbox::InboxWritable;
 require PublicInbox::Xapcmd;
 my $cfg = PublicInbox::Config->new;
-my ($ibxs, $eidxs) = PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt, $cfg);
-unless ($ibxs) { print STDERR $help; exit 1 }
+my ($ibxs, $eidxs, $cidxs) =
+	PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt, $cfg);
+unless (@$ibxs || @$eidxs || @$cidxs) { print STDERR $help; exit 1 }
 for my $ibx (@$ibxs) {
 	$ibx = PublicInbox::InboxWritable->new($ibx);
 	PublicInbox::Xapcmd::run($ibx, 'compact', $opt);
 }
-for my $eidx (@$eidxs) {
-	PublicInbox::Xapcmd::run($eidx, 'compact', $opt);
+for my $ibxish (@$eidxs, @$cidxs) {
+	my $restore = $ibxish->can('prep_umask') ? $ibxish->prep_umask : undef;
+	PublicInbox::Xapcmd::run($ibxish, 'compact', $opt);
 }
diff --git a/t/cindex.t b/t/cindex.t
index 8b89ebff..b0d6f204 100644
--- a/t/cindex.t
+++ b/t/cindex.t
@@ -76,6 +76,13 @@ ok(!-d "$zp/.git/public-inbox-cindex", 'no cindex in original coderepo');
 ok(run_script([qw(-cindex -L medium --dangerous -q -d),
 	"$tmp/med", $zp, "$tmp/wt0"]), 'cindex external medium');
 
+
+SKIP: {
+	have_xapian_compact;
+	ok(run_script([qw(-compact -q), "$tmp/ext"]), 'compact on full');
+	ok(run_script([qw(-compact -q), "$tmp/med"]), 'compact on medium');
+}
+
 my $no_metadata_set = sub {
 	my ($i, $extra, $xdb) = @_;
 	for my $xdb (@$xdb) {

      parent reply	other threads:[~2023-05-03  3:11 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-05-03  3:11 [PATCH 0/2] compaction support for codesearch indices Eric Wong
2023-05-03  3:11 ` [PATCH 1/2] admin: hoist out resolve_any_idxdir from resolve_{inboxdir,eidxdir} Eric Wong
2023-05-03  3:11 ` Eric Wong [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230503031114.3315491-3-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).