unofficial mirror of meta@public-inbox.org
 help / color / mirror / Atom feed
* [PATCH 0/2] compaction support for codesearch indices
@ 2023-05-03  3:11 Eric Wong
  2023-05-03  3:11 ` [PATCH 1/2] admin: hoist out resolve_any_idxdir from resolve_{inboxdir,eidxdir} Eric Wong
  2023-05-03  3:11 ` [PATCH 2/2] compact: support codesearch indices Eric Wong
  0 siblings, 2 replies; 3+ messages in thread
From: Eric Wong @ 2023-05-03  3:11 UTC (permalink / raw)
  To: meta

I'm not 100% sure I like the cindex name, but oh well...
Compaction is important for the space-challenged.

Eric Wong (2):
  admin: hoist out resolve_any_idxdir from resolve_{inboxdir,eidxdir}
  compact: support codesearch indices

 lib/PublicInbox/Admin.pm    | 57 +++++++++++++++++++------------------
 lib/PublicInbox/Xapcmd.pm   | 21 +++++++++-----
 script/public-inbox-compact | 20 +++++++------
 t/cindex.t                  |  7 +++++
 4 files changed, 61 insertions(+), 44 deletions(-)

^ permalink raw reply	[flat|nested] 3+ messages in thread

* [PATCH 1/2] admin: hoist out resolve_any_idxdir from resolve_{inboxdir,eidxdir}
  2023-05-03  3:11 [PATCH 0/2] compaction support for codesearch indices Eric Wong
@ 2023-05-03  3:11 ` Eric Wong
  2023-05-03  3:11 ` [PATCH 2/2] compact: support codesearch indices Eric Wong
  1 sibling, 0 replies; 3+ messages in thread
From: Eric Wong @ 2023-05-03  3:11 UTC (permalink / raw)
  To: meta

This bit of common code will be handy for the upcoming
resolve_cidxdir, too.
---
 lib/PublicInbox/Admin.pm | 36 ++++++++++++------------------------
 1 file changed, 12 insertions(+), 24 deletions(-)

diff --git a/lib/PublicInbox/Admin.pm b/lib/PublicInbox/Admin.pm
index da34a3bd..96c6652c 100644
--- a/lib/PublicInbox/Admin.pm
+++ b/lib/PublicInbox/Admin.pm
@@ -28,12 +28,12 @@ sub setup_signals {
 	};
 }
 
-sub resolve_eidxdir {
-	my ($cd) = @_;
+sub resolve_any_idxdir ($$) {
+	my ($cd, $lock_bn) = @_;
 	my $try = $cd // '.';
 	my $root_dev_ino;
-	while (1) { # favor v2, first
-		if (-f "$try/ei.lock") {
+	while (1) {
+		if (-f "$try/$lock_bn") { # inbox.lock, ei.lock, cidx.lock
 			return rel2abs_collapsed($try);
 		} elsif (-d $try) {
 			my @try = stat _;
@@ -49,28 +49,16 @@ sub resolve_eidxdir {
 	}
 }
 
+sub resolve_eidxdir ($) { resolve_any_idxdir($_[0], 'ei.lock') }
+
 sub resolve_inboxdir {
 	my ($cd, $ver) = @_;
-	my $try = $cd // '.';
-	my $root_dev_ino;
-	while (1) { # favor v2, first
-		if (-f "$try/inbox.lock") {
-			$$ver = 2 if $ver;
-			return rel2abs_collapsed($try);
-		} elsif (-d $try) {
-			my @try = stat _;
-			$root_dev_ino //= do {
-				my @root = stat('/') or die "stat /: $!\n";
-				"$root[0]\0$root[1]";
-			};
-			last if "$try[0]\0$try[1]" eq $root_dev_ino;
-			$try .= '/..'; # continue, cd up
-		} else {
-			die "`$try' is not a directory\n";
-		}
-	}
-	my $dir = resolve_git_dir($cd);
-	$$ver = 1 if $ver;
+	my $dir;
+	if (defined($dir = resolve_any_idxdir($cd, 'inbox.lock'))) { # try v2
+		$$ver = 2 if $ver;
+	} elsif (defined($dir = resolve_git_dir($cd))) { # try v1
+		$$ver = 1 if $ver;
+	} # else: not an inbox at all
 	$dir;
 }
 

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [PATCH 2/2] compact: support codesearch indices
  2023-05-03  3:11 [PATCH 0/2] compaction support for codesearch indices Eric Wong
  2023-05-03  3:11 ` [PATCH 1/2] admin: hoist out resolve_any_idxdir from resolve_{inboxdir,eidxdir} Eric Wong
@ 2023-05-03  3:11 ` Eric Wong
  1 sibling, 0 replies; 3+ messages in thread
From: Eric Wong @ 2023-05-03  3:11 UTC (permalink / raw)
  To: meta

This is much easier to support than xcpdb since it's 1:1 and
doesn't follow a different sharding scheme than the inboxes and
extindices.
---
 lib/PublicInbox/Admin.pm    | 21 +++++++++++++++++----
 lib/PublicInbox/Xapcmd.pm   | 21 ++++++++++++++-------
 script/public-inbox-compact | 20 +++++++++++---------
 t/cindex.t                  |  7 +++++++
 4 files changed, 49 insertions(+), 20 deletions(-)

diff --git a/lib/PublicInbox/Admin.pm b/lib/PublicInbox/Admin.pm
index 96c6652c..72ac9420 100644
--- a/lib/PublicInbox/Admin.pm
+++ b/lib/PublicInbox/Admin.pm
@@ -50,6 +50,7 @@ sub resolve_any_idxdir ($$) {
 }
 
 sub resolve_eidxdir ($) { resolve_any_idxdir($_[0], 'ei.lock') }
+sub resolve_cidxdir ($) { resolve_any_idxdir($_[0], 'cidx.lock') }
 
 sub resolve_inboxdir {
 	my ($cd, $ver) = @_;
@@ -97,12 +98,22 @@ sub resolve_inboxes ($;$$) {
 		$cfg or die "--all specified, but $cfgfile not readable\n";
 		@$argv and die "--all specified, but directories specified\n";
 	}
-	my (@old, @ibxs, @eidx);
+	my (@old, @ibxs, @eidx, @cidx);
+	if ($opt->{-cidx_ok}) {
+		require PublicInbox::CodeSearchIdx;
+		@$argv = grep {
+			if (defined(my $d = resolve_cidxdir($_))) {
+				push @cidx, PublicInbox::CodeSearchIdx->new(
+							$d, $opt);
+				undef;
+			} else {
+				1;
+			}
+		} @$argv;
+	}
 	if ($opt->{-eidx_ok}) {
 		require PublicInbox::ExtSearchIdx;
-		my $i = -1;
 		@$argv = grep {
-			$i++;
 			if (defined(my $ei = resolve_eidxdir($_))) {
 				$ei = PublicInbox::ExtSearchIdx->new($ei, $opt);
 				push @eidx, $ei;
@@ -124,6 +135,7 @@ sub resolve_inboxes ($;$$) {
 				warn "W: $ibx->{name} $ibx->{inboxdir}: $!\n";
 			}
 		});
+		# TODO: no way to configure cindex in config file, yet
 	} else { # directories specified on the command-line
 		my @dirs = @$argv;
 		push @dirs, '.' if !@dirs && $opt->{-use_cwd};
@@ -164,7 +176,8 @@ sub resolve_inboxes ($;$$) {
 		die "-V$min_ver inboxes not supported by $0\n\t",
 		    join("\n\t", @old), "\n";
 	}
-	$opt->{-eidx_ok} ? (\@ibxs, \@eidx) : @ibxs;
+	($opt->{-eidx_ok} || $opt->{-cidx_ok}) ? (\@ibxs, \@eidx, \@cidx)
+						: @ibxs;
 }
 
 my @base_mod = ();
diff --git a/lib/PublicInbox/Xapcmd.pm b/lib/PublicInbox/Xapcmd.pm
index 3a4c5622..f3eb8e4e 100644
--- a/lib/PublicInbox/Xapcmd.pm
+++ b/lib/PublicInbox/Xapcmd.pm
@@ -1,7 +1,7 @@
 # Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 package PublicInbox::Xapcmd;
-use strict;
+use v5.12;
 use PublicInbox::Spawn qw(which popen_rd);
 use PublicInbox::Syscall;
 use PublicInbox::Admin qw(setup_signals);
@@ -75,7 +75,7 @@ sub commit_changes ($$$$) {
 	$tmp = undef;
 	if (!$opt->{-coarse_lock}) {
 		$opt->{-skip_lock} = 1;
-		$im //= $ibx if $ibx->can('eidx_sync');
+		$im //= $ibx if $ibx->can('eidx_sync') || $ibx->can('cidx_run');
 		if ($im->can('count_shards')) { # v2w or eidx
 			my $pr = $opt->{-progress};
 			my $n = $im->count_shards;
@@ -93,6 +93,8 @@ sub commit_changes ($$$$) {
 		local %ENV = (%ENV, %$env) if $env;
 		if ($ibx->can('eidx_sync')) {
 			$ibx->eidx_sync($opt);
+		} elsif ($ibx->can('cidx_run')) {
+			$ibx->cidx_run($opt);
 		} else {
 			PublicInbox::Admin::index_inbox($ibx, $im, $opt);
 		}
@@ -117,7 +119,8 @@ sub runnable_or_die ($) {
 
 sub prepare_reindex ($$) {
 	my ($ibx, $opt) = @_;
-	if ($ibx->can('eidx_sync')) { # no prep needed for ExtSearchIdx
+	if ($ibx->can('eidx_sync') || $ibx->can('cidx_run')) {
+		# no prep needed for ExtSearchIdx nor CodeSearchIdx
 	} elsif ($ibx->version == 1) {
 		my $dir = $ibx->search->xdir(1);
 		my $xdb = $PublicInbox::Search::X{Database}->new($dir);
@@ -186,7 +189,9 @@ sub prepare_run {
 	my $tmp = {}; # old shard dir => File::Temp->newdir object or undef
 	my @queue; # ([old//src,newdir]) - list of args for cpdb() or compact()
 	my ($old, $misc_ok);
-	if ($ibx->can('eidx_sync')) {
+	if ($ibx->can('cidx_run')) {
+		$old = $ibx->xdir(1);
+	} elsif ($ibx->can('eidx_sync')) {
 		$misc_ok = 1;
 		$old = $ibx->xdir(1);
 	} elsif (my $srch = $ibx->search) {
@@ -261,15 +266,17 @@ sub run {
 	my $cb = \&$task;
 	PublicInbox::Admin::progress_prepare($opt ||= {});
 	my $dir;
-	for my $fld (qw(inboxdir topdir)) {
+	for my $fld (qw(inboxdir topdir cidx_dir)) {
 		my $d = $ibx->{$fld} // next;
 		-d $d or die "$fld=$d does not exist\n";
 		$dir = $d;
 		last;
 	}
-	check_compact() if $opt->{compact} && $ibx->search;
+	check_compact() if $opt->{compact} &&
+				($ibx->can('cidx_run') || $ibx->search);
 
-	if (!$ibx->can('eidx_sync') && !$opt->{-coarse_lock}) {
+	if (!$ibx->can('eidx_sync') && $ibx->can('version') &&
+					!$opt->{-coarse_lock}) {
 		# per-epoch ranges for v2
 		# v1:{ from => $OID }, v2:{ from => [ $OID, $OID, $OID ] } }
 		$opt->{reindex} = { from => $ibx->version == 1 ? '' : [] };
diff --git a/script/public-inbox-compact b/script/public-inbox-compact
index 80d0224b..1062be5a 100755
--- a/script/public-inbox-compact
+++ b/script/public-inbox-compact
@@ -1,12 +1,12 @@
 #!perl -w
-# Copyright (C) 2018-2021 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
-use strict;
-use v5.10.1;
+use v5.12;
 use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev);
-my $opt = { compact => 1, -coarse_lock => 1, -eidx_ok => 1 };
+my $opt = { compact => 1, -coarse_lock => 1,
+	-eidx_ok => 1, -cidx_ok => 1 };
 my $help = <<EOF; # the following should fit w/o scrolling in 80x24 term:
-usage: public-inbox-compact <INBOX_DIR|EXTINDEX_DIR>
+usage: public-inbox-compact <INBOX_DIR|EXTINDEX_DIR|CINDEX_DIR>
 
   Compact Xapian DBs in an inbox
 
@@ -31,12 +31,14 @@ PublicInbox::Admin::progress_prepare($opt);
 require PublicInbox::InboxWritable;
 require PublicInbox::Xapcmd;
 my $cfg = PublicInbox::Config->new;
-my ($ibxs, $eidxs) = PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt, $cfg);
-unless ($ibxs) { print STDERR $help; exit 1 }
+my ($ibxs, $eidxs, $cidxs) =
+	PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt, $cfg);
+unless (@$ibxs || @$eidxs || @$cidxs) { print STDERR $help; exit 1 }
 for my $ibx (@$ibxs) {
 	$ibx = PublicInbox::InboxWritable->new($ibx);
 	PublicInbox::Xapcmd::run($ibx, 'compact', $opt);
 }
-for my $eidx (@$eidxs) {
-	PublicInbox::Xapcmd::run($eidx, 'compact', $opt);
+for my $ibxish (@$eidxs, @$cidxs) {
+	my $restore = $ibxish->can('prep_umask') ? $ibxish->prep_umask : undef;
+	PublicInbox::Xapcmd::run($ibxish, 'compact', $opt);
 }
diff --git a/t/cindex.t b/t/cindex.t
index 8b89ebff..b0d6f204 100644
--- a/t/cindex.t
+++ b/t/cindex.t
@@ -76,6 +76,13 @@ ok(!-d "$zp/.git/public-inbox-cindex", 'no cindex in original coderepo');
 ok(run_script([qw(-cindex -L medium --dangerous -q -d),
 	"$tmp/med", $zp, "$tmp/wt0"]), 'cindex external medium');
 
+
+SKIP: {
+	have_xapian_compact;
+	ok(run_script([qw(-compact -q), "$tmp/ext"]), 'compact on full');
+	ok(run_script([qw(-compact -q), "$tmp/med"]), 'compact on medium');
+}
+
 my $no_metadata_set = sub {
 	my ($i, $extra, $xdb) = @_;
 	for my $xdb (@$xdb) {

^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2023-05-03  3:11 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-05-03  3:11 [PATCH 0/2] compaction support for codesearch indices Eric Wong
2023-05-03  3:11 ` [PATCH 1/2] admin: hoist out resolve_any_idxdir from resolve_{inboxdir,eidxdir} Eric Wong
2023-05-03  3:11 ` [PATCH 2/2] compact: support codesearch indices Eric Wong

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).