From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.2 required=3.0 tests=ALL_TRUSTED,AWL,BAYES_00, DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF, T_SCC_BODY_TEXT_LINE shortcircuit=no autolearn=ham autolearn_force=no version=3.4.6 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 237F01F68D for ; Wed, 3 May 2023 03:11:15 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=80x24.org; s=selector1; t=1683083475; bh=JLipHn6mZO0C1bILD8155/K2o4qGEqafd6fBwdB7D8c=; h=From:To:Subject:Date:In-Reply-To:References:From; b=yC2AoQpFN0OL5wjMzQ2LKOq1RXfWRGPlqziyCmm+4/OqkEEJAIzEuIimMJ51DEl3b tlLQ8E42LTqrzUjflU/6jRljAf66RprAHBHVy6cgJkLoh1VKGTavT5LeLZXT91WHMb fzWXye4e/T9kryRH2Or07sGIEND7FAYdSrbSn5ls= From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 2/2] compact: support codesearch indices Date: Wed, 3 May 2023 03:11:14 +0000 Message-Id: <20230503031114.3315491-3-e@80x24.org> In-Reply-To: <20230503031114.3315491-1-e@80x24.org> References: <20230503031114.3315491-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: This is much easier to support than xcpdb since it's 1:1 and doesn't follow a different sharding scheme than the inboxes and extindices. --- lib/PublicInbox/Admin.pm | 21 +++++++++++++++++---- lib/PublicInbox/Xapcmd.pm | 21 ++++++++++++++------- script/public-inbox-compact | 20 +++++++++++--------- t/cindex.t | 7 +++++++ 4 files changed, 49 insertions(+), 20 deletions(-) diff --git a/lib/PublicInbox/Admin.pm b/lib/PublicInbox/Admin.pm index 96c6652c..72ac9420 100644 --- a/lib/PublicInbox/Admin.pm +++ b/lib/PublicInbox/Admin.pm @@ -50,6 +50,7 @@ sub resolve_any_idxdir ($$) { } sub resolve_eidxdir ($) { resolve_any_idxdir($_[0], 'ei.lock') } +sub resolve_cidxdir ($) { resolve_any_idxdir($_[0], 'cidx.lock') } sub resolve_inboxdir { my ($cd, $ver) = @_; @@ -97,12 +98,22 @@ sub resolve_inboxes ($;$$) { $cfg or die "--all specified, but $cfgfile not readable\n"; @$argv and die "--all specified, but directories specified\n"; } - my (@old, @ibxs, @eidx); + my (@old, @ibxs, @eidx, @cidx); + if ($opt->{-cidx_ok}) { + require PublicInbox::CodeSearchIdx; + @$argv = grep { + if (defined(my $d = resolve_cidxdir($_))) { + push @cidx, PublicInbox::CodeSearchIdx->new( + $d, $opt); + undef; + } else { + 1; + } + } @$argv; + } if ($opt->{-eidx_ok}) { require PublicInbox::ExtSearchIdx; - my $i = -1; @$argv = grep { - $i++; if (defined(my $ei = resolve_eidxdir($_))) { $ei = PublicInbox::ExtSearchIdx->new($ei, $opt); push @eidx, $ei; @@ -124,6 +135,7 @@ sub resolve_inboxes ($;$$) { warn "W: $ibx->{name} $ibx->{inboxdir}: $!\n"; } }); + # TODO: no way to configure cindex in config file, yet } else { # directories specified on the command-line my @dirs = @$argv; push @dirs, '.' if !@dirs && $opt->{-use_cwd}; @@ -164,7 +176,8 @@ sub resolve_inboxes ($;$$) { die "-V$min_ver inboxes not supported by $0\n\t", join("\n\t", @old), "\n"; } - $opt->{-eidx_ok} ? (\@ibxs, \@eidx) : @ibxs; + ($opt->{-eidx_ok} || $opt->{-cidx_ok}) ? (\@ibxs, \@eidx, \@cidx) + : @ibxs; } my @base_mod = (); diff --git a/lib/PublicInbox/Xapcmd.pm b/lib/PublicInbox/Xapcmd.pm index 3a4c5622..f3eb8e4e 100644 --- a/lib/PublicInbox/Xapcmd.pm +++ b/lib/PublicInbox/Xapcmd.pm @@ -1,7 +1,7 @@ # Copyright (C) all contributors # License: AGPL-3.0+ package PublicInbox::Xapcmd; -use strict; +use v5.12; use PublicInbox::Spawn qw(which popen_rd); use PublicInbox::Syscall; use PublicInbox::Admin qw(setup_signals); @@ -75,7 +75,7 @@ sub commit_changes ($$$$) { $tmp = undef; if (!$opt->{-coarse_lock}) { $opt->{-skip_lock} = 1; - $im //= $ibx if $ibx->can('eidx_sync'); + $im //= $ibx if $ibx->can('eidx_sync') || $ibx->can('cidx_run'); if ($im->can('count_shards')) { # v2w or eidx my $pr = $opt->{-progress}; my $n = $im->count_shards; @@ -93,6 +93,8 @@ sub commit_changes ($$$$) { local %ENV = (%ENV, %$env) if $env; if ($ibx->can('eidx_sync')) { $ibx->eidx_sync($opt); + } elsif ($ibx->can('cidx_run')) { + $ibx->cidx_run($opt); } else { PublicInbox::Admin::index_inbox($ibx, $im, $opt); } @@ -117,7 +119,8 @@ sub runnable_or_die ($) { sub prepare_reindex ($$) { my ($ibx, $opt) = @_; - if ($ibx->can('eidx_sync')) { # no prep needed for ExtSearchIdx + if ($ibx->can('eidx_sync') || $ibx->can('cidx_run')) { + # no prep needed for ExtSearchIdx nor CodeSearchIdx } elsif ($ibx->version == 1) { my $dir = $ibx->search->xdir(1); my $xdb = $PublicInbox::Search::X{Database}->new($dir); @@ -186,7 +189,9 @@ sub prepare_run { my $tmp = {}; # old shard dir => File::Temp->newdir object or undef my @queue; # ([old//src,newdir]) - list of args for cpdb() or compact() my ($old, $misc_ok); - if ($ibx->can('eidx_sync')) { + if ($ibx->can('cidx_run')) { + $old = $ibx->xdir(1); + } elsif ($ibx->can('eidx_sync')) { $misc_ok = 1; $old = $ibx->xdir(1); } elsif (my $srch = $ibx->search) { @@ -261,15 +266,17 @@ sub run { my $cb = \&$task; PublicInbox::Admin::progress_prepare($opt ||= {}); my $dir; - for my $fld (qw(inboxdir topdir)) { + for my $fld (qw(inboxdir topdir cidx_dir)) { my $d = $ibx->{$fld} // next; -d $d or die "$fld=$d does not exist\n"; $dir = $d; last; } - check_compact() if $opt->{compact} && $ibx->search; + check_compact() if $opt->{compact} && + ($ibx->can('cidx_run') || $ibx->search); - if (!$ibx->can('eidx_sync') && !$opt->{-coarse_lock}) { + if (!$ibx->can('eidx_sync') && $ibx->can('version') && + !$opt->{-coarse_lock}) { # per-epoch ranges for v2 # v1:{ from => $OID }, v2:{ from => [ $OID, $OID, $OID ] } } $opt->{reindex} = { from => $ibx->version == 1 ? '' : [] }; diff --git a/script/public-inbox-compact b/script/public-inbox-compact index 80d0224b..1062be5a 100755 --- a/script/public-inbox-compact +++ b/script/public-inbox-compact @@ -1,12 +1,12 @@ #!perl -w -# Copyright (C) 2018-2021 all contributors +# Copyright (C) all contributors # License: AGPL-3.0+ -use strict; -use v5.10.1; +use v5.12; use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); -my $opt = { compact => 1, -coarse_lock => 1, -eidx_ok => 1 }; +my $opt = { compact => 1, -coarse_lock => 1, + -eidx_ok => 1, -cidx_ok => 1 }; my $help = < +usage: public-inbox-compact Compact Xapian DBs in an inbox @@ -31,12 +31,14 @@ PublicInbox::Admin::progress_prepare($opt); require PublicInbox::InboxWritable; require PublicInbox::Xapcmd; my $cfg = PublicInbox::Config->new; -my ($ibxs, $eidxs) = PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt, $cfg); -unless ($ibxs) { print STDERR $help; exit 1 } +my ($ibxs, $eidxs, $cidxs) = + PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt, $cfg); +unless (@$ibxs || @$eidxs || @$cidxs) { print STDERR $help; exit 1 } for my $ibx (@$ibxs) { $ibx = PublicInbox::InboxWritable->new($ibx); PublicInbox::Xapcmd::run($ibx, 'compact', $opt); } -for my $eidx (@$eidxs) { - PublicInbox::Xapcmd::run($eidx, 'compact', $opt); +for my $ibxish (@$eidxs, @$cidxs) { + my $restore = $ibxish->can('prep_umask') ? $ibxish->prep_umask : undef; + PublicInbox::Xapcmd::run($ibxish, 'compact', $opt); } diff --git a/t/cindex.t b/t/cindex.t index 8b89ebff..b0d6f204 100644 --- a/t/cindex.t +++ b/t/cindex.t @@ -76,6 +76,13 @@ ok(!-d "$zp/.git/public-inbox-cindex", 'no cindex in original coderepo'); ok(run_script([qw(-cindex -L medium --dangerous -q -d), "$tmp/med", $zp, "$tmp/wt0"]), 'cindex external medium'); + +SKIP: { + have_xapian_compact; + ok(run_script([qw(-compact -q), "$tmp/ext"]), 'compact on full'); + ok(run_script([qw(-compact -q), "$tmp/med"]), 'compact on medium'); +} + my $no_metadata_set = sub { my ($i, $extra, $xdb) = @_; for my $xdb (@$xdb) {