From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.2 required=3.0 tests=ALL_TRUSTED,BAYES_00, DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF shortcircuit=no autolearn=ham autolearn_force=no version=3.4.6 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id EE7681F50B for ; Tue, 21 Mar 2023 23:07:45 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=80x24.org; s=selector1; t=1679440066; bh=+3ZXPk3NGMhJZcgx+c3e+5psDJTmYMKiF2cKVYC9nBo=; h=From:To:Subject:Date:In-Reply-To:References:From; b=fRFYjfNtf8xHazqT/FBIDXxDGMe58ifEFgjzeCn7YG/+k0j0wIK8TPlS8UkX6o9J3 +2JtU5zbhhpRin7DQodqP5OqzRiKfwyh2d6RAP5kyBQbZhTSIq8SJpEa36nb4D4P4n z8oPRXQrXTIYoNo+XJOjE404wuy/Xm3hy9LTACAU= From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 12/28] cindex: implement --exclude= like -clone Date: Tue, 21 Mar 2023 23:07:27 +0000 Message-Id: <20230321230743.3020032-12-e@80x24.org> In-Reply-To: <20230321230743.3020032-1-e@80x24.org> References: <20230321230701.3019936-1-e@80x24.org> <20230321230743.3020032-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: This is to ensure we can exclude certain repos which are expensive-to-index (e.g. `**/deps.git', `**/transparency-logs/**'). --- lib/PublicInbox/CodeSearchIdx.pm | 10 +++++++++- script/public-inbox-cindex | 2 +- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/lib/PublicInbox/CodeSearchIdx.pm b/lib/PublicInbox/CodeSearchIdx.pm index 587f0b81..97c563bd 100644 --- a/lib/PublicInbox/CodeSearchIdx.pm +++ b/lib/PublicInbox/CodeSearchIdx.pm @@ -27,7 +27,7 @@ use File::Spec (); use PublicInbox::SHA qw(sha256_hex); use PublicInbox::Search qw(xap_terms); use PublicInbox::SearchIdx qw(add_val); -use PublicInbox::Config; +use PublicInbox::Config qw(glob2re); use PublicInbox::Spawn qw(spawn popen_rd); use PublicInbox::OnDestroy; use Socket qw(MSG_EOR); @@ -566,6 +566,14 @@ sub cidx_run { # main entry point } warn "E: canonicalized and attempting to continue\n"; } + if (defined(my $excl = $self->{-opt}->{exclude})) { + my $re = '(?:'.join('\\z|', map { + glob2re($_) // qr/\A\Q$_\E/ + } @$excl).'\\z)'; + @{$self->{git_dirs}} = grep { + $_ =~ /$re/ ? (warn("# excluding $_\n"), 0) : 1; + } @{$self->{git_dirs}}; + } local $self->{nchange} = 0; local $LIVE_JOBS = $self->{-opt}->{jobs} || PublicInbox::IPC::detect_nproc() || 2; diff --git a/script/public-inbox-cindex b/script/public-inbox-cindex index 166c8261..420ef4de 100755 --- a/script/public-inbox-cindex +++ b/script/public-inbox-cindex @@ -26,7 +26,7 @@ EOF my $opt = { fsync => 1, scan => 1 }; # --no-scan is hidden GetOptions($opt, qw(quiet|q verbose|v+ reindex jobs|j=i fsync|sync! dangerous indexlevel|index-level|L=s batch_size|batch-size=s - project-list=s + project-list=s exclude=s@ d=s update|u scan! prune dry-run|n C=s@ help|h)) or die $help; if ($opt->{help}) { print $help; exit 0 };