From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.1 required=3.0 tests=ALL_TRUSTED,AWL,BAYES_00, DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF, T_SCC_BODY_TEXT_LINE shortcircuit=no autolearn=ham autolearn_force=no version=3.4.6 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 722CF1F452; Thu, 20 Apr 2023 00:53:30 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=80x24.org; s=selector1; t=1681952010; bh=zx6ypxpIC9vvbXFpnXHrdxg6ox3a+rt3Qu1OSFqr0Gw=; h=Date:From:To:Subject:References:In-Reply-To:From; b=ydimev+ygby89z4B0iXBoY0IrbUCX+G9hC5/uH+wjtO7rw+0R/PXo+dmaOfRYwxPT 5PB2fotlWCyKvv0sRuJVgJjIAFGeJYpGf/pr9mwmfIM/UDV+FcRWZ1guU2PDOFxjY9 A+yywAFEilbCcFT+dxT516m4+thwuIpcnAStX3dY= Date: Thu, 20 Apr 2023 00:53:30 +0000 From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 2/1] cindex: limit parallelism of extensions.objectFormat check Message-ID: <20230420005330.M128120@dcvr> References: <20230419215448.2879268-1-e@80x24.org> MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Disposition: inline In-Reply-To: <20230419215448.2879268-1-e@80x24.org> List-Id: We can't safely spawn all `git config' processes of every indexed git directory at once due to system resource limits (RLIMIT_NPROC, RLIMIT_NOFILE). So queue them up and limit parallelism that way. 
--- lib/PublicInbox/CodeSearchIdx.pm | 33 +++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/lib/PublicInbox/CodeSearchIdx.pm b/lib/PublicInbox/CodeSearchIdx.pm index 54dbf785..97123133 100644 --- a/lib/PublicInbox/CodeSearchIdx.pm +++ b/lib/PublicInbox/CodeSearchIdx.pm @@ -61,6 +61,7 @@ our ( %HEXLEN2TMPGIT, # ((40|64) => PublicInbox::Git for prune) %ALT_FH, # '', or 'sha256' => tmp IO for TMPGIT alternates $TMPDIR, # File::Temp->newdir object + @PRUNE_QUEUE, # GIT_DIRs to prepare for pruning ); # stop walking history if we see >$SEEN_MAX existing commits, this assumes @@ -779,9 +780,11 @@ sub prep_umask ($) { } } -sub prep_alternate { # awaitpid callback for config extensions.objectFormat +sub prep_alternate_end { # awaitpid callback for config extensions.objectFormat my ($pid, $objdir, $out, $send_prune) = @_; my $status = $? >> 8; + my $next_dir = shift(@PRUNE_QUEUE); + prep_alternate_start($next_dir, $send_prune) if defined($next_dir); my $fmt; if ($status == 1) { # unset, default is '' (SHA-1) $fmt = 'sha1'; @@ -801,7 +804,20 @@ EOM open $ALT_FH{$fmt}, '>', $f or die "open($f): $!"; } say { $ALT_FH{$fmt} } $out or die "say: $!"; - # send_prune fires on the last one +} + +sub prep_alternate_start { # spawn `git config' for the next GIT_DIR with objects/ + my ($git_dir, $send_prune) = @_; + my $o = $git_dir.'/objects'; + while (!-d $o) { # skip GIT_DIRs lacking an objects/ directory + $git_dir = shift(@PRUNE_QUEUE) // return; # queue empty, nothing to spawn + $o = $git_dir.'/objects'; + } + my $cmd = [ 'git', "--git-dir=$git_dir", + qw(config extensions.objectFormat) ]; + open my $out, '+>', undef or die "open(tmp): $!"; + my $pid = spawn($cmd, undef, { 1 => $out }); + awaitpid($pid, \&prep_alternate_end, $o, $out, $send_prune); } sub init_prune ($) { @@ -812,14 +828,9 @@ sub init_prune ($) { require PublicInbox::Import; $TMPDIR = File::Temp->newdir('cidx-all-git-XXXX', TMPDIR => 1); my $send_prune = PublicInbox::OnDestroy->new($$, \&send_prune, $self); - my $cmd = [ 'git', undef, 'config', 'extensions.objectFormat' ]; - for (@{$self->{git_dirs}}) { 
- my $o = $_.'/objects'; - next if !-d $o; - $cmd->[1] = "--git-dir=$_"; - open my $out, '+>', undef or die "open(tmp): $!"; - my $pid = spawn($cmd, undef, { 1 => $out }); - awaitpid($pid, \&prep_alternate, $o, $out, $send_prune); + @PRUNE_QUEUE = @{$self->{git_dirs}}; + for (1..$LIVE_JOBS) { + prep_alternate_start(shift(@PRUNE_QUEUE) // last, $send_prune); } } @@ -846,7 +857,7 @@ sub cidx_run { # main entry point local $LIVE = {}; local $PRUNE_DONE = []; local $IDX_TODO = []; - local ($DO_QUIT, $REINDEX, $TXN_BYTES, @GIT_DIR_GONE, + local ($DO_QUIT, $REINDEX, $TXN_BYTES, @GIT_DIR_GONE, @PRUNE_QUEUE, $GIT_TODO, $REPO_CTX, %ALT_FH, $TMPDIR, %HEXLEN2TMPGIT); local $BATCH_BYTES = $self->{-opt}->{batch_size} // $PublicInbox::SearchIdx::BATCH_BYTES;