unofficial mirror of meta@public-inbox.org
 help / color / mirror / Atom feed
* [PATCH] extindex: support --no-multi-pack-index
@ 2024-04-28 21:15 Eric Wong
  0 siblings, 0 replies; only message in thread
From: Eric Wong @ 2024-04-28 21:15 UTC (permalink / raw)
  To: meta

Generating git multi-pack-index files was causing swap storms and
OOM-ing on my system, so providing an option to disable writing them
seems prudent given that it only costs a minor startup time regression.
---
 Documentation/public-inbox-extindex.pod | 13 +++++++++++++
 Documentation/public-inbox-index.pod    |  7 +++++++
 lib/PublicInbox/ExtSearchIdx.pm         |  2 +-
 script/public-inbox-extindex            |  2 +-
 script/public-inbox-index               |  1 +
 t/extsearch.t                           |  9 +++++++++
 6 files changed, 32 insertions(+), 2 deletions(-)

diff --git a/Documentation/public-inbox-extindex.pod b/Documentation/public-inbox-extindex.pod
index b53e45ed..2db7d7e9 100644
--- a/Documentation/public-inbox-extindex.pod
+++ b/Documentation/public-inbox-extindex.pod
@@ -80,6 +80,19 @@ doubles the size of the already-large Xapian database.
 Used with C<--reindex>, it will only look for new and stale
 entries and not touch already-indexed messages.
 
+=item --no-multi-pack-index
+
+Disable writing a L<git-multi-pack-index(1)> file to save memory.
+Normally, enabling multi-pack-index speeds up startup time of
+subsequent L<git-cat-file(1)> processes by 3-4%, but generating
+this file requires several GB of memory with large repos.
+
+Unlike the C<core.multiPackIndex> directive in git, it's still
+possible to read existing multi-pack-index files if they are
+created elsewhere.
+
+Available in public-inbox 2.0.0+
+
 =back
 
 =head1 FILES
diff --git a/Documentation/public-inbox-index.pod b/Documentation/public-inbox-index.pod
index 14f157a5..f1a2180a 100644
--- a/Documentation/public-inbox-index.pod
+++ b/Documentation/public-inbox-index.pod
@@ -192,6 +192,13 @@ external indices are configured.
 Do not update the C<all> external index by default.  This negates
 all uses of C<-E> / C<--update-extindex=> on the command-line.
 
+=item --no-multi-pack-index
+
+Disables writing the multi-pack-index when using L</--update-extindex>.
+See L<public-inbox-extindex(1)/--no-multi-pack-index> for details.
+
+Available in public-inbox 2.0.0+
+
 =item --since=DATESTRING
 
 =item --after=DATESTRING
diff --git a/lib/PublicInbox/ExtSearchIdx.pm b/lib/PublicInbox/ExtSearchIdx.pm
index 763a124c..774fa47b 100644
--- a/lib/PublicInbox/ExtSearchIdx.pm
+++ b/lib/PublicInbox/ExtSearchIdx.pm
@@ -1287,7 +1287,7 @@ sub idx_init { # similar to V2Writable
 	($has_new || $prune_nr || $new ne '') and
 		$self->{mg}->write_alternates($mode, $alt, $new);
 	my $restore = $self->with_umask;
-	if ($git_midx) {
+	if ($git_midx && ($opt->{'multi-pack-index'} // 1)) {
 		my @cmd = ('multi-pack-index');
 		push @cmd, '--no-progress' if ($opt->{quiet}//0) > 1;
 		my $lk = $self->lock_for_scope;
diff --git a/script/public-inbox-extindex b/script/public-inbox-extindex
index bee824b1..2e5a5d2c 100755
--- a/script/public-inbox-extindex
+++ b/script/public-inbox-extindex
@@ -32,7 +32,7 @@ GetOptions($opt, qw(verbose|v+ reindex rethread compact|c+ jobs|j=i
 		indexlevel|index-level|L=s max_size|max-size=s
 		batch_size|batch-size=s
 		dedupe:s@ gc commit-interval=i watch scan! dry-run|n
-		all C=s@ help|h))
+		multi-pack-index! all C=s@ help|h))
 	or die $help;
 if ($opt->{help}) { print $help; exit 0 };
 die "--jobs must be >= 0\n" if defined $opt->{jobs} && $opt->{jobs} < 0;
diff --git a/script/public-inbox-index b/script/public-inbox-index
index 74232ebf..a13e44bf 100755
--- a/script/public-inbox-index
+++ b/script/public-inbox-index
@@ -44,6 +44,7 @@ GetOptions($opt, qw(verbose|v+ reindex rethread compact|c+ jobs|j=i prune
 		batch_size|batch-size=s
 		since|after=s until|before=s
 		sequential-shard|seq-shard
+		multi-pack-index!
 		no-update-extindex update-extindex|E=s@
 		fast-noop|F skip-docdata all C=s@ help|h))
 	or die $help;
diff --git a/t/extsearch.t b/t/extsearch.t
index 090f6db5..797aa8f5 100644
--- a/t/extsearch.t
+++ b/t/extsearch.t
@@ -559,6 +559,15 @@ EOM
 	for (@xdb) {
 		ok(!$_->get_metadata('indexlevel'), 'no indexlevel in >0 shard')
 	}
+	my $mpi = "$d/ALL.git/objects/pack/multi-pack-index";
+	SKIP: {
+		skip 'git too old for for multi-pack-index', 2 if !-f $mpi;
+		unlink glob("$d/ALL.git/objects/pack/*");
+		ok run_script([qw(-extindex --all -L medium -j3
+				--no-multi-pack-index), $d]),
+				'test --no-multi-pack-index';
+		ok !-f $mpi, '--no-multi-pack-index respected';
+	}
 }
 
 test_lei(sub {

^ permalink raw reply related	[flat|nested] only message in thread

only message in thread, other threads:[~2024-04-28 21:15 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-04-28 21:15 [PATCH] extindex: support --no-multi-pack-index Eric Wong

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).