unofficial mirror of meta@public-inbox.org
 help / color / mirror / Atom feed
* [PATCH] index|extindex: support --dangerous flag
@ 2022-03-07 10:57 Eric Wong
  0 siblings, 0 replies; only message in thread
From: Eric Wong @ 2022-03-07 10:57 UTC (permalink / raw)
  To: meta

This enables Xapian::DB_DANGEROUS to support in-place updates.
This can speed up the initial index and reduce I/O at the cost
of preventing concurrent readers and being unsafe in the face of
any abnormal terminations.  This is more dangerous than
--no-fsync.  --no-fsync is only unsafe in the event of a power
loss or kernel crash; --dangerous is unsafe even on SIGKILL.
---
 Documentation/public-inbox-extindex.pod | 14 ++++++++++++--
 Documentation/public-inbox-index.pod    |  9 ++++++++-
 lib/PublicInbox/ExtSearchIdx.pm         |  3 ++-
 lib/PublicInbox/MiscIdx.pm              |  1 +
 lib/PublicInbox/SearchIdx.pm            |  9 ++++++++-
 script/public-inbox-extindex            |  4 ++--
 script/public-inbox-index               |  3 ++-
 t/extsearch.t                           |  5 +++--
 t/indexlevels-mirror.t                  |  4 ++--
 9 files changed, 40 insertions(+), 12 deletions(-)

diff --git a/Documentation/public-inbox-extindex.pod b/Documentation/public-inbox-extindex.pod
index 9731dfb0..f71a90e5 100644
--- a/Documentation/public-inbox-extindex.pod
+++ b/Documentation/public-inbox-extindex.pod
@@ -24,7 +24,17 @@ along with L<DBD::SQLite> and L<DBI> Perl modules.
 
 =item --jobs=JOBS
 
-... TODO, see L<public-inbox-index(5)>
+=item --no-fsync
+
+=item --dangerous
+
+=item --rethread
+
+=item --max-size SIZE
+
+=item --batch-size SIZE
+
+These switches behave as they do for L<public-inbox-index(1)>.
 
 =item --all
 
@@ -119,7 +129,7 @@ L<http://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/meta/>
 
 =head1 COPYRIGHT
 
-Copyright 2021 all contributors L<mailto:meta@public-inbox.org>
+Copyright all contributors L<mailto:meta@public-inbox.org>
 
 License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt>
 
diff --git a/Documentation/public-inbox-index.pod b/Documentation/public-inbox-index.pod
index c92b6de4..011ade3c 100644
--- a/Documentation/public-inbox-index.pod
+++ b/Documentation/public-inbox-index.pod
@@ -150,6 +150,13 @@ data to accumulate, resulting on latency spikes from writeback.
 
 Available in public-inbox 1.6.0+.
 
+=item --dangerous
+
+Speed up initial index by using in-place updates and denying support for
+concurrent readers.  This is only effective with Xapian 1.4+.
+
+Available in public-inbox 1.8.0+.
+
 =item --sequential-shard
 
 Sets or overrides L</publicinbox.indexSequentialShard> on a
@@ -324,7 +331,7 @@ L<http://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/meta/>
 
 =head1 COPYRIGHT
 
-Copyright 2016-2021 all contributors L<mailto:meta@public-inbox.org>
+Copyright all contributors L<mailto:meta@public-inbox.org>
 
 License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt>
 
diff --git a/lib/PublicInbox/ExtSearchIdx.pm b/lib/PublicInbox/ExtSearchIdx.pm
index 4b46fa16..7c44a1a4 100644
--- a/lib/PublicInbox/ExtSearchIdx.pm
+++ b/lib/PublicInbox/ExtSearchIdx.pm
@@ -1,4 +1,4 @@
-# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
 # Detached/external index cross inbox search indexing support
@@ -59,6 +59,7 @@ sub new {
 		nproc_shards({ nproc => $opt->{jobs} });
 	my $oidx = PublicInbox::OverIdx->new("$self->{xpfx}/over.sqlite3");
 	$self->{-no_fsync} = $oidx->{-no_fsync} = 1 if !$opt->{fsync};
+	$self->{-dangerous} = 1 if $opt->{dangerous};
 	$self->{oidx} = $oidx;
 	$self
 }
diff --git a/lib/PublicInbox/MiscIdx.pm b/lib/PublicInbox/MiscIdx.pm
index dc15442d..5faf5c66 100644
--- a/lib/PublicInbox/MiscIdx.pm
+++ b/lib/PublicInbox/MiscIdx.pm
@@ -31,6 +31,7 @@ sub new {
 	PublicInbox::Syscall::nodatacow_dir($mi_dir);
 	my $flags = $PublicInbox::SearchIdx::DB_CREATE_OR_OPEN;
 	$flags |= $PublicInbox::SearchIdx::DB_NO_SYNC if $eidx->{-no_fsync};
+	$flags |= $PublicInbox::SearchIdx::DB_DANGEROUS if $eidx->{-dangerous};
 	$json //= PublicInbox::Config::json();
 	bless {
 		mi_dir => $mi_dir,
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index 95b14c3a..85fae4ad 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -30,6 +30,7 @@ our @EXPORT_OK = qw(log2stack is_ancestor check_size prepare_stack
 my $X = \%PublicInbox::Search::X;
 our ($DB_CREATE_OR_OPEN, $DB_OPEN);
 our $DB_NO_SYNC = 0;
+our $DB_DANGEROUS = 0;
 our $BATCH_BYTES = $ENV{XAPIAN_FLUSH_THRESHOLD} ? 0x7fffffff :
 	# assume a typical 64-bit system has 8x more RAM than a
 	# typical 32-bit system:
@@ -115,7 +116,10 @@ sub load_xapian_writable () {
 	my $ver = (eval($xap.'::major_version()') << 16) |
 		(eval($xap.'::minor_version()') << 8) |
 		eval($xap.'::revision()');
-	$DB_NO_SYNC = 0x4 if $ver >= 0x10400;
+	if ($ver >= 0x10400) {
+		$DB_NO_SYNC = 0x4;
+		$DB_DANGEROUS = 0x10;
+	}
 	# Xapian v1.2.21..v1.2.24 were missing close-on-exec on OFD locks
 	$X->{CLOEXEC_UNSET} = 1 if $ver >= 0x010215 && $ver <= 0x010218;
 	1;
@@ -142,6 +146,9 @@ sub idx_acquire {
 			require PublicInbox::Syscall;
 			PublicInbox::Syscall::nodatacow_dir($dir);
 			$self->{-set_has_threadid_once} = 1;
+			if (($self->{ibx} // $self->{eidx})->{-dangerous}) {
+				$flag |= $DB_DANGEROUS;
+			}
 		}
 	}
 	return unless defined $flag;
diff --git a/script/public-inbox-extindex b/script/public-inbox-extindex
index c63f5dc2..bee824b1 100755
--- a/script/public-inbox-extindex
+++ b/script/public-inbox-extindex
@@ -1,5 +1,5 @@
 #!perl -w
-# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 use strict;
 use v5.10.1;
@@ -28,7 +28,7 @@ See public-inbox-extindex(1) man page for full documentation.
 EOF
 my $opt = { quiet => -1, compact => 0, fsync => 1, scan => 1 };
 GetOptions($opt, qw(verbose|v+ reindex rethread compact|c+ jobs|j=i
-		fsync|sync! fast
+		fsync|sync! fast dangerous
 		indexlevel|index-level|L=s max_size|max-size=s
 		batch_size|batch-size=s
 		dedupe:s@ gc commit-interval=i watch scan! dry-run|n
diff --git a/script/public-inbox-index b/script/public-inbox-index
index 053d8b94..a04be9fc 100755
--- a/script/public-inbox-index
+++ b/script/public-inbox-index
@@ -39,7 +39,7 @@ my $opt = {
 	'update-extindex' => [], # ":s@" optional arg sets '' if no arg given
 };
 GetOptions($opt, qw(verbose|v+ reindex rethread compact|c+ jobs|j=i prune
-		fsync|sync! xapian_only|xapian-only
+		fsync|sync! xapian_only|xapian-only dangerous
 		indexlevel|index-level|L=s max_size|max-size=s
 		batch_size|batch-size=s
 		since|after=s until|before=s
@@ -126,6 +126,7 @@ for my $ibx (@ibxs) {
 		PublicInbox::Xapcmd::run($ibx, 'compact', $opt->{compact_opt});
 	}
 	$ibx->{-no_fsync} = 1 if !$opt->{fsync};
+	$ibx->{-dangerous} = 1 if $opt->{dangerous};
 	$ibx->{-skip_docdata} //= $opt->{'skip-docdata'};
 
 	my $ibx_opt = $opt;
diff --git a/t/extsearch.t b/t/extsearch.t
index dfc190e2..09cbdabe 100644
--- a/t/extsearch.t
+++ b/t/extsearch.t
@@ -1,5 +1,5 @@
 #!perl -w
-# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 use strict;
 use Test::More;
@@ -54,7 +54,8 @@ run_script(['-mda', '--no-precheck'], $env, { 0 => $fh }) or BAIL_OUT '-mda';
 
 run_script([qw(-index -Lbasic), "$home/v1test"]) or BAIL_OUT "index $?";
 
-ok(run_script([qw(-extindex --all), "$home/extindex"]), 'extindex init');
+ok(run_script([qw(-extindex --dangerous --all), "$home/extindex"]),
+	'extindex init');
 {
 	my $es = PublicInbox::ExtSearch->new("$home/extindex");
 	ok($es->has_threadid, '->has_threadid');
diff --git a/t/indexlevels-mirror.t b/t/indexlevels-mirror.t
index e606e79b..ac85643d 100644
--- a/t/indexlevels-mirror.t
+++ b/t/indexlevels-mirror.t
@@ -1,5 +1,5 @@
 #!perl -w
-# Copyright (C) 2019-2021 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 use strict;
 use v5.10.1;
@@ -34,7 +34,7 @@ my $import_index_incremental = sub {
 	local $ENV{PI_CONFIG} = "$tmpdir/config";
 
 	# index master (required for v1)
-	my @cmd = (qw(-index -j0), $ibx->{inboxdir}, "-L$level");
+	my @cmd = (qw(-index -j0 --dangerous), $ibx->{inboxdir}, "-L$level");
 	push @cmd, '-c' if have_xapian_compact;
 	ok(run_script(\@cmd, undef, { 2 => \$err }), 'index master');
 	my $ro_master = PublicInbox::Inbox->new({

^ permalink raw reply related	[flat|nested] only message in thread

only message in thread, other threads:[~2022-03-07 10:57 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-03-07 10:57 [PATCH] index|extindex: support --dangerous flag Eric Wong

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).