unofficial mirror of meta@public-inbox.org
 help / color / mirror / Atom feed
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 11/26] search: simplify initialization, add ->xdb_shards_flat
Date: Fri, 18 Dec 2020 12:09:35 +0000	[thread overview]
Message-ID: <20201218120950.23272-12-e@80x24.org> (raw)
In-Reply-To: <20201218120950.23272-1-e@80x24.org>

This reduces differences between v1 and v2 code, and
introduces ->xdb_shards_flat to provide read-only access
to shards without using Xapian::MultiDatabase.  This
will allow us to combine shards of several inboxes
AND extindexes for lei.
---
 lib/PublicInbox/ExtSearch.pm  |  6 ----
 lib/PublicInbox/LeiSearch.pm  |  5 ++-
 lib/PublicInbox/Search.pm     | 65 ++++++++++++++---------------------
 lib/PublicInbox/SearchIdx.pm  | 15 ++++----
 lib/PublicInbox/V2Writable.pm |  8 +----
 5 files changed, 34 insertions(+), 65 deletions(-)

diff --git a/lib/PublicInbox/ExtSearch.pm b/lib/PublicInbox/ExtSearch.pm
index 410ae958..7ce950bc 100644
--- a/lib/PublicInbox/ExtSearch.pm
+++ b/lib/PublicInbox/ExtSearch.pm
@@ -33,12 +33,6 @@ sub misc {
 
 sub search { $_[0] } # self
 
-# overrides PublicInbox::Search::_xdb
-sub _xdb {
-	my ($self) = @_;
-	$self->xdb_sharded;
-}
-
 # same as per-inbox ->over, for now...
 sub over {
 	my ($self) = @_;
diff --git a/lib/PublicInbox/LeiSearch.pm b/lib/PublicInbox/LeiSearch.pm
index 9cfd6ea2..66c16e04 100644
--- a/lib/PublicInbox/LeiSearch.pm
+++ b/lib/PublicInbox/LeiSearch.pm
@@ -9,8 +9,7 @@ use PublicInbox::Search;
 
 sub combined_docid ($$) {
 	my ($self, $num) = @_;
-	my $nshard = ($self->{nshard} // 1);
-	($num - 1) * $nshard  + 1;
+	($num - 1) * $self->{nshard} + 1;
 }
 
 sub msg_keywords {
@@ -19,7 +18,7 @@ sub msg_keywords {
 	my $docid = ref($num) ? $num->get_docid : do {
 		# get combined docid from over.num:
 		# (not generic Xapian, only works with our sharding scheme)
-		my $nshard = $self->{nshard} // 1;
+		my $nshard = $self->{nshard};
 		($num - 1) * $nshard + $num % $nshard + 1;
 	};
 	my %kw;
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index b1d38fb9..bbc5e32f 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -191,41 +191,37 @@ sub xdir ($;$) {
 	}
 }
 
-sub xdb_sharded {
+# returns all shards as separate Xapian::Database objects w/o combining
+sub xdb_shards_flat ($) {
 	my ($self) = @_;
-	opendir(my $dh, $self->{xpfx}) or return; # not initialized yet
-
-	# We need numeric sorting so shard[0] is first for reading
-	# Xapian metadata, if needed
-	my $last = max(grep(/\A[0-9]+\z/, readdir($dh))) // return;
+	my $xpfx = $self->{xpfx};
 	my (@xdb, $slow_phrase);
-	for (0..$last) {
-		my $shard_dir = "$self->{xpfx}/$_";
-		if (-d $shard_dir && -r _) {
+	if ($xpfx =~ m/xapian${\SCHEMA_VERSION}\z/) {
+		@xdb = ($X{Database}->new($xpfx));
+		$self->{qp_flags} |= FLAG_PHRASE() if !-f "$xpfx/iamchert";
+	} else {
+		opendir(my $dh, $xpfx) or return (); # not initialized yet
+		# We need numeric sorting so shard[0] is first for reading
+		# Xapian metadata, if needed
+		my $last = max(grep(/\A[0-9]+\z/, readdir($dh))) // return ();
+		for (0..$last) {
+			my $shard_dir = "$self->{xpfx}/$_";
 			push @xdb, $X{Database}->new($shard_dir);
 			$slow_phrase ||= -f "$shard_dir/iamchert";
-		} else { # gaps from missing epochs throw off mdocid()
-			warn "E: $shard_dir missing or unreadable\n";
-			return;
 		}
+		$self->{qp_flags} |= FLAG_PHRASE() if !$slow_phrase;
 	}
-	$self->{qp_flags} |= FLAG_PHRASE() if !$slow_phrase;
-	$self->{nshard} = scalar(@xdb);
-	my $xdb = shift @xdb;
-	$xdb->add_database($_) for @xdb;
-	$xdb;
+	@xdb;
 }
 
 sub _xdb {
 	my ($self) = @_;
-	my $dir = xdir($self, 1);
 	$self->{qp_flags} //= $QP_FLAGS;
-	if ($self->{ibx_ver} >= 2) {
-		xdb_sharded($self);
-	} else {
-		$self->{qp_flags} |= FLAG_PHRASE() if !-f "$dir/iamchert";
-		$X{Database}->new($dir);
-	}
+	my @xdb = xdb_shards_flat($self) or return;
+	$self->{nshard} = scalar(@xdb);
+	my $xdb = shift @xdb;
+	$xdb->add_database($_) for @xdb;
+	$xdb;
 }
 
 # v2 Xapian docids don't conflict, so they're identical to
@@ -239,7 +235,7 @@ sub mdocid {
 
 sub mset_to_artnums {
 	my ($self, $mset) = @_;
-	my $nshard = $self->{nshard} // 1;
+	my $nshard = $self->{nshard};
 	[ map { mdocid($nshard, $_) } $mset->items ];
 }
 
@@ -251,25 +247,14 @@ sub xdb ($) {
 	};
 }
 
-sub xpfx_init ($) {
-	my ($self) = @_;
-	if ($self->{ibx_ver} == 1) {
-		$self->{xpfx} .= '/public-inbox/xapian' . SCHEMA_VERSION;
-	} else {
-		$self->{xpfx} .= '/xap'.SCHEMA_VERSION;
-	}
-}
-
 sub new {
 	my ($class, $ibx) = @_;
 	ref $ibx or die "BUG: expected PublicInbox::Inbox object: $ibx";
-	my $self = bless {
-		xpfx => $ibx->{inboxdir}, # for xpfx_init
+	my $xap = $ibx->version > 1 ? 'xap' : 'public-inbox/xapian';
+	bless {
+		xpfx => "$ibx->{inboxdir}/$xap" . SCHEMA_VERSION,
 		altid => $ibx->{altid},
-		ibx_ver => $ibx->version,
 	}, $class;
-	xpfx_init($self);
-	$self;
 }
 
 sub reopen {
@@ -362,7 +347,7 @@ sub _enquire_once { # retry_reopen callback
 
 sub mset_to_smsg {
 	my ($self, $ibx, $mset) = @_;
-	my $nshard = $self->{nshard} // 1;
+	my $nshard = $self->{nshard};
 	my $i = 0;
 	my %order = map { mdocid($nshard, $_) => ++$i } $mset->items;
 	my @msgs = sort {
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index 548f2114..7e2843e9 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -54,14 +54,11 @@ sub new {
 		}
 	}
 	$ibx = PublicInbox::InboxWritable->new($ibx);
-	my $self = bless {
-		ibx => $ibx,
-		xpfx => $inboxdir, # for xpfx_init
-		-altid => $altid,
-		ibx_ver => $version,
-		indexlevel => $indexlevel,
-	}, $class;
-	$self->xpfx_init;
+	my $self = PublicInbox::Search->new($ibx);
+	bless $self, $class;
+	$self->{ibx} = $ibx;
+	$self->{-altid} = $altid;
+	$self->{indexlevel} = $indexlevel;
 	$self->{-set_indexlevel_once} = 1 if $indexlevel eq 'medium';
 	if ($ibx->{-skip_docdata}) {
 		$self->{-set_skip_docdata_once} = 1;
@@ -408,7 +405,7 @@ sub add_xapian ($$$$) {
 
 sub _msgmap_init ($) {
 	my ($self) = @_;
-	die "BUG: _msgmap_init is only for v1\n" if $self->{ibx_ver} != 1;
+	die "BUG: _msgmap_init is only for v1\n" if $self->{ibx}->version != 1;
 	$self->{mm} //= eval {
 		require PublicInbox::Msgmap;
 		my $rw = $self->{ibx}->{-no_fsync} ? 2 : 1;
diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm
index e8a5fbd2..7d41b0f6 100644
--- a/lib/PublicInbox/V2Writable.pm
+++ b/lib/PublicInbox/V2Writable.pm
@@ -73,13 +73,7 @@ sub count_shards ($) {
 		delete $ibx->{search};
 		$srch->{nshard} // 0
 	} else { # ExtSearchIdx
-		$self->{nshard} // do {
-			if ($self->xdb_sharded) {
-				$self->{nshard} // die 'BUG: {nshard} unset';
-			} else {
-				0;
-			}
-		}
+		$self->{nshard} ||= scalar($self->xdb_shards_flat);
 	}
 }
 

  parent reply	other threads:[~2020-12-18 12:09 UTC|newest]

Thread overview: 30+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-12-18 12:09 [PATCH 00/26] lei: basic UI + IPC work Eric Wong
2020-12-18 12:09 ` [PATCH 01/26] lei: FD-passing and IPC basics Eric Wong
2020-12-18 12:09 ` [PATCH 02/26] lei: proposed command-listing and options Eric Wong
2021-02-18 20:42   ` lei q --save-as=... requires too much thinking Eric Wong
2020-12-18 12:09 ` [PATCH 03/26] lei_store: local storage for Local Email Interface Eric Wong
2020-12-18 12:09 ` [PATCH 04/26] tests: more common JSON module loading Eric Wong
2020-12-18 12:09 ` [PATCH 05/26] lei: use spawn (vfork + execve) for lazy start Eric Wong
2020-12-18 12:09 ` [PATCH 06/26] lei: refine help/option parsing, implement "init" Eric Wong
2020-12-18 12:09 ` [PATCH 07/26] t/lei-oneshot: standalone oneshot (non-socket) test Eric Wong
2020-12-18 12:09 ` [PATCH 08/26] lei: ensure we run a restrictive umask Eric Wong
2020-12-18 12:09 ` [PATCH 09/26] lei: support `daemon-env' for modifying long-lived env Eric Wong
2020-12-18 12:09 ` [PATCH 10/26] lei_store: simplify git_epoch_max, slightly Eric Wong
2020-12-18 12:09 ` Eric Wong [this message]
2020-12-18 12:09 ` [PATCH 12/26] rename LeiDaemon package to PublicInbox::LEI Eric Wong
2020-12-18 12:09 ` [PATCH 13/26] lei: support pass-through for `lei config' Eric Wong
2020-12-18 12:09 ` [PATCH 14/26] lei: help: show actual paths being operated on Eric Wong
2020-12-18 12:09 ` [PATCH 15/26] lei: rename $client => $self and bless Eric Wong
2020-12-18 12:09 ` [PATCH 16/26] lei: micro-optimize startup time Eric Wong
2020-12-18 12:09 ` [PATCH 17/26] lei_store: relax GIT_COMMITTER_IDENT check Eric Wong
2020-12-18 12:09 ` [PATCH 18/26] lei_store: keyword extraction from mbox and Maildir Eric Wong
2020-12-18 12:09 ` [PATCH 19/26] on_destroy: generic localized END Eric Wong
2020-12-18 12:09 ` [PATCH 20/26] lei: restore default __DIE__ handler for event loop Eric Wong
2020-12-18 12:09 ` [PATCH 21/26] lei: drop $SIG{__DIE__}, add oneshot fallbacks Eric Wong
2020-12-18 12:09 ` [PATCH 22/26] lei: start working on bash completion Eric Wong
2020-12-18 12:09 ` [PATCH 23/26] build: add lei.sh + "make symlink-install" target Eric Wong
2020-12-18 12:09 ` [PATCH 24/26] lei: support for -$DIGIT and -$SIG CLI switches Eric Wong
2020-12-18 12:09 ` [PATCH 25/26] lei: revise output routines Eric Wong
2020-12-18 12:09 ` [PATCH 26/26] lei: extinbox: start implementing in config file Eric Wong
2020-12-18 20:23   ` Eric Wong
2020-12-27 20:02   ` [PATCH 27/26] lei_xsearch: cross-(inbox|extindex) search Eric Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20201218120950.23272-12-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

  Be sure your reply has a Subject: header at the top and a blank
  line before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).