From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id DAA0F1FB09 for ; Fri, 18 Dec 2020 12:09:51 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 11/26] search: simplify initialization, add ->xdb_shards_flat Date: Fri, 18 Dec 2020 12:09:35 +0000 Message-Id: <20201218120950.23272-12-e@80x24.org> In-Reply-To: <20201218120950.23272-1-e@80x24.org> References: <20201218120950.23272-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: This reduces differences between v1 and v2 code, and introduces ->xdb_shards_flat to provide read-only access to shards without using Xapian::MultiDatabase. This will allow us to combine shards of several inboxes AND extindexes for lei. --- lib/PublicInbox/ExtSearch.pm | 6 ---- lib/PublicInbox/LeiSearch.pm | 5 ++- lib/PublicInbox/Search.pm | 65 ++++++++++++++--------------------- lib/PublicInbox/SearchIdx.pm | 15 ++++---- lib/PublicInbox/V2Writable.pm | 8 +---- 5 files changed, 34 insertions(+), 65 deletions(-) diff --git a/lib/PublicInbox/ExtSearch.pm b/lib/PublicInbox/ExtSearch.pm index 410ae958..7ce950bc 100644 --- a/lib/PublicInbox/ExtSearch.pm +++ b/lib/PublicInbox/ExtSearch.pm @@ -33,12 +33,6 @@ sub misc { sub search { $_[0] } # self -# overrides PublicInbox::Search::_xdb -sub _xdb { - my ($self) = @_; - $self->xdb_sharded; -} - # same as per-inbox ->over, for now... sub over { my ($self) = @_; diff --git a/lib/PublicInbox/LeiSearch.pm b/lib/PublicInbox/LeiSearch.pm index 9cfd6ea2..66c16e04 100644 --- a/lib/PublicInbox/LeiSearch.pm +++ b/lib/PublicInbox/LeiSearch.pm @@ -9,8 +9,7 @@ use PublicInbox::Search; sub combined_docid ($$) { my ($self, $num) = @_; - my $nshard = ($self->{nshard} // 1); - ($num - 1) * $nshard + 1; + ($num - 1) * $self->{nshard} + 1; } sub msg_keywords { @@ -19,7 +18,7 @@ sub msg_keywords { my $docid = ref($num) ? $num->get_docid : do { # get combined docid from over.num: # (not generic Xapian, only works with our sharding scheme) - my $nshard = $self->{nshard} // 1; + my $nshard = $self->{nshard}; ($num - 1) * $nshard + $num % $nshard + 1; }; my %kw; diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index b1d38fb9..bbc5e32f 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -191,41 +191,37 @@ sub xdir ($;$) { } } -sub xdb_sharded { +# returns all shards as separate Xapian::Database objects w/o combining +sub xdb_shards_flat ($) { my ($self) = @_; - opendir(my $dh, $self->{xpfx}) or return; # not initialized yet - - # We need numeric sorting so shard[0] is first for reading - # Xapian metadata, if needed - my $last = max(grep(/\A[0-9]+\z/, readdir($dh))) // return; + my $xpfx = $self->{xpfx}; my (@xdb, $slow_phrase); - for (0..$last) { - my $shard_dir = "$self->{xpfx}/$_"; - if (-d $shard_dir && -r _) { + if ($xpfx =~ m/xapian${\SCHEMA_VERSION}\z/) { + @xdb = ($X{Database}->new($xpfx)); + $self->{qp_flags} |= FLAG_PHRASE() if !-f "$xpfx/iamchert"; + } else { + opendir(my $dh, $xpfx) or return (); # not initialized yet + # We need numeric sorting so shard[0] is first for reading + # Xapian metadata, if needed + my $last = max(grep(/\A[0-9]+\z/, readdir($dh))) // return (); + for (0..$last) { + my $shard_dir = "$self->{xpfx}/$_"; push @xdb, $X{Database}->new($shard_dir); $slow_phrase ||= -f "$shard_dir/iamchert"; - } else { # gaps from missing epochs throw off mdocid() - warn "E: $shard_dir missing or unreadable\n"; - return; } + $self->{qp_flags} |= FLAG_PHRASE() if !$slow_phrase; } - $self->{qp_flags} |= FLAG_PHRASE() if !$slow_phrase; - $self->{nshard} = scalar(@xdb); - my $xdb = shift @xdb; - $xdb->add_database($_) for @xdb; - $xdb; + @xdb; } sub _xdb { my ($self) = @_; - my $dir = xdir($self, 1); $self->{qp_flags} //= $QP_FLAGS; - if ($self->{ibx_ver} >= 2) { - xdb_sharded($self); - } else { - $self->{qp_flags} |= FLAG_PHRASE() if !-f "$dir/iamchert"; - $X{Database}->new($dir); - } + my @xdb = xdb_shards_flat($self) or return; + $self->{nshard} = scalar(@xdb); + my $xdb = shift @xdb; + $xdb->add_database($_) for @xdb; + $xdb; } # v2 Xapian docids don't conflict, so they're identical to @@ -239,7 +235,7 @@ sub mdocid { sub mset_to_artnums { my ($self, $mset) = @_; - my $nshard = $self->{nshard} // 1; + my $nshard = $self->{nshard}; [ map { mdocid($nshard, $_) } $mset->items ]; } @@ -251,25 +247,14 @@ sub xdb ($) { }; } -sub xpfx_init ($) { - my ($self) = @_; - if ($self->{ibx_ver} == 1) { - $self->{xpfx} .= '/public-inbox/xapian' . SCHEMA_VERSION; - } else { - $self->{xpfx} .= '/xap'.SCHEMA_VERSION; - } -} - sub new { my ($class, $ibx) = @_; ref $ibx or die "BUG: expected PublicInbox::Inbox object: $ibx"; - my $self = bless { - xpfx => $ibx->{inboxdir}, # for xpfx_init + my $xap = $ibx->version > 1 ? 'xap' : 'public-inbox/xapian'; + bless { + xpfx => "$ibx->{inboxdir}/$xap" . SCHEMA_VERSION, altid => $ibx->{altid}, - ibx_ver => $ibx->version, }, $class; - xpfx_init($self); - $self; } sub reopen { @@ -362,7 +347,7 @@ sub _enquire_once { # retry_reopen callback sub mset_to_smsg { my ($self, $ibx, $mset) = @_; - my $nshard = $self->{nshard} // 1; + my $nshard = $self->{nshard}; my $i = 0; my %order = map { mdocid($nshard, $_) => ++$i } $mset->items; my @msgs = sort { diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index 548f2114..7e2843e9 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -54,14 +54,11 @@ sub new { } } $ibx = PublicInbox::InboxWritable->new($ibx); - my $self = bless { - ibx => $ibx, - xpfx => $inboxdir, # for xpfx_init - -altid => $altid, - ibx_ver => $version, - indexlevel => $indexlevel, - }, $class; - $self->xpfx_init; + my $self = PublicInbox::Search->new($ibx); + bless $self, $class; + $self->{ibx} = $ibx; + $self->{-altid} = $altid; + $self->{indexlevel} = $indexlevel; $self->{-set_indexlevel_once} = 1 if $indexlevel eq 'medium'; if ($ibx->{-skip_docdata}) { $self->{-set_skip_docdata_once} = 1; @@ -408,7 +405,7 @@ sub add_xapian ($$$$) { sub _msgmap_init ($) { my ($self) = @_; - die "BUG: _msgmap_init is only for v1\n" if $self->{ibx_ver} != 1; + die "BUG: _msgmap_init is only for v1\n" if $self->{ibx}->version != 1; $self->{mm} //= eval { require PublicInbox::Msgmap; my $rw = $self->{ibx}->{-no_fsync} ? 2 : 1; diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm index e8a5fbd2..7d41b0f6 100644 --- a/lib/PublicInbox/V2Writable.pm +++ b/lib/PublicInbox/V2Writable.pm @@ -73,13 +73,7 @@ sub count_shards ($) { delete $ibx->{search}; $srch->{nshard} // 0 } else { # ExtSearchIdx - $self->{nshard} // do { - if ($self->xdb_sharded) { - $self->{nshard} // die 'BUG: {nshard} unset'; - } else { - 0; - } - } + $self->{nshard} ||= scalar($self->xdb_shards_flat); } }