From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 710591FB05 for ; Mon, 23 Nov 2020 07:06:03 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 06/12] miscidx: put grokmirror manifest entries in Xapian docdata Date: Mon, 23 Nov 2020 07:05:56 +0000 Message-Id: <20201123070602.9698-7-e@80x24.org> In-Reply-To: <20201123070602.9698-1-e@80x24.org> References: <20201123070602.9698-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: This should make it possible for us to quickly generate manifest.js.gz files with less random I/O and process spawning in the WWW code. --- lib/PublicInbox/MiscIdx.pm | 15 +++++++++++++++ script/public-inbox-extindex | 1 + t/extsearch.t | 7 ++++++- t/miscsearch.t | 3 +++ 4 files changed, 25 insertions(+), 1 deletion(-) diff --git a/lib/PublicInbox/MiscIdx.pm b/lib/PublicInbox/MiscIdx.pm index edc70f9b..9dcc96b7 100644 --- a/lib/PublicInbox/MiscIdx.pm +++ b/lib/PublicInbox/MiscIdx.pm @@ -20,6 +20,7 @@ use PublicInbox::Spawn qw(nodatacow_dir); use Carp qw(croak); use File::Path (); use PublicInbox::MiscSearch; +use PublicInbox::Config; sub new { my ($class, $eidx) = @_; @@ -97,6 +98,20 @@ EOF } } index_text($self, $ibx->{name}, 1, 'XNAME'); + my $data = {}; + if (defined(my $max = $ibx->max_git_epoch)) { # v2 + my $desc = $ibx->description; + my $pfx = "/$ibx->{name}/git/"; + for my $epoch (0..$max) { + my $git = $ibx->git_epoch($epoch) or return; + if (my $ent = $git->manifest_entry($epoch, $desc)) { + $data->{"$pfx$epoch.git"} = $ent; + } + } + } elsif (my $ent = $ibx->git->manifest_entry) { # v1 + $data->{"/$ibx->{name}"} = $ent; + } + $doc->set_data(PublicInbox::Config::json()->encode($data)); 
if (defined $docid) { $xdb->replace_document($docid, $doc); } else { diff --git a/script/public-inbox-extindex b/script/public-inbox-extindex index 78d6d9d9..20a0737c 100644 --- a/script/public-inbox-extindex +++ b/script/public-inbox-extindex @@ -38,6 +38,7 @@ require PublicInbox::Admin; my $cfg = PublicInbox::Config->new; my @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt, $cfg); PublicInbox::Admin::require_or_die(qw(-search)); +PublicInbox::Config::json() or die "Cpanel::JSON::XS or similar missing\n"; PublicInbox::Admin::progress_prepare($opt); my $env = PublicInbox::Admin::index_prepare($opt, $cfg); local %ENV = (%ENV, %$env) if $env; diff --git a/t/extsearch.t b/t/extsearch.t index e28e2f71..dc825bf4 100644 --- a/t/extsearch.t +++ b/t/extsearch.t @@ -4,7 +4,9 @@ use strict; use Test::More; use PublicInbox::TestCommon; +use PublicInbox::Config; use Fcntl qw(:seek); +my $json = PublicInbox::Config::json() or plan skip_all => 'JSON missing'; require_git(2.6); require_mods(qw(DBD::SQLite Search::Xapian)); use_ok 'PublicInbox::ExtSearch'; @@ -73,6 +75,9 @@ my $es = PublicInbox::ExtSearch->new("$home/eindex"); } my $misc = $es->misc; -is(scalar($misc->mset('')->items), 2, 'two inboxes'); +my @it = $misc->mset('')->items; +is(scalar(@it), 2, 'two inboxes'); +like($it[0]->get_document->get_data, qr/v2test/, 'docdata matched v2'); +like($it[1]->get_document->get_data, qr/v1test/, 'docdata matched v1'); done_testing; diff --git a/t/miscsearch.t b/t/miscsearch.t index 45a19da9..0ba79194 100644 --- a/t/miscsearch.t +++ b/t/miscsearch.t @@ -50,5 +50,8 @@ is(scalar($mset->items), 1, 'match partial address'); $mset = $ms->mset('hope'); is(scalar($mset->items), 1, 'match name'); +my $mi = ($mset->items)[0]; +my $doc = $mi->get_document; +is($doc->get_data, '{}', 'stored empty data'); done_testing;