From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.2 required=3.0 tests=ALL_TRUSTED,AWL,BAYES_00, DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF shortcircuit=no autolearn=ham autolearn_force=no version=3.4.6 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 911C81FC60 for ; Sat, 10 Aug 2024 09:00:15 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=80x24.org; s=selector1; t=1723280415; bh=zeMFWhFZK+qyo8IR5biCNWpLJSKRBneQhWUNm7BVuEk=; h=From:To:Subject:Date:In-Reply-To:References:From; b=IrqGAqAsFE2q0DFomoEIu4/vwFsQuxvtZ2h1MNWj48i1kWjRBGcYW6o01pSXFc9ta SU/6pHXS2HId8VsF3tgtQlyljKHKPFNYaDCv5soO1T48rxUvKsoNPXuGqxOcZfaIrO onul9/QrbzQzqrKYX+XQKxey/McQvOt2e1dUBgHw= From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 10/11] extindex: support extindex.*.indexheader Date: Sat, 10 Aug 2024 09:00:11 +0000 Message-ID: <20240810090012.23269-11-e@80x24.org> In-Reply-To: <20240810090012.23269-1-e@80x24.org> References: <20240810090012.23269-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: Per-inbox (publicinbox.*.indexheader) is next... --- lib/PublicInbox/Config.pm | 3 ++- lib/PublicInbox/ExtSearchIdx.pm | 17 ++++++++++++++++- lib/PublicInbox/SearchIdx.pm | 1 + t/extsearch.t | 34 ++++++++++++++++++++++++++++++++- 4 files changed, 52 insertions(+), 3 deletions(-) diff --git a/lib/PublicInbox/Config.pm b/lib/PublicInbox/Config.pm index 3af5f23c..b40e96f1 100644 --- a/lib/PublicInbox/Config.pm +++ b/lib/PublicInbox/Config.pm @@ -565,12 +565,13 @@ sub _fill_ei ($$) { my $v = get_1($self, "$pfx.$k") // next; $es->{$k} = $v; } - for my $k (qw(coderepo hide url infourl)) { + for my $k (qw(coderepo hide url infourl indexheader altid)) { my $v = $self->{"$pfx.$k"} // next; $es->{$k} = _array($v); } return unless valid_foo_name($name, 'extindex'); $es->{name} = $name; + $es->load_extra_indexers($es); $es; } diff --git a/lib/PublicInbox/ExtSearchIdx.pm b/lib/PublicInbox/ExtSearchIdx.pm index 68700c8b..094821a3 100644 --- a/lib/PublicInbox/ExtSearchIdx.pm +++ b/lib/PublicInbox/ExtSearchIdx.pm @@ -35,6 +35,7 @@ use PublicInbox::Eml; use PublicInbox::DS qw(now add_timer); use DBI qw(:sql_types); # SQL_BLOB use PublicInbox::Admin qw(fmt_localtime); +use PublicInbox::Config qw(rel2abs_collapsed); sub new { my (undef, $dir, $opt) = @_; @@ -86,7 +87,21 @@ sub _ibx_attach { # each_inbox callback sub attach_config { my ($self, $cfg, $ibxs) = @_; $self->{cfg} = $cfg; - my $types; + my ($types, $ro); + + # lookup extindex.$NAME. + my $eidx_dir = rel2abs_collapsed($self->{topdir}); + for my $k (grep(/\Aextindex\.(?:.+)\.topdir\z/, keys %$cfg)) { + next if rel2abs_collapsed($cfg->{$k}) ne $eidx_dir; + my $n = substr($k, length('extindex.'), -length('.topdir')); + $ro = $cfg->lookup_ei($n) and last; + } + + # and copy from read-only to our read-write $self + for my $f (qw(altid indexheader)) { + $self->{$f} = $ro->{$f} if defined $ro->{$f}; + } + if ($ibxs) { for my $ibx (@$ibxs) { $self->{ibx_map}->{$ibx->eidx_key} //= do { diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index b2576e52..53c16e55 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -1169,6 +1169,7 @@ sub eidx_shard_new { creat => 1, }, $class; $self->{-set_indexlevel_once} = 1 if $self->{indexlevel} eq 'medium'; + $self->load_extra_indexers($eidx); $self; } diff --git a/t/extsearch.t b/t/extsearch.t index 16d75f63..0ea5bc5b 100644 --- a/t/extsearch.t +++ b/t/extsearch.t @@ -8,7 +8,7 @@ use PublicInbox::InboxWritable; require_git(2.6); require_mods(qw(json DBD::SQLite Xapian)); use autodie qw(chmod open rename truncate unlink); -require PublicInbox::Search; +use PublicInbox::Search; use_ok 'PublicInbox::ExtSearch'; use_ok 'PublicInbox::ExtSearchIdx'; use_ok 'PublicInbox::OverIdx'; @@ -26,6 +26,7 @@ ok(run_script([qw(-init -Lbasic -V2 v2test --newsgroup v2.example), "$home/v2test", 'http://example.com/v2test', $v2addr ]), 'v2test init'); my $env = { ORIGINAL_RECIPIENT => $v2addr }; my $eml = eml_load('t/utf8.eml'); +my $eidxdir = "$home/extindex"; $eml->header_set('List-Id', ''); @@ -592,4 +593,35 @@ test_lei(sub { 'noted unindexed extindex is unsupported'); }); +if ('indexheader support') { + xsys_e [qw(git config extindex.all.indexheader + boolean_term:xarchiveshash:X-Archives-Hash)], + { GIT_CONFIG => $cfg_path }; + my $eml = eml_load('t/plack-qp.eml'); + $eml->header_set('X-Archives-Hash', 'deadbeefcafe'); + $in = \($eml->as_string); + $env->{ORIGINAL_RECIPIENT} = $v2addr; + run_script([qw(-mda --no-precheck)], $env, { 0 => $in }) or + xbail '-mda'; + ok run_script([qw(-extindex --all -vvv), $eidxdir]), + 'extindex update'; + $es = PublicInbox::Config->new($cfg_path)->ALL; + my $mset = $es->mset('xarchiveshash:deadbeefcafe'); + is $mset->size, 1, 'extindex.*.indexheader works'; + local $PublicInbox::Search::XHC = eval { + require PublicInbox::XhcMset; + PublicInbox::XapClient::start_helper('-j0'); + } or xbail "no XHC: $@"; + my @args; + $es->async_mset('xarchiveshash:deadbeefcafe', {} , sub { @args = @_ }); + is scalar(@args), 2, 'no extra args on hit'; + is $args[0]->size, 1, 'async mset hit works'; + ok !$args[1], 'no error on hit'; + @args = (); + $es->async_mset('xarchiveshash:cafebeefdead', {} , sub { @args = @_ }); + is scalar(@args), 2, 'no extra args on miss'; + is $args[0]->size, 0, 'async mset miss works'; + ok !$args[1], 'no error on miss'; +} + done_testing;