From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,AWL,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 3E2D32009A for ; Tue, 27 Oct 2020 07:55:00 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 40/52] script: add preliminary eindex implementation Date: Tue, 27 Oct 2020 07:54:41 +0000 Message-Id: <20201027075453.19163-41-e@80x24.org> In-Reply-To: <20201027075453.19163-1-e@80x24.org> References: <20201027075453.19163-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: Not documented, yet, but it runs... --- MANIFEST | 1 + script/public-inbox-eindex | 43 ++++++++++++++++++++++++++++++++++++++ t/extsearch.t | 26 +++++++++++++++++++++++ 3 files changed, 70 insertions(+) create mode 100644 script/public-inbox-eindex diff --git a/MANIFEST b/MANIFEST index 418a2f17..10561cd2 100644 --- a/MANIFEST +++ b/MANIFEST @@ -225,6 +225,7 @@ sa_config/user/.spamassassin/user_prefs script/public-inbox-compact script/public-inbox-convert script/public-inbox-edit +script/public-inbox-eindex script/public-inbox-httpd script/public-inbox-imapd script/public-inbox-index diff --git a/script/public-inbox-eindex b/script/public-inbox-eindex new file mode 100644 index 00000000..c26edb93 --- /dev/null +++ b/script/public-inbox-eindex @@ -0,0 +1,43 @@ +#!perl -w +# Copyright (C) 2020 all contributors +# License: AGPL-3.0+ +# Basic tool to create a Xapian search index for a public-inbox. +use strict; +use v5.10.1; +use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); +my $help = < -1, compact => 0, max_size => undef, fsync => 1 }; +GetOptions($opt, qw(verbose|v+ reindex rethread compact|c+ jobs|j=i + fsync|sync! + indexlevel|index-level|L=s max_size|max-size=s + batch_size|batch-size=s + skip-docdata all help|h)) + or die $help; +if ($opt->{help}) { print $help; exit 0 }; +die "--jobs must be >= 0\n" if defined $opt->{jobs} && $opt->{jobs} < 0; + +# require lazily to speed up --help +my $eidx_dir = shift(@ARGV) // die "E: $help"; +require PublicInbox::Admin; +my $cfg = PublicInbox::Config->new; +my @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt, $cfg); +PublicInbox::Admin::require_or_die(qw(-search)); +require PublicInbox::ExtSearchIdx; +my $eidx = PublicInbox::ExtSearchIdx->new($eidx_dir, $opt); +$eidx->attach_inbox($_) for @ibxs; +$eidx->eidx_sync($opt); diff --git a/t/extsearch.t b/t/extsearch.t index 54927c50..dfec6b6f 100644 --- a/t/extsearch.t +++ b/t/extsearch.t @@ -4,9 +4,35 @@ use strict; use Test::More; use PublicInbox::TestCommon; +use Fcntl qw(:seek); require_git(2.6); require_mods(qw(DBD::SQLite Search::Xapian)); use_ok 'PublicInbox::ExtSearch'; use_ok 'PublicInbox::ExtSearchIdx'; +my ($home, $for_destroy) = tmpdir(); +local $ENV{HOME} = $home; +mkdir "$home/.public-inbox" or BAIL_OUT $!; +open my $fh, '>', "$home/.public-inbox/config" or BAIL_OUT $!; +print $fh < $v2addr }; +open($fh, '<', 't/utf8.eml') or BAIL_OUT("open t/utf8.eml: $!"); +run_script(['-mda', '--no-precheck'], $env, { 0 => $fh }) or BAIL_OUT '-mda'; + +ok(run_script([qw(-init -V1 v1test), "$home/v1test", + 'http://example.com/v1test', $v1addr ]), 'v1test init'); +$env = { ORIGINAL_RECIPIENT => $v1addr }; +seek($fh, 0, SEEK_SET) or BAIL_OUT $!; +run_script(['-mda', '--no-precheck'], $env, { 0 => $fh }) or BAIL_OUT '-mda'; +run_script(['-index', "$home/v1test"]) or BAIL_OUT "index $?"; + +ok(run_script([qw(-eindex --all), "$home/eindex"]), 'eindex init'); done_testing;