From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,AWL,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 14AF41FFB1 for ; Fri, 18 Dec 2020 12:09:53 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 26/26] lei: extinbox: start implementing in config file Date: Fri, 18 Dec 2020 12:09:50 +0000 Message-Id: <20201218120950.23272-27-e@80x24.org> In-Reply-To: <20201218120950.23272-1-e@80x24.org> References: <20201218120950.23272-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: They need to be indexed by MiscIdx, but MiscIdx still needs more work to support faster config loading when dealing with ~100K data sources. --- lib/PublicInbox/LEI.pm | 19 ++++----- lib/PublicInbox/LeiExtinbox.pm | 52 ++++++++++++++++++++++++ t/lei.t | 72 ++++++++++++++++++++++++++++++++-- 3 files changed, 130 insertions(+), 13 deletions(-) create mode 100644 lib/PublicInbox/LeiExtinbox.pm diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm index 97c5d91b..b254e2c5 100644 --- a/lib/PublicInbox/LEI.pm +++ b/lib/PublicInbox/LEI.pm @@ -8,7 +8,7 @@ package PublicInbox::LEI; use strict; use v5.10.1; -use parent qw(PublicInbox::DS); +use parent qw(PublicInbox::DS PublicInbox::LeiExtinbox); use Getopt::Long (); use Socket qw(AF_UNIX SOCK_STREAM pack_sockaddr_un); use Errno qw(EAGAIN ECONNREFUSED ENOENT); @@ -79,12 +79,12 @@ our %CMD = ( # sorted in order of importance/use: 'add-extinbox' => [ 'URL_OR_PATHNAME', 'add/set priority of a publicinbox|extindex for extra matches', - qw(prio=i) ], + qw(boost=i quiet|q) ], 'ls-extinbox' => [ '[FILTER...]', 'list publicinbox|extindex locations', - qw(format|f=s z local remote) ], + qw(format|f=s z|0 local remote quiet|q) ], 'forget-extinbox' => [ '{URL_OR_PATHNAME|--prune}', 'exclude further results from a publicinbox|extindex', - qw(prune) ], + qw(prune quiet|q) ], 'ls-query' => [ '[FILTER...]', 'list saved search queries', qw(name-only format|f=s z) ], @@ -107,7 +107,7 @@ our %CMD = ( # sorted in order of importance/use: # code repos are used for `show' to solve blobs from patch mails 'add-coderepo' => [ 'PATHNAME', 'add or set priority of a git code repo', - qw(prio=i) ], + qw(boost=i) ], 'ls-coderepo' => [ '[FILTER_TERMS...]', 'list known code repos', qw(format|f=s z) ], 'forget-coderepo' => [ 'PATHNAME', @@ -197,7 +197,7 @@ my %OPTDESC = ( 'sort|s=s@' => [ 'VAL|internaldate,date,relevance,docid', "order of results `--output'-dependent"], -'prio=i' => 'priority of query source', +'boost=i' => 'increase/decrease priority of results (default: 0)', 'local' => 'limit operations to the local filesystem', 'local!' => 'exclude results from the local filesystem', @@ -217,8 +217,7 @@ my %OPTDESC = ( 'by-mid|mid:s' => [ 'MID', 'match only by Message-ID, ignoring contents' ], 'jobs:i' => 'set parallelism level', -# xargs, env, use "-0", git(1) uses "-z". Should we support z|0 everywhere? -'z' => 'use NUL \\0 instead of newline (CR) to delimit lines', +# xargs, env, use "-0", git(1) uses "-z". We support z|0 everywhere 'z|0' => 'use NUL \\0 instead of newline (CR) to delimit lines', # note: no "--ignore-environment" / "-i" support like env(1) since that @@ -455,7 +454,9 @@ sub _lei_store ($;$) { $cfg->{-lei_store} //= do { require PublicInbox::LeiStore; PublicInbox::SearchIdx::load_xapian_writable(); - defined(my $dir = $cfg->{'leistore.dir'}) or return; + my $dir = $cfg->{'leistore.dir'}; + $dir //= _store_path($self->{env}) if $creat; + return unless $dir; PublicInbox::LeiStore->new($dir, { creat => $creat }); }; } diff --git a/lib/PublicInbox/LeiExtinbox.pm b/lib/PublicInbox/LeiExtinbox.pm new file mode 100644 index 00000000..2f52b115 --- /dev/null +++ b/lib/PublicInbox/LeiExtinbox.pm @@ -0,0 +1,52 @@ +# Copyright (C) 2020 all contributors +# License: AGPL-3.0+ + +# *-extinbox commands of lei +package PublicInbox::LeiExtinbox; +use strict; +use v5.10.1; +use parent qw(Exporter); +our @EXPORT = qw(lei_ls_extinbox lei_add_extinbox lei_forget_extinbox); + +sub lei_ls_extinbox { + my ($self, @argv) = @_; + my $stor = $self->_lei_store(0); + my $cfg = $self->_lei_cfg(0); + my $out = $self->{1}; + my ($OFS, $ORS) = $self->{opt}->{z} ? ("\0", "\0\0") : (" ", "\n"); + my (%boost, @loc); + for my $sec (grep(/\Aextinbox\./, @{$cfg->{-section_order}})) { + my $loc = substr($sec, length('extinbox.')); + $boost{$loc} = $cfg->{"$sec.boost"}; + push @loc, $loc; + } + my $out = $self->{1}; + use sort 'stable'; + # highest boost first, but stable for alphabetic tie break + for (sort { $boost{$b} <=> $boost{$a} } sort keys %boost) { + # TODO: use miscidx and show docid so forget/set is easier + print $out $_, $OFS, 'boost=', $boost{$_}, $ORS; + } +} + +sub lei_add_extinbox { + my ($self, $url_or_dir) = @_; + my $cfg = $self->_lei_cfg(1); + if ($url_or_dir !~ m!\Ahttps?://!) { + $url_or_dir = File::Spec->canonpath($url_or_dir); + } + my $new_boost = $self->{opt}->{boost} // 0; + my $key = "extinbox.$url_or_dir.boost"; + my $cur_boost = $cfg->{$key}; + return if defined($cur_boost) && $cur_boost == $new_boost; # idempotent + $self->lei_config($key, $new_boost); + my $stor = $self->_lei_store(1); + # TODO: add to MiscIdx + $stor->done; +} + +sub lei_forget_extinbox { + # TODO +} + +1; diff --git a/t/lei.t b/t/lei.t index 30f9d2b6..a95a0efc 100644 --- a/t/lei.t +++ b/t/lei.t @@ -7,17 +7,18 @@ use Test::More; use PublicInbox::TestCommon; use PublicInbox::Config; use File::Path qw(rmtree); +require_git 2.6; require_mods(qw(json DBD::SQLite Search::Xapian)); my $LEI = 'lei'; my $opt = { 1 => \(my $out = ''), 2 => \(my $err = '') }; my $lei = sub { - my ($cmd, $env, $opt) = @_; + my ($cmd, $env, $xopt) = @_; $out = $err = ''; if (!ref($cmd)) { - ($env, $opt) = grep { (!defined) || ref } @_; - $cmd = [ grep { defined } @_ ]; + ($env, $xopt) = grep { (!defined) || ref } @_; + $cmd = [ grep { defined && !ref } @_ ]; } - run_script([$LEI, @$cmd], $env, $opt); + run_script([$LEI, @$cmd], $env, $xopt // $opt); }; my ($home, $for_destroy) = tmpdir(); @@ -29,6 +30,8 @@ local $ENV{FOO} = 'BAR'; mkdir "$home/xdg_run", 0700 or BAIL_OUT "mkdir: $!"; my $home_trash = [ "$home/.local", "$home/.config" ]; my $cleanup = sub { rmtree([@$home_trash, @_]) }; +my $config_file = "$home/.config/lei/config"; +my $store_dir = "$home/.local/share/lei"; my $test_help = sub { ok(!$lei->([], undef, $opt), 'no args fails'); @@ -118,10 +121,71 @@ my $test_config = sub { ok(!-f "$home/config/f", 'no file created'); }; +my $setup_publicinboxes = sub { + state $done = ''; + return if $done eq $home; + use PublicInbox::InboxWritable; + for my $V (1, 2) { + run_script([qw(-init -Lmedium), "-V$V", "t$V", + '--newsgroup', "t.$V", + "$home/t$V", "http://example.com/t$V", + "t$V\@example.com" ]) or BAIL_OUT "init v$V"; + } + my $cfg = PublicInbox::Config->new; + my $seen = 0; + $cfg->each_inbox(sub { + my ($ibx) = @_; + my $im = PublicInbox::InboxWritable->new($ibx)->importer(0); + my $V = $ibx->version; + my @eml = glob('t/*.eml'); + push(@eml, 't/data/0001.patch') if $V == 2; + for (@eml) { + next if $_ eq 't/psgi_v2-old.eml'; # dup mid + $im->add(eml_load($_)) or BAIL_OUT "v$V add $_"; + $seen++; + } + $im->done; + if ($V == 1) { + run_script(['-index', $ibx->{inboxdir}]) or + BAIL_OUT 'index v1'; + } + }); + $done = $home; + $seen || BAIL_OUT 'no imports'; +}; + +my $test_extinbox = sub { + $setup_publicinboxes->(); + $cleanup->(); + $lei->('ls-extinbox'); + is($out.$err, '', 'ls-extinbox no output, yet'); + ok(!-e $config_file && !-e $store_dir, + 'nothing created by ls-extinbox'); + + my $cfg = PublicInbox::Config->new; + $cfg->each_inbox(sub { + my ($ibx) = @_; + ok($lei->(qw(add-extinbox -q), $ibx->{inboxdir}), + 'added extinbox'); + is($out.$err, '', 'no output'); + }); + ok(-s $config_file && -e $store_dir, + 'add-extinbox created config + store'); + my $lcfg = PublicInbox::Config->new($config_file); + $cfg->each_inbox(sub { + my ($ibx) = @_; + is($lcfg->{"extinbox.$ibx->{inboxdir}.boost"}, 0, + "configured boost on $ibx->{name}"); + }); + $lei->('ls-extinbox'); + like($out, qr/boost=0\n/s, 'ls-extinbox has output'); +}; + my $test_lei_common = sub { $test_help->(); $test_config->(); $test_init->(); + $test_extinbox->(); }; my $test_lei_oneshot = $ENV{TEST_LEI_ONESHOT};