* [PATCH 01/26] lei: FD-passing and IPC basics
2020-12-18 12:09 [PATCH 00/26] lei: basic UI + IPC work Eric Wong
@ 2020-12-18 12:09 ` Eric Wong
2020-12-18 12:09 ` [PATCH 02/26] lei: proposed command-listing and options Eric Wong
` (24 subsequent siblings)
25 siblings, 0 replies; 30+ messages in thread
From: Eric Wong @ 2020-12-18 12:09 UTC (permalink / raw)
To: meta
The start of lei, a Local Email Interface. It'll support a
daemon via FD passing to avoid startup time penalties if
IO::FDPass is installed, but fall back to a slow one-shot mode
if not.
Compared to traditional socket daemon, FD passing should allow
us to eventually do stuff like run "git show" and still have
proper terminal support for pager and color.
---
MANIFEST | 3 +
lib/PublicInbox/Daemon.pm | 6 +-
lib/PublicInbox/LeiDaemon.pm | 303 +++++++++++++++++++++++++++++++++++
script/lei | 58 +++++++
t/lei.t | 80 +++++++++
5 files changed, 448 insertions(+), 2 deletions(-)
create mode 100644 lib/PublicInbox/LeiDaemon.pm
create mode 100755 script/lei
create mode 100644 t/lei.t
diff --git a/MANIFEST b/MANIFEST
index ac442606..7536b7c2 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -159,6 +159,7 @@ lib/PublicInbox/InboxIdle.pm
lib/PublicInbox/InboxWritable.pm
lib/PublicInbox/Isearch.pm
lib/PublicInbox/KQNotify.pm
+lib/PublicInbox/LeiDaemon.pm
lib/PublicInbox/Linkify.pm
lib/PublicInbox/Listener.pm
lib/PublicInbox/Lock.pm
@@ -226,6 +227,7 @@ sa_config/Makefile
sa_config/README
sa_config/root/etc/spamassassin/public-inbox.pre
sa_config/user/.spamassassin/user_prefs
+script/lei
script/public-inbox-compact
script/public-inbox-convert
script/public-inbox-edit
@@ -316,6 +318,7 @@ t/indexlevels-mirror.t
t/init.t
t/iso-2202-jp.eml
t/kqnotify.t
+t/lei.t
t/linkify.t
t/main-bin/spamc
t/mda-mime.eml
diff --git a/lib/PublicInbox/Daemon.pm b/lib/PublicInbox/Daemon.pm
index a2171535..6b92b60d 100644
--- a/lib/PublicInbox/Daemon.pm
+++ b/lib/PublicInbox/Daemon.pm
@@ -1,7 +1,9 @@
# Copyright (C) 2015-2020 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
-# contains common daemon code for the httpd, imapd, and nntpd servers.
-# This may be used for read-only IMAP server if we decide to implement it.
+#
+# Contains common daemon code for the httpd, imapd, and nntpd servers
+# and designed for handling thousands of untrusted clients over slow
+# and/or lossy connections.
package PublicInbox::Daemon;
use strict;
use warnings;
diff --git a/lib/PublicInbox/LeiDaemon.pm b/lib/PublicInbox/LeiDaemon.pm
new file mode 100644
index 00000000..ae40b3a6
--- /dev/null
+++ b/lib/PublicInbox/LeiDaemon.pm
@@ -0,0 +1,303 @@
+# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# Backend for `lei' (local email interface). Unlike the C10K-oriented
+# PublicInbox::Daemon, this is designed exclusively to handle trusted
+# local clients with read/write access to the FS and use as many
+# system resources as the local user has access to.
+package PublicInbox::LeiDaemon;
+use strict;
+use v5.10.1;
+use parent qw(PublicInbox::DS);
+use Getopt::Long ();
+use Errno qw(EAGAIN ECONNREFUSED ENOENT);
+use POSIX qw(setsid);
+use IO::Socket::UNIX;
+use IO::Handle ();
+use Sys::Syslog qw(syslog openlog);
+use PublicInbox::Syscall qw($SFD_NONBLOCK EPOLLIN EPOLLONESHOT);
+use PublicInbox::Sigfd;
+use PublicInbox::DS qw(now);
+use PublicInbox::Spawn qw(spawn);
+our $quit = sub { exit(shift // 0) };
+my $glp = Getopt::Long::Parser->new;
+$glp->configure(qw(gnu_getopt no_ignore_case auto_abbrev));
+
+sub x_it ($$) { # pronounced "exit"
+ my ($client, $code) = @_;
+ if (my $sig = ($code & 127)) {
+ kill($sig, $client->{pid} // $$);
+ } else {
+ $code >>= 8;
+ if (my $sock = $client->{sock}) {
+ say $sock "exit=$code";
+ } else { # for oneshot
+ $quit->($code);
+ }
+ }
+}
+
+sub emit ($$$) {
+ my ($client, $channel, $buf) = @_;
+ print { $client->{$channel} } $buf or warn "print FD[$channel]: $!";
+}
+
+sub fail ($$;$) {
+ my ($client, $buf, $exit_code) = @_;
+ $buf .= "\n" unless $buf =~ /\n\z/s;
+ emit($client, 2, $buf);
+ x_it($client, ($exit_code // 1) << 8);
+ undef;
+}
+
+sub _help ($;$) {
+ my ($client, $channel) = @_;
+ emit($client, $channel //= 1, <<EOF);
+usage: lei COMMAND [OPTIONS]
+
+...
+EOF
+ x_it($client, $channel == 2 ? 1 << 8 : 0); # stderr => failure
+}
+
+sub assert_args ($$$;$@) {
+ my ($client, $argv, $proto, $opt, @spec) = @_;
+ $opt //= {};
+ push @spec, qw(help|h);
+ $glp->getoptionsfromarray($argv, $opt, @spec) or
+ return fail($client, 'bad arguments or options');
+ if ($opt->{help}) {
+ _help($client);
+ undef;
+ } else {
+ my ($nreq, $rest) = split(/;/, $proto);
+ $nreq = (($nreq // '') =~ tr/$/$/);
+ my $argc = scalar(@$argv);
+ my $tot = ($rest // '') eq '@' ? $argc : ($proto =~ tr/$/$/);
+ return 1 if $argc <= $tot && $argc >= $nreq;
+ _help($client, 2);
+ undef
+ }
+}
+
+sub dispatch {
+ my ($client, $cmd, @argv) = @_;
+ local $SIG{__WARN__} = sub { emit($client, 2, "@_") };
+ local $SIG{__DIE__} = 'DEFAULT';
+ if (defined $cmd) {
+ my $func = "lei_$cmd";
+ $func =~ tr/-/_/;
+ if (my $cb = __PACKAGE__->can($func)) {
+ $client->{cmd} = $cmd;
+ $cb->($client, \@argv);
+ } elsif (grep(/\A-/, $cmd, @argv)) {
+ assert_args($client, [ $cmd, @argv ], '');
+ } else {
+ fail($client, "`$cmd' is not an lei command");
+ }
+ } else {
+ _help($client, 2);
+ }
+}
+
+sub lei_daemon_pid {
+ my ($client, $argv) = @_;
+ assert_args($client, $argv, '') and emit($client, 1, "$$\n");
+}
+
+sub lei_DBG_pwd {
+ my ($client, $argv) = @_;
+ assert_args($client, $argv, '') and
+ emit($client, 1, "$client->{env}->{PWD}\n");
+}
+
+sub lei_DBG_cwd {
+ my ($client, $argv) = @_;
+ require Cwd;
+ assert_args($client, $argv, '') and emit($client, 1, Cwd::cwd()."\n");
+}
+
+sub lei_DBG_false { x_it($_[0], 1 << 8) }
+
+sub lei_daemon_stop {
+ my ($client, $argv) = @_;
+ assert_args($client, $argv, '') and $quit->(0);
+}
+
+sub lei_help { _help($_[0]) }
+
+sub reap_exec { # dwaitpid callback
+ my ($client, $pid) = @_;
+ x_it($client, $?);
+}
+
+sub lei_git { # support passing through random git commands
+ my ($client, $argv) = @_;
+ my %opt = map { $_ => $client->{$_} } (0..2);
+ my $pid = spawn(['git', @$argv], $client->{env}, \%opt);
+ PublicInbox::DS::dwaitpid($pid, \&reap_exec, $client);
+}
+
+sub accept_dispatch { # Listener {post_accept} callback
+ my ($sock) = @_; # ignore other
+ $sock->blocking(1);
+ $sock->autoflush(1);
+ my $client = { sock => $sock };
+ vec(my $rin = '', fileno($sock), 1) = 1;
+ # `say $sock' triggers "die" in lei(1)
+ for my $i (0..2) {
+ if (select(my $rout = $rin, undef, undef, 1)) {
+ my $fd = IO::FDPass::recv(fileno($sock));
+ if ($fd >= 0) {
+ my $rdr = ($fd == 0 ? '<&=' : '>&=');
+ if (open(my $fh, $rdr, $fd)) {
+ $client->{$i} = $fh;
+ } else {
+ say $sock "open($rdr$fd) (FD=$i): $!";
+ return;
+ }
+ } else {
+ say $sock "recv FD=$i: $!";
+ return;
+ }
+ } else {
+ say $sock "timed out waiting to recv FD=$i";
+ return;
+ }
+ }
+ # $ARGV_STR = join("]\0[", @ARGV);
+ # $ENV_STR = join('', map { "$_=$ENV{$_}\0" } keys %ENV);
+ # $line = "$$\0\0>$ARGV_STR\0\0>$ENV_STR\0\0";
+ my ($client_pid, $argv, $env) = do {
+ local $/ = "\0\0\0"; # yes, 3 NULs at EOL, not 2
+ chomp(my $line = <$sock>);
+ split(/\0\0>/, $line, 3);
+ };
+ my %env = map { split(/=/, $_, 2) } split(/\0/, $env);
+ if (chdir($env{PWD})) {
+ $client->{env} = \%env;
+ $client->{pid} = $client_pid;
+ eval { dispatch($client, split(/\]\0\[/, $argv)) };
+ say $sock $@ if $@;
+ } else {
+ say $sock "chdir($env{PWD}): $!"; # implicit close
+ }
+}
+
+sub noop {}
+
+# lei(1) calls this when it can't connect
+sub lazy_start ($$) {
+ my ($path, $err) = @_;
+ if ($err == ECONNREFUSED) {
+ unlink($path) or die "unlink($path): $!";
+ } elsif ($err != ENOENT) {
+ die "connect($path): $!";
+ }
+ my $umask = umask(077) // die("umask(077): $!");
+ my $l = IO::Socket::UNIX->new(Local => $path,
+ Listen => 1024,
+ Type => SOCK_STREAM) or
+ $err = $!;
+ umask($umask) or die("umask(restore): $!");
+ $l or return $err;
+ my @st = stat($path) or die "stat($path): $!";
+ my $dev_ino_expect = pack('dd', $st[0], $st[1]); # dev+ino
+ pipe(my ($eof_r, $eof_w)) or die "pipe: $!";
+ my $oldset = PublicInbox::Sigfd::block_signals();
+ my $pid = fork // die "fork: $!";
+ if ($pid) {
+ PublicInbox::Sigfd::sig_setmask($oldset);
+ return; # client will connect to $path
+ }
+ openlog($path, 'pid', 'user');
+ local $SIG{__DIE__} = sub {
+ syslog('crit', "@_");
+ exit $! if $!;
+ exit $? >> 8 if $? >> 8;
+ exit 255;
+ };
+ local $SIG{__WARN__} = sub { syslog('warning', "@_") };
+ open(STDIN, '+<', '/dev/null') or die "redirect stdin failed: $!\n";
+ open STDOUT, '>&STDIN' or die "redirect stdout failed: $!\n";
+ open STDERR, '>&STDIN' or die "redirect stderr failed: $!\n";
+ setsid();
+ $pid = fork // die "fork: $!";
+ exit if $pid;
+ $0 = "lei-daemon $path";
+ require PublicInbox::Listener;
+ require PublicInbox::EOFpipe;
+ $l->blocking(0);
+ $eof_w->blocking(0);
+ $eof_r->blocking(0);
+ my $listener = PublicInbox::Listener->new($l, \&accept_dispatch, $l);
+ my $exit_code;
+ local $quit = sub {
+ $exit_code //= shift;
+ my $tmp = $listener or exit($exit_code);
+ unlink($path) if defined($path);
+ syswrite($eof_w, '.');
+ $l = $listener = $path = undef;
+ $tmp->close if $tmp; # DS::close
+ PublicInbox::DS->SetLoopTimeout(1000);
+ };
+ PublicInbox::EOFpipe->new($eof_r, sub {}, undef);
+ my $sig = {
+ CHLD => \&PublicInbox::DS::enqueue_reap,
+ QUIT => $quit,
+ INT => $quit,
+ TERM => $quit,
+ HUP => \&noop,
+ USR1 => \&noop,
+ USR2 => \&noop,
+ };
+ my $sigfd = PublicInbox::Sigfd->new($sig, $SFD_NONBLOCK);
+ local %SIG = (%SIG, %$sig) if !$sigfd;
+ if ($sigfd) { # TODO: use inotify/kqueue to detect unlinked sockets
+ PublicInbox::DS->SetLoopTimeout(5000);
+ } else {
+ # wake up every second to accept signals if we don't
+ # have signalfd or IO::KQueue:
+ PublicInbox::Sigfd::sig_setmask($oldset);
+ PublicInbox::DS->SetLoopTimeout(1000);
+ }
+ PublicInbox::DS->SetPostLoopCallback(sub {
+ my ($dmap, undef) = @_;
+ if (@st = defined($path) ? stat($path) : ()) {
+ if ($dev_ino_expect ne pack('dd', $st[0], $st[1])) {
+ warn "$path dev/ino changed, quitting\n";
+ $path = undef;
+ }
+ } elsif (defined($path)) {
+ warn "stat($path): $!, quitting ...\n";
+ undef $path; # don't unlink
+ $quit->();
+ }
+ return 1 if defined($path);
+ my $now = now();
+ my $n = 0;
+ for my $s (values %$dmap) {
+ $s->can('busy') or next;
+ if ($s->busy($now)) {
+ ++$n;
+ } else {
+ $s->close;
+ }
+ }
+ $n; # true: continue, false: stop
+ });
+ PublicInbox::DS->EventLoop;
+ exit($exit_code // 0);
+}
+
+# for users w/o IO::FDPass
+sub oneshot {
+ dispatch({
+ 0 => *STDIN{IO},
+ 1 => *STDOUT{IO},
+ 2 => *STDERR{IO},
+ env => \%ENV
+ }, @ARGV);
+}
+
+1;
diff --git a/script/lei b/script/lei
new file mode 100755
index 00000000..1b5af3a1
--- /dev/null
+++ b/script/lei
@@ -0,0 +1,58 @@
+#!perl -w
+# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use v5.10.1;
+use Cwd qw(cwd);
+use IO::Socket::UNIX;
+
+if (eval { require IO::FDPass; 1 }) { # use daemon to reduce load time
+ my $path = do {
+ my $runtime_dir = ($ENV{XDG_RUNTIME_DIR} // '') . '/lei';
+ if ($runtime_dir eq '/lei') {
+ require File::Spec;
+ $runtime_dir = File::Spec->tmpdir."/lei-$<";
+ }
+ unless (-d $runtime_dir && -w _) {
+ require File::Path;
+ File::Path::mkpath($runtime_dir, 0, 0700);
+ }
+ "$runtime_dir/sock";
+ };
+ my $sock = IO::Socket::UNIX->new(Peer => $path, Type => SOCK_STREAM);
+ unless ($sock) { # start the daemon if not started
+ my $err = $!;
+ require PublicInbox::LeiDaemon;
+ $err = PublicInbox::LeiDaemon::lazy_start($path, $err);
+ # try connecting again anyways, unlink+bind may be racy
+ $sock = IO::Socket::UNIX->new(Peer => $path,
+ Type => SOCK_STREAM) // die
+ "connect($path): $! (bind($path): $err)";
+ }
+ my $pwd = $ENV{PWD};
+ my $cwd = cwd();
+ if ($pwd) { # prefer ENV{PWD} if it's a symlink to real cwd
+ my @st_cwd = stat($cwd) or die "stat(cwd=$cwd): $!\n";
+ my @st_pwd = stat($pwd);
+ # make sure st_dev/st_ino match for {PWD} to be valid
+ $pwd = $cwd if (!@st_pwd || $st_pwd[1] != $st_cwd[1] ||
+ $st_pwd[0] != $st_cwd[0]);
+ } else {
+ $pwd = $cwd;
+ }
+ local $ENV{PWD} = $pwd;
+ $sock->autoflush(1);
+ IO::FDPass::send(fileno($sock), $_) for (0..2);
+ my $buf = "$$\0\0>" . join("]\0[", @ARGV) . "\0\0>";
+ while (my ($k, $v) = each %ENV) { $buf .= "$k=$v\0" }
+ $buf .= "\0\0";
+ print $sock $buf or die "print(sock, buf): $!";
+ local $/ = "\n";
+ while (my $line = <$sock>) {
+ $line =~ /\Aexit=([0-9]+)\n\z/ and exit($1 + 0);
+ die $line;
+ }
+} else { # for systems lacking IO::FDPass
+ require PublicInbox::LeiDaemon;
+ PublicInbox::LeiDaemon::oneshot();
+}
diff --git a/t/lei.t b/t/lei.t
new file mode 100644
index 00000000..feee9270
--- /dev/null
+++ b/t/lei.t
@@ -0,0 +1,80 @@
+#!perl -w
+# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use v5.10.1;
+use Test::More;
+use PublicInbox::TestCommon;
+use PublicInbox::Config;
+my $json = PublicInbox::Config::json() or plan skip_all => 'JSON missing';
+require_mods(qw(DBD::SQLite Search::Xapian));
+my ($home, $for_destroy) = tmpdir();
+my $opt = { 1 => \(my $out = ''), 2 => \(my $err = '') };
+
+SKIP: {
+ require_mods('IO::FDPass', 51);
+ local $ENV{XDG_RUNTIME_DIR} = "$home/xdg_run";
+ mkdir "$home/xdg_run", 0700 or BAIL_OUT "mkdir: $!";
+ my $sock = "$ENV{XDG_RUNTIME_DIR}/lei/sock";
+
+ ok(run_script([qw(lei daemon-pid)], undef, $opt), 'daemon-pid');
+ is($err, '', 'no error from daemon-pid');
+ like($out, qr/\A[0-9]+\n\z/s, 'pid returned') or BAIL_OUT;
+ chomp(my $pid = $out);
+ ok(kill(0, $pid), 'pid is valid');
+ ok(-S $sock, 'sock created');
+
+ ok(!run_script([qw(lei)], undef, $opt), 'no args fails');
+ is($? >> 8, 1, '$? is 1');
+ is($out, '', 'nothing in stdout');
+ like($err, qr/^usage:/sm, 'usage in stderr');
+
+ for my $arg (['-h'], ['--help'], ['help'], [qw(daemon-pid --help)]) {
+ $out = $err = '';
+ ok(run_script(['lei', @$arg], undef, $opt), "lei @$arg");
+ like($out, qr/^usage:/sm, "usage in stdout (@$arg)");
+ is($err, '', "nothing in stderr (@$arg)");
+ }
+
+ ok(!run_script([qw(lei DBG-false)], undef, $opt), 'false(1) emulation');
+ is($? >> 8, 1, '$? set correctly');
+ is($err, '', 'no error from false(1) emulation');
+
+ for my $arg ([''], ['--halp'], ['halp'], [qw(daemon-pid --halp)]) {
+ $out = $err = '';
+ ok(!run_script(['lei', @$arg], undef, $opt), "lei @$arg");
+ is($? >> 8, 1, '$? set correctly');
+ isnt($err, '', 'something in stderr');
+ is($out, '', 'nothing in stdout');
+ }
+
+ $out = '';
+ ok(run_script([qw(lei daemon-pid)], undef, $opt), 'daemon-pid');
+ chomp(my $pid_again = $out);
+ is($pid, $pid_again, 'daemon-pid idempotent');
+
+ ok(run_script([qw(lei daemon-stop)], undef, $opt), 'daemon-stop');
+ is($out, '', 'no output from daemon-stop');
+ is($err, '', 'no error from daemon-stop');
+ for (0..100) {
+ kill(0, $pid) or last;
+ tick();
+ }
+ ok(!-S $sock, 'sock gone');
+ ok(!kill(0, $pid), 'pid gone after stop');
+
+ ok(run_script([qw(lei daemon-pid)], undef, $opt), 'daemon-pid');
+ chomp(my $new_pid = $out);
+ ok(kill(0, $new_pid), 'new pid is running');
+ ok(-S $sock, 'sock exists again');
+ unlink $sock or BAIL_OUT "unlink $!";
+ for (0..100) {
+ kill('CHLD', $new_pid) or last;
+ tick();
+ }
+ ok(!kill(0, $new_pid), 'daemon exits after unlink');
+};
+
+require_ok 'PublicInbox::LeiDaemon';
+
+done_testing;
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [PATCH 02/26] lei: proposed command-listing and options
2020-12-18 12:09 [PATCH 00/26] lei: basic UI + IPC work Eric Wong
2020-12-18 12:09 ` [PATCH 01/26] lei: FD-passing and IPC basics Eric Wong
@ 2020-12-18 12:09 ` Eric Wong
2021-02-18 20:42 ` lei q --save-as=... requires too much thinking Eric Wong
2020-12-18 12:09 ` [PATCH 03/26] lei_store: local storage for Local Email Interface Eric Wong
` (23 subsequent siblings)
25 siblings, 1 reply; 30+ messages in thread
From: Eric Wong @ 2020-12-18 12:09 UTC (permalink / raw)
To: meta
In an attempt to ensure a coherent UI/UX, we'll try to document
all proposed commands and options in one place for easy reference
---
lib/PublicInbox/LeiDaemon.pm | 137 +++++++++++++++++++++++++++++++++++
1 file changed, 137 insertions(+)
diff --git a/lib/PublicInbox/LeiDaemon.pm b/lib/PublicInbox/LeiDaemon.pm
index ae40b3a6..d0c53416 100644
--- a/lib/PublicInbox/LeiDaemon.pm
+++ b/lib/PublicInbox/LeiDaemon.pm
@@ -23,6 +23,143 @@ our $quit = sub { exit(shift // 0) };
my $glp = Getopt::Long::Parser->new;
$glp->configure(qw(gnu_getopt no_ignore_case auto_abbrev));
+# TBD: this is a documentation mechanism to show a subcommand
+# (may) pass options through to another command:
+sub pass_through { () }
+
+# TODO: generate shell completion + help using %CMD and %OPTDESC
+# command => [ positional_args, 1-line description, Getopt::Long option spec ]
+our %CMD = ( # sorted in order of importance/use:
+'query' => [ 'SEARCH-TERMS...', 'search for messages matching terms', qw(
+ save-as=s output|o=s format|f=s dedupe|d=s thread|t augment|a
+ limit|n=i sort|s=s reverse|r offset=i remote local! extinbox!
+ since|after=s until|before=s) ],
+
+'show' => [ '{MID|OID}', 'show a given object (Message-ID or object ID)',
+ qw(type=s solve! format|f=s dedupe|d=s thread|t remote local!),
+ pass_through('git show') ],
+
+'add-extinbox' => [ 'URL-OR-PATHNAME',
+ 'add/set priority of a publicinbox|extindex for extra matches',
+ qw(prio=i) ],
+'ls-extinbox' => [ '[FILTER]', 'list publicinbox|extindex sources',
+ qw(format|f=s z local remote) ],
+'forget-extinbox' => [ '{URL-OR-PATHNAME|--prune}',
+ 'exclude further results from a publicinbox|extindex',
+ qw(prune) ],
+
+'ls-query' => [ '[FILTER]', 'list saved search queries',
+ qw(name-only format|f=s z) ],
+'rm-query' => [ 'QUERY_NAME', 'remove a saved search' ],
+'mv-query' => [ qw(OLD_NAME NEW_NAME), 'rename a saved search' ],
+
+'plonk' => [ '{--thread|--from=IDENT}',
+ 'exclude mail matching From: or thread from non-Message-ID searches',
+ qw(thread|t from|f=s mid=s oid=s) ],
+'mark' => [ 'MESSAGE-FLAGS', 'set/unset flags on message(s) from stdin',
+ qw(stdin| oid=s exact by-mid|mid:s) ],
+'forget' => [ '--stdin', 'exclude message(s) on stdin from query results',
+ qw(stdin| oid=s exact by-mid|mid:s) ],
+
+'purge-mailsource' => [ '{URL-OR-PATHNAME|--all}',
+ 'remove imported messages from IMAP, Maildirs, and MH',
+ qw(exact! all jobs:i indexed) ],
+
+# code repos are used for `show' to solve blobs from patch mails
+'add-coderepo' => [ 'PATHNAME', 'add or set priority of a git code repo',
+ qw(prio=i) ],
+'ls-coderepo' => [ '[FILTER]', 'list known code repos', qw(format|f=s z) ],
+'forget-coderepo' => [ 'PATHNAME',
+ 'stop using repo to solve blobs from patches',
+ qw(prune) ],
+
+'add-watch' => [ '[URL_OR_PATHNAME]',
+ 'watch for new messages and flag changes',
+ qw(import! flags! interval=s recursive|r exclude=s include=s) ],
+'ls-watch' => [ '[FILTER]', 'list active watches with numbers and status',
+ qw(format|f=s z) ],
+'pause-watch' => [ '[WATCH_NUMBER_OR_FILTER]', qw(all local remote) ],
+'resume-watch' => [ '[WATCH_NUMBER_OR_FILTER]', qw(all local remote) ],
+'forget-watch' => [ '{WATCH_NUMBER|--prune}', 'stop and forget a watch',
+ qw(prune) ],
+
+'import' => [ '{URL_OR_PATHNAME|--stdin}',
+ 'one-shot import/update from URL or filesystem',
+ qw(stdin| limit|n=i offset=i recursive|r exclude=s include=s !flags),
+ ],
+
+'config' => [ '[ANYTHING...]',
+ 'git-config(1) wrapper for ~/.config/lei/config',
+ pass_through('git config') ],
+'daemon-stop' => [ undef, 'stop the lei-daemon' ],
+'daemon-pid' => [ undef, 'show the PID of the lei-daemon' ],
+'help' => [ '[SUBCOMMAND]', 'show help' ],
+
+# XXX do we need this?
+# 'git' => [ '[ANYTHING...]', 'git(1) wrapper', pass_through('git') ],
+
+'reorder-local-store-and-break-history' => [ '[REFNAME]',
+ 'rewrite git history in an attempt to improve compression',
+ 'gc!' ]
+); # @CMD
+
+# switch descriptions, try to keep consistent across commands
+# $spec: Getopt::Long option specification
+# $spec => [@ALLOWED_VALUES (default is first), $description],
+# $spec => $description
+# "$SUB_COMMAND TAB $spec" => as above
+my $stdin_formats = [ qw(auto raw mboxrd mboxcl2 mboxcl mboxo),
+ 'specify message input format' ];
+my $ls_format = [ qw(plain json null), 'listing output format' ];
+my $show_format = [ qw(plain raw html mboxrd mboxcl2 mboxcl),
+ 'message/object output format' ];
+
+my %OPTDESC = (
+'solve!' => 'do not attempt to reconstruct blobs from emails',
+'save-as=s' => 'save a search terms by given name',
+
+'type=s' => [qw(any mid git), 'disambiguate type' ],
+
+'dedupe|d=s' => [qw(content oid mid), 'deduplication strategy'],
+'thread|t' => 'every message in the same thread as the actual match(es)',
+'augment|a' => 'augment --output destination instead of clobbering',
+
+'output|o=s' => "destination (e.g. `/path/to/Maildir', or `-' for stdout)",
+
+'mark format|f=s' => $stdin_formats,
+'forget format|f=s' => $stdin_formats,
+'query format|f=s' => [qw(maildir mboxrd mboxcl2 mboxcl html oid),
+ q[specify output format (default: determined by --output)]],
+'ls-query format|f=s' => $ls_format,
+'ls-extinbox format|f=s' => $ls_format,
+
+'limit|n=i' => 'integer limit on number of matches (default: 10000)',
+'offset=i' => 'search result offset (default: 0)',
+
+'sort|s=s@' => [qw(internaldate date relevance docid),
+ "order of results `--output'-dependent)"],
+
+'prio=i' => 'priority of query source',
+
+'local' => 'limit operations to the local filesystem',
+'local!' => 'exclude results from the local filesystem',
+'remote' => 'limit operations to those requiring network access',
+'remote!' => 'prevent operations requiring network access',
+
+'mid=s' => 'specify the Message-ID of a message',
+'oid=s' => 'specify the git object ID of a message',
+
+'recursive|r' => 'scan directories/mailboxes/newsgroups recursively',
+'exclude=s' => 'exclude mailboxes/newsgroups based on pattern',
+'include=s' => 'include mailboxes/newsgroups based on pattern',
+
+'exact' => 'operate on exact header matches only',
+'exact!' => 'rely on content match instead of exact header matches',
+
+'by-mid|mid:s' => 'match only by Message-ID, ignoring contents',
+'jobs:i' => 'set parallelism level',
+); # %OPTDESC
+
sub x_it ($$) { # pronounced "exit"
my ($client, $code) = @_;
if (my $sig = ($code & 127)) {
^ permalink raw reply related [flat|nested] 30+ messages in thread
* lei q --save-as=... requires too much thinking
2020-12-18 12:09 ` [PATCH 02/26] lei: proposed command-listing and options Eric Wong
@ 2021-02-18 20:42 ` Eric Wong
0 siblings, 0 replies; 30+ messages in thread
From: Eric Wong @ 2021-02-18 20:42 UTC (permalink / raw)
To: meta
Eric Wong <e@80x24.org> wrote:
> +'query' => [ 'SEARCH-TERMS...', 'search for messages matching terms', qw(
s/query/q/
> + save-as=s output|o=s format|f=s dedupe|d=s thread|t augment|a
Naming things is hard, so I don't think "lei q --save-as="
needs to exist.
Just the "-o DESTINATION" can be tracked and used to infer the
metadata used to make the saved search. Maildir outputs can
store that information in a ".lei" subdir of the Maildir itself;
mboxes and IMAP folders will have it stored in SQLite somewhere.
As with externals, there'll be basename shortcuts, so saved
searches go into ~/lei-saved/foo/; one can just specify "foo"
the next time around as long as the basename is unique.
This is following what was done with externals, lei doesn't
allow the user to name the external like git remotes (e.g.
[remote "origin"] or [publicinbox "foo"]). The normalized URL
or pathname is the name of the external; which saves cognitive
overhead for my feeble brain.
Of course, basename-only expansion and bash completions go a
long way towards making URLs/paths usable.
> +'ls-query' => [ '[FILTER]', 'list saved search queries',
> + qw(name-only format|f=s z) ],
> +'rm-query' => [ 'QUERY_NAME', 'remove a saved search' ],
> +'mv-query' => [ qw(OLD_NAME NEW_NAME), 'rename a saved search' ],
These would operate on pathnames and URLs.
^ permalink raw reply [flat|nested] 30+ messages in thread
* [PATCH 03/26] lei_store: local storage for Local Email Interface
2020-12-18 12:09 [PATCH 00/26] lei: basic UI + IPC work Eric Wong
2020-12-18 12:09 ` [PATCH 01/26] lei: FD-passing and IPC basics Eric Wong
2020-12-18 12:09 ` [PATCH 02/26] lei: proposed command-listing and options Eric Wong
@ 2020-12-18 12:09 ` Eric Wong
2020-12-18 12:09 ` [PATCH 04/26] tests: more common JSON module loading Eric Wong
` (22 subsequent siblings)
25 siblings, 0 replies; 30+ messages in thread
From: Eric Wong @ 2020-12-18 12:09 UTC (permalink / raw)
To: meta
Still unstable, this builds off the equally unstable extindex :P
This will be used for caching/memoization of traditional mail
stores (IMAP, Maildir, etc) while providing indexing via Xapian,
along with compression, and checksumming from git.
Most notably, this adds the ability to add/remove per-message
keywords (draft, seen, flagged, answered) as described in the
JMAP specification (RFC 8621 section 4.1.1).
We'll use `.' (a single period) as an $eidx_key since it's an
invalid {inboxdir} or {newsgroup} name.
---
MANIFEST | 3 +
lib/PublicInbox/ExtSearch.pm | 4 +-
lib/PublicInbox/ExtSearchIdx.pm | 35 +++++-
lib/PublicInbox/Import.pm | 4 +
lib/PublicInbox/LeiDaemon.pm | 2 +-
lib/PublicInbox/LeiSearch.pm | 40 ++++++
lib/PublicInbox/LeiStore.pm | 197 ++++++++++++++++++++++++++++++
lib/PublicInbox/OverIdx.pm | 10 ++
lib/PublicInbox/SearchIdx.pm | 47 ++++++-
lib/PublicInbox/SearchIdxShard.pm | 33 +++++
lib/PublicInbox/V2Writable.pm | 2 +-
t/lei_store.t | 74 +++++++++++
12 files changed, 441 insertions(+), 10 deletions(-)
create mode 100644 lib/PublicInbox/LeiSearch.pm
create mode 100644 lib/PublicInbox/LeiStore.pm
create mode 100644 t/lei_store.t
diff --git a/MANIFEST b/MANIFEST
index 7536b7c2..9eb97d14 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -160,6 +160,8 @@ lib/PublicInbox/InboxWritable.pm
lib/PublicInbox/Isearch.pm
lib/PublicInbox/KQNotify.pm
lib/PublicInbox/LeiDaemon.pm
+lib/PublicInbox/LeiSearch.pm
+lib/PublicInbox/LeiStore.pm
lib/PublicInbox/Linkify.pm
lib/PublicInbox/Listener.pm
lib/PublicInbox/Lock.pm
@@ -319,6 +321,7 @@ t/init.t
t/iso-2202-jp.eml
t/kqnotify.t
t/lei.t
+t/lei_store.t
t/linkify.t
t/main-bin/spamc
t/mda-mime.eml
diff --git a/lib/PublicInbox/ExtSearch.pm b/lib/PublicInbox/ExtSearch.pm
index 2a560935..410ae958 100644
--- a/lib/PublicInbox/ExtSearch.pm
+++ b/lib/PublicInbox/ExtSearch.pm
@@ -17,13 +17,13 @@ use DBI qw(:sql_types); # SQL_BLOB
use parent qw(PublicInbox::Search);
sub new {
- my (undef, $topdir) = @_;
+ my ($class, $topdir) = @_;
$topdir = File::Spec->canonpath($topdir);
bless {
topdir => $topdir,
# xpfx => 'ei15'
xpfx => "$topdir/ei".PublicInbox::Search::SCHEMA_VERSION
- }, __PACKAGE__;
+ }, $class;
}
sub misc {
diff --git a/lib/PublicInbox/ExtSearchIdx.pm b/lib/PublicInbox/ExtSearchIdx.pm
index c6fb398b..a070df85 100644
--- a/lib/PublicInbox/ExtSearchIdx.pm
+++ b/lib/PublicInbox/ExtSearchIdx.pm
@@ -894,18 +894,31 @@ sub idx_init { # similar to V2Writable
return if $self->{idx_shards};
$self->git->cleanup;
-
+ my $mode = 0644;
my $ALL = $self->git->{git_dir}; # ALL.git
- PublicInbox::Import::init_bare($ALL) unless -d $ALL;
+ my $old = -d $ALL;
+ if ($opt->{-private}) { # LeiStore
+ $mode = 0600;
+ if (!$old) {
+ umask 077; # don't bother restoring
+ PublicInbox::Import::init_bare($ALL);
+ $self->git->qx(qw(config core.sharedRepository 0600));
+ }
+ } else {
+ PublicInbox::Import::init_bare($ALL) unless $old;
+ }
my $info_dir = "$ALL/objects/info";
my $alt = "$info_dir/alternates";
- my $mode = 0644;
my (@old, @new, %seen); # seen: st_dev + st_ino
if (-e $alt) {
open(my $fh, '<', $alt) or die "open $alt: $!";
$mode = (stat($fh))[2] & 07777;
while (my $line = <$fh>) {
chomp(my $d = $line);
+
+ # expand relative path (/local/ stuff)
+ substr($d, 0, 3) eq '../' and
+ $d = "$ALL/objects/$d";
if (my @st = stat($d)) {
next if $seen{"$st[0]\0$st[1]"}++;
} else {
@@ -915,6 +928,22 @@ sub idx_init { # similar to V2Writable
push @old, $line;
}
}
+
+ # for LeiStore, and possibly some mirror-only state
+ if (opendir(my $dh, my $local = "$self->{topdir}/local")) {
+ # highest numbered epoch first
+ for my $n (sort { $b <=> $a } map { substr($_, 0, -4) + 0 }
+ grep(/\A[0-9]+\.git\z/, readdir($dh))) {
+ my $d = "$local/$n.git/objects"; # absolute path
+ if (my @st = stat($d)) {
+ next if $seen{"$st[0]\0$st[1]"}++;
+ # favor relative paths for rename-friendliness
+ push @new, "../../local/$n.git/objects\n";
+ } else {
+ warn "W: stat($d) failed: $!\n";
+ }
+ }
+ }
for my $ibx (@{$self->{ibx_list}}) {
my $line = $ibx->git->{git_dir} . "/objects\n";
chomp(my $d = $line);
diff --git a/lib/PublicInbox/Import.pm b/lib/PublicInbox/Import.pm
index 2cb4896a..b7be4c46 100644
--- a/lib/PublicInbox/Import.pm
+++ b/lib/PublicInbox/Import.pm
@@ -405,6 +405,10 @@ sub add {
if ($smsg) {
$smsg->{blob} = $self->get_mark(":$blob");
$smsg->{raw_bytes} = $n;
+ if (my $oidx = delete $smsg->{-oidx}) { # used by LeiStore
+ return if $oidx->blob_exists($smsg->{blob});
+ }
+ # XXX do we need this? it's in git at this point
$smsg->{-raw_email} = \$raw_email;
}
my $ref = $self->{ref};
diff --git a/lib/PublicInbox/LeiDaemon.pm b/lib/PublicInbox/LeiDaemon.pm
index d0c53416..b4b1ac59 100644
--- a/lib/PublicInbox/LeiDaemon.pm
+++ b/lib/PublicInbox/LeiDaemon.pm
@@ -42,7 +42,7 @@ our %CMD = ( # sorted in order of importance/use:
'add-extinbox' => [ 'URL-OR-PATHNAME',
'add/set priority of a publicinbox|extindex for extra matches',
qw(prio=i) ],
-'ls-extinbox' => [ '[FILTER]', 'list publicinbox|extindex sources',
+'ls-extinbox' => [ '[FILTER]', 'list publicinbox|extindex locations',
qw(format|f=s z local remote) ],
'forget-extinbox' => [ '{URL-OR-PATHNAME|--prune}',
'exclude further results from a publicinbox|extindex',
diff --git a/lib/PublicInbox/LeiSearch.pm b/lib/PublicInbox/LeiSearch.pm
new file mode 100644
index 00000000..9cfd6ea2
--- /dev/null
+++ b/lib/PublicInbox/LeiSearch.pm
@@ -0,0 +1,40 @@
+# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+package PublicInbox::LeiSearch;
+use strict;
+use v5.10.1;
+use parent qw(PublicInbox::ExtSearch);
+use PublicInbox::Search;
+
+sub combined_docid ($$) {
+ my ($self, $num) = @_;
+ my $nshard = ($self->{nshard} // 1);
+ ($num - 1) * $nshard + 1;
+}
+
+sub msg_keywords {
+ my ($self, $num) = @_; # num_or_mitem
+ my $xdb = $self->xdb; # set {nshard};
+ my $docid = ref($num) ? $num->get_docid : do {
+ # get combined docid from over.num:
+ # (not generic Xapian, only works with our sharding scheme)
+ my $nshard = $self->{nshard} // 1;
+ ($num - 1) * $nshard + $num % $nshard + 1;
+ };
+ my %kw;
+ eval {
+ my $end = $xdb->termlist_end($docid);
+ my $cur = $xdb->termlist_begin($docid);
+ for (; $cur != $end; $cur++) {
+ $cur->skip_to('K');
+ last if $cur == $end;
+ my $kw = $cur->get_termname;
+ $kw =~ s/\AK//s and $kw{$kw} = undef;
+ }
+ };
+ warn "E: #$docid ($num): $@\n" if $@;
+ wantarray ? sort(keys(%kw)) : \%kw;
+}
+
+1;
diff --git a/lib/PublicInbox/LeiStore.pm b/lib/PublicInbox/LeiStore.pm
new file mode 100644
index 00000000..56f668b8
--- /dev/null
+++ b/lib/PublicInbox/LeiStore.pm
@@ -0,0 +1,197 @@
+# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+#
+# Local storage (cache/memo) for lei(1), suitable for personal/private
+# mail iff on encrypted device/FS. Based on v2, but only deduplicates
+# based on git OID.
+#
+# for xref3, the following are constant: $eidx_key = '.', $xnum = -1
+package PublicInbox::LeiStore;
+use strict;
+use v5.10.1;
+use parent qw(PublicInbox::Lock);
+use PublicInbox::SearchIdx qw(crlf_adjust);
+use PublicInbox::ExtSearchIdx;
+use PublicInbox::Import;
+use PublicInbox::InboxWritable;
+use PublicInbox::V2Writable;
+use PublicInbox::ContentHash qw(content_hash);
+use PublicInbox::MID qw(mids);
+use PublicInbox::LeiSearch;
+
+sub new {
+ my (undef, $dir, $opt) = @_;
+ my $eidx = PublicInbox::ExtSearchIdx->new($dir, $opt);
+ bless { priv_eidx => $eidx }, __PACKAGE__;
+}
+
+sub git { $_[0]->{priv_eidx}->git } # read-only
+
+sub packing_factor { $PublicInbox::V2Writable::PACKING_FACTOR }
+
+sub rotate_bytes {
+ $_[0]->{rotate_bytes} // ((1024 * 1024 * 1024) / $_[0]->packing_factor)
+}
+
+sub git_pfx { "$_[0]->{priv_eidx}->{topdir}/local" };
+
+sub git_epoch_max {
+ my ($self) = @_;
+ my $pfx = $self->git_pfx;
+ my $max = 0;
+ return $max unless -d $pfx ;
+ opendir my $dh, $pfx or die "opendir $pfx: $!\n";
+ while (defined(my $git_dir = readdir($dh))) {
+ $git_dir =~ m!\A([0-9]+)\.git\z! or next;
+ $max = $1 + 0 if $1 > $max;
+ }
+ $max;
+}
+
+sub importer {
+ my ($self) = @_;
+ my $max;
+ my $im = $self->{im};
+ if ($im) {
+ return $im if $im->{bytes_added} < $self->rotate_bytes;
+
+ delete $self->{im};
+ $im->done;
+ undef $im;
+ $self->checkpoint;
+ $max = $self->git_epoch_max + 1;
+ }
+ my $pfx = $self->git_pfx;
+ $max //= $self->git_epoch_max;
+ while (1) {
+ my $latest = "$pfx/$max.git";
+ my $old = -e $latest;
+ my $git = PublicInbox::Git->new($latest);
+ PublicInbox::Import::init_bare({ git => $git });
+ $git->qx(qw(config core.sharedRepository 0600)) if !$old;
+ my $packed_bytes = $git->packed_bytes;
+ my $unpacked_bytes = $packed_bytes / $self->packing_factor;
+ if ($unpacked_bytes >= $self->rotate_bytes) {
+ $max++;
+ next;
+ }
+ chomp(my $i = $git->qx(qw(var GIT_COMMITTER_IDENT)));
+ die "$git->{git_dir} GIT_COMMITTER_IDENT failed\n" if $?;
+ my ($n, $e) = ($i =~ /\A(.+) <([^>]+)> [0-9]+ [-\+]?[0-9]+$/g)
+ or die "could not extract name/email from `$i'\n";
+ $self->{im} = $im = PublicInbox::Import->new($git, $n, $e);
+ $im->{bytes_added} = int($packed_bytes / $self->packing_factor);
+ $im->{lock_path} = undef;
+ $im->{path_type} = 'v2';
+ return $im;
+ }
+}
+
+sub search {
+ PublicInbox::LeiSearch->new($_[0]->{priv_eidx}->{topdir});
+}
+
+sub eidx_init {
+ my ($self) = @_;
+ my $eidx = $self->{priv_eidx};
+ $eidx->idx_init({-private => 1});
+ $eidx;
+}
+
+sub _docids_for ($$) {
+ my ($self, $eml) = @_;
+ my %docids;
+ my $chash = content_hash($eml);
+ my $eidx = eidx_init($self);
+ my $oidx = $eidx->{oidx};
+ my $im = $self->{im};
+ for my $mid (@{mids($eml)}) {
+ my ($id, $prev);
+ while (my $cur = $oidx->next_by_mid($mid, \$id, \$prev)) {
+ my $oid = $cur->{blob};
+ my $docid = $cur->{num};
+ my $bref = $im ? $im->cat_blob($oid) : undef;
+ $bref //= $eidx->git->cat_file($oid) // do {
+ warn "W: $oid (#$docid) <$mid> not found\n";
+ next;
+ };
+ local $self->{current_info} = $oid;
+ my $x = PublicInbox::Eml->new($bref);
+ $docids{$docid} = $docid if content_hash($x) eq $chash;
+ }
+ }
+ sort { $a <=> $b } values %docids;
+}
+
+sub set_eml_keywords {
+ my ($self, $eml, @kw) = @_;
+ my $eidx = eidx_init($self);
+ my @docids = _docids_for($self, $eml);
+ for my $docid (@docids) {
+ $eidx->idx_shard($docid)->shard_set_keywords($docid, @kw);
+ }
+ \@docids;
+}
+
+sub add_eml_keywords {
+ my ($self, $eml, @kw) = @_;
+ my $eidx = eidx_init($self);
+ my @docids = _docids_for($self, $eml);
+ for my $docid (@docids) {
+ $eidx->idx_shard($docid)->shard_add_keywords($docid, @kw);
+ }
+ \@docids;
+}
+
+sub remove_eml_keywords {
+ my ($self, $eml, @kw) = @_;
+ my $eidx = eidx_init($self);
+ my @docids = _docids_for($self, $eml);
+ for my $docid (@docids) {
+ $eidx->idx_shard($docid)->shard_remove_keywords($docid, @kw);
+ }
+ \@docids;
+}
+
+sub add_eml {
+ my ($self, $eml) = @_;
+ my $eidx = eidx_init($self);
+ my $oidx = $eidx->{oidx};
+ my $smsg = bless { -oidx => $oidx }, 'PublicInbox::Smsg';
+ my $im = $self->importer;
+ $im->add($eml, undef, $smsg) or return; # duplicate returns undef
+ my $msgref = delete $smsg->{-raw_email};
+ $smsg->{bytes} = $smsg->{raw_bytes} + crlf_adjust($$msgref);
+
+ local $self->{current_info} = $smsg->{blob};
+ if (my @docids = _docids_for($self, $eml)) {
+ for my $docid (@docids) {
+ my $idx = $eidx->idx_shard($docid);
+ $oidx->add_xref3($docid, -1, $smsg->{blob}, '.');
+ $idx->shard_add_eidx_info($docid, '.', $eml); # List-Id
+ }
+ } else {
+ $smsg->{num} = $oidx->adj_counter('eidx_docid', '+');
+ $oidx->add_overview($eml, $smsg);
+ $oidx->add_xref3($smsg->{num}, -1, $smsg->{blob}, '.');
+ my $idx = $eidx->idx_shard($smsg->{num});
+ $idx->index_raw($msgref, $eml, $smsg);
+ }
+ $smsg->{blob}
+}
+
+sub done {
+ my ($self) = @_;
+ my $err = '';
+ if (my $im = delete($self->{im})) {
+ eval { $im->done };
+ if ($@) {
+ $err .= "import done: $@\n";
+ warn $err;
+ }
+ }
+ $self->{priv_eidx}->done;
+ die $err if $err;
+}
+
+1;
diff --git a/lib/PublicInbox/OverIdx.pm b/lib/PublicInbox/OverIdx.pm
index 4a39bf53..c8630ddb 100644
--- a/lib/PublicInbox/OverIdx.pm
+++ b/lib/PublicInbox/OverIdx.pm
@@ -684,4 +684,14 @@ DELETE FROM eidxq WHERE docid = ?
}
+sub blob_exists {
+ my ($self, $oidhex) = @_;
+ my $sth = $self->dbh->prepare_cached(<<'', undef, 1);
+SELECT COUNT(*) FROM xref3 WHERE oidbin = ?
+
+ $sth->bind_param(1, pack('H*', $oidhex), SQL_BLOB);
+ $sth->execute;
+ $sth->fetchrow_array;
+}
+
1;
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index b731f698..548f2114 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -1,6 +1,6 @@
# Copyright (C) 2015-2020 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
-# based on notmuch, but with no concept of folders, files or flags
+# based on notmuch, but with no concept of folders, files
#
# Indexes mail with Xapian and our (SQLite-based) ::Msgmap for use
# with the web and NNTP interfaces. This index maintains thread
@@ -371,7 +371,7 @@ sub eml2doc ($$$;$) {
index_headers($self, $smsg);
if (defined(my $eidx_key = $smsg->{eidx_key})) {
- $doc->add_boolean_term('O'.$eidx_key);
+ $doc->add_boolean_term('O'.$eidx_key) if $eidx_key ne '.';
}
msg_iter($eml, \&index_xapian, [ $self, $doc ]);
index_ids($self, $doc, $eml, $mids);
@@ -467,7 +467,7 @@ sub add_eidx_info {
begin_txn_lazy($self);
my $doc = _get_doc($self, $docid) or return;
term_generator($self)->set_document($doc);
- $doc->add_boolean_term('O'.$eidx_key);
+ $doc->add_boolean_term('O'.$eidx_key) if $eidx_key ne '.';
index_list_id($self, $doc, $eml);
$self->{xdb}->replace_document($docid, $doc);
}
@@ -501,6 +501,47 @@ sub remove_eidx_info {
$self->{xdb}->replace_document($docid, $doc);
}
+sub set_keywords {
+ my ($self, $docid, @kw) = @_;
+ begin_txn_lazy($self);
+ my $doc = _get_doc($self, $docid) or return;
+ my %keep = map { $_ => 1 } @kw;
+ my %add = %keep;
+ my @rm;
+ my $end = $doc->termlist_end;
+ for (my $cur = $doc->termlist_begin; $cur != $end; $cur++) {
+ $cur->skip_to('K');
+ last if $cur == $end;
+ my $kw = $cur->get_termname;
+ $kw =~ s/\AK//s or next;
+ $keep{$kw} ? delete($add{$kw}) : push(@rm, $kw);
+ }
+ return unless (scalar(@rm) + scalar(keys %add));
+ $doc->remove_term('K'.$_) for @rm;
+ $doc->add_boolean_term('K'.$_) for (keys %add);
+ $self->{xdb}->replace_document($docid, $doc);
+}
+
+sub add_keywords {
+ my ($self, $docid, @kw) = @_;
+ begin_txn_lazy($self);
+ my $doc = _get_doc($self, $docid) or return;
+ $doc->add_boolean_term('K'.$_) for @kw;
+ $self->{xdb}->replace_document($docid, $doc);
+}
+
+sub remove_keywords {
+ my ($self, $docid, @kw) = @_;
+ begin_txn_lazy($self);
+ my $doc = _get_doc($self, $docid) or return;
+ my $replace;
+ eval {
+ $doc->remove_term('K'.$_);
+ $replace = 1
+ } for @kw;
+ $self->{xdb}->replace_document($docid, $doc) if $replace;
+}
+
sub get_val ($$) {
my ($doc, $col) = @_;
sortable_unserialise($doc->get_value($col));
diff --git a/lib/PublicInbox/SearchIdxShard.pm b/lib/PublicInbox/SearchIdxShard.pm
index 2e654769..87b0bad6 100644
--- a/lib/PublicInbox/SearchIdxShard.pm
+++ b/lib/PublicInbox/SearchIdxShard.pm
@@ -89,6 +89,12 @@ sub shard_worker_loop ($$$$$) {
my ($len, $docid, $eidx_key) = split(/ /, $line, 3);
$self->remove_eidx_info($docid, $eidx_key,
eml($r, $len));
+ } elsif ($line =~ s/\A=K (\d+) //) {
+ $self->set_keywords($1 + 0, split(/ /, $line));
+ } elsif ($line =~ s/\A-K (\d+) //) {
+ $self->remove_keywords($1 + 0, split(/ /, $line));
+ } elsif ($line =~ s/\A\+K (\d+) //) {
+ $self->add_keywords($1 + 0, split(/ /, $line));
} elsif ($line =~ s/\AO ([^\n]+)//) {
my $over_fn = $1;
$over_fn =~ tr/\0/\n/;
@@ -210,6 +216,33 @@ sub shard_remove {
}
}
+sub shard_set_keywords {
+ my ($self, $docid, @kw) = @_;
+ if (my $w = $self->{w}) { # triggers remove_by_docid in a shard child
+ print $w "=K $docid @kw\n" or die "failed to write: $!";
+ } else { # same process
+ $self->set_keywords($docid, @kw);
+ }
+}
+
+sub shard_remove_keywords {
+ my ($self, $docid, @kw) = @_;
+ if (my $w = $self->{w}) { # triggers remove_by_docid in a shard child
+ print $w "-K $docid @kw\n" or die "failed to write: $!";
+ } else { # same process
+ $self->remove_keywords($docid, @kw);
+ }
+}
+
+sub shard_add_keywords {
+ my ($self, $docid, @kw) = @_;
+ if (my $w = $self->{w}) { # triggers remove_by_docid in a shard child
+ print $w "+K $docid @kw\n" or die "failed to write: $!";
+ } else { # same process
+ $self->add_keywords($docid, @kw);
+ }
+}
+
sub shard_over_check {
my ($self, $over) = @_;
if (my $w = $self->{w}) { # triggers remove_by_docid in a shard child
diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm
index 3e3b275f..e8a5fbd2 100644
--- a/lib/PublicInbox/V2Writable.pm
+++ b/lib/PublicInbox/V2Writable.pm
@@ -24,7 +24,7 @@ use File::Temp ();
my $OID = qr/[a-f0-9]{40,}/;
# an estimate of the post-packed size to the raw uncompressed size
-my $PACKING_FACTOR = 0.4;
+our $PACKING_FACTOR = 0.4;
# SATA storage lags behind what CPUs are capable of, so relying on
# nproc(1) can be misleading and having extra Xapian shards is a
diff --git a/t/lei_store.t b/t/lei_store.t
new file mode 100644
index 00000000..c18a9620
--- /dev/null
+++ b/t/lei_store.t
@@ -0,0 +1,74 @@
+#!perl -w
+# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use v5.10.1;
+use Test::More;
+use PublicInbox::TestCommon;
+require_mods(qw(DBD::SQLite Search::Xapian));
+require_git 2.6;
+require_ok 'PublicInbox::LeiStore';
+require_ok 'PublicInbox::ExtSearch';
+my ($home, $for_destroy) = tmpdir();
+my $opt = { 1 => \(my $out = ''), 2 => \(my $err = '') };
+my $store_dir = "$home/lst";
+my $lst = PublicInbox::LeiStore->new($store_dir, { creat => 1 });
+ok($lst, '->new');
+my $oid = $lst->add_eml(eml_load('t/data/0001.patch'));
+like($oid, qr/\A[0-9a-f]+\z/, 'add returned OID');
+my $eml = eml_load('t/data/0001.patch');
+is($lst->add_eml($eml), undef, 'idempotent');
+$lst->done;
+{
+ my $es = $lst->search;
+ my $msgs = $es->over->query_xover(0, 1000);
+ is(scalar(@$msgs), 1, 'one message');
+ is($msgs->[0]->{blob}, $oid, 'blob matches');
+ my $mset = $es->mset("mid:$msgs->[0]->{mid}");
+ is($mset->size, 1, 'search works');
+ is_deeply($es->mset_to_artnums($mset), [ $msgs->[0]->{num} ],
+ 'mset_to_artnums');
+ my @kw = $es->msg_keywords(($mset->items)[0]);
+ is_deeply(\@kw, [], 'no flags');
+}
+
+for my $parallel (0, 1) {
+ $lst->{priv_eidx}->{parallel} = $parallel;
+ my $docids = $lst->set_eml_keywords($eml, qw(seen draft));
+ is(scalar @$docids, 1, 'set keywords on one doc');
+ $lst->done;
+ my @kw = $lst->search->msg_keywords($docids->[0]);
+ is_deeply(\@kw, [qw(draft seen)], 'kw matches');
+
+ $docids = $lst->add_eml_keywords($eml, qw(seen draft));
+ $lst->done;
+ is(scalar @$docids, 1, 'idempotently added keywords to doc');
+ @kw = $lst->search->msg_keywords($docids->[0]);
+ is_deeply(\@kw, [qw(draft seen)], 'kw matches after noop');
+
+ $docids = $lst->remove_eml_keywords($eml, qw(seen draft));
+ is(scalar @$docids, 1, 'removed from one doc');
+ $lst->done;
+ @kw = $lst->search->msg_keywords($docids->[0]);
+ is_deeply(\@kw, [], 'kw matches after remove');
+
+ $docids = $lst->remove_eml_keywords($eml, qw(answered));
+ is(scalar @$docids, 1, 'removed from one doc (idempotently)');
+ $lst->done;
+ @kw = $lst->search->msg_keywords($docids->[0]);
+ is_deeply(\@kw, [], 'kw matches after remove (idempotent)');
+
+ $docids = $lst->add_eml_keywords($eml, qw(answered));
+ is(scalar @$docids, 1, 'added to empty doc');
+ $lst->done;
+ @kw = $lst->search->msg_keywords($docids->[0]);
+ is_deeply(\@kw, ['answered'], 'kw matches after add');
+
+ $docids = $lst->set_eml_keywords($eml);
+ is(scalar @$docids, 1, 'set to clobber');
+ $lst->done;
+ @kw = $lst->search->msg_keywords($docids->[0]);
+ is_deeply(\@kw, [], 'set clobbers all');
+}
+
+done_testing;
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [PATCH 04/26] tests: more common JSON module loading
2020-12-18 12:09 [PATCH 00/26] lei: basic UI + IPC work Eric Wong
` (2 preceding siblings ...)
2020-12-18 12:09 ` [PATCH 03/26] lei_store: local storage for Local Email Interface Eric Wong
@ 2020-12-18 12:09 ` Eric Wong
2020-12-18 12:09 ` [PATCH 05/26] lei: use spawn (vfork + execve) for lazy start Eric Wong
` (21 subsequent siblings)
25 siblings, 0 replies; 30+ messages in thread
From: Eric Wong @ 2020-12-18 12:09 UTC (permalink / raw)
To: meta
We'll probably be using JSON more in the future, so make
it easier to require in tests
---
lib/PublicInbox/ManifestJsGz.pm | 2 +-
lib/PublicInbox/TestCommon.pm | 4 ++++
t/extsearch.t | 3 +--
t/lei.t | 3 +--
t/www_listing.t | 8 +++-----
5 files changed, 10 insertions(+), 10 deletions(-)
diff --git a/lib/PublicInbox/ManifestJsGz.pm b/lib/PublicInbox/ManifestJsGz.pm
index 6d5b57ee..33df020a 100644
--- a/lib/PublicInbox/ManifestJsGz.pm
+++ b/lib/PublicInbox/ManifestJsGz.pm
@@ -11,7 +11,7 @@ use PublicInbox::Config;
use IO::Compress::Gzip qw(gzip);
use HTTP::Date qw(time2str);
-our $json = PublicInbox::Config::json();
+my $json = PublicInbox::Config::json();
# called by WwwListing
sub url_regexp {
diff --git a/lib/PublicInbox/TestCommon.pm b/lib/PublicInbox/TestCommon.pm
index 299b9c6a..2116575b 100644
--- a/lib/PublicInbox/TestCommon.pm
+++ b/lib/PublicInbox/TestCommon.pm
@@ -75,6 +75,10 @@ sub require_mods {
my $maybe = pop @mods if $mods[-1] =~ /\A[0-9]+\z/;
my @need;
while (my $mod = shift(@mods)) {
+ if ($mod eq 'json') {
+ $mod = 'Cpanel::JSON::XS||JSON::MaybeXS||'.
+ 'JSON||JSON::PP'
+ }
if ($mod eq 'Search::Xapian') {
if (eval { require PublicInbox::Search } &&
PublicInbox::Search::load_xapian()) {
diff --git a/t/extsearch.t b/t/extsearch.t
index fb31b0ab..ffbc10e2 100644
--- a/t/extsearch.t
+++ b/t/extsearch.t
@@ -8,9 +8,8 @@ use PublicInbox::Config;
use PublicInbox::Search;
use PublicInbox::InboxWritable;
use Fcntl qw(:seek);
-my $json = PublicInbox::Config::json() or plan skip_all => 'JSON missing';
require_git(2.6);
-require_mods(qw(DBD::SQLite Search::Xapian));
+require_mods(qw(json DBD::SQLite Search::Xapian));
use_ok 'PublicInbox::ExtSearch';
use_ok 'PublicInbox::ExtSearchIdx';
use_ok 'PublicInbox::OverIdx';
diff --git a/t/lei.t b/t/lei.t
index feee9270..02f21322 100644
--- a/t/lei.t
+++ b/t/lei.t
@@ -6,8 +6,7 @@ use v5.10.1;
use Test::More;
use PublicInbox::TestCommon;
use PublicInbox::Config;
-my $json = PublicInbox::Config::json() or plan skip_all => 'JSON missing';
-require_mods(qw(DBD::SQLite Search::Xapian));
+require_mods(qw(json DBD::SQLite Search::Xapian));
my ($home, $for_destroy) = tmpdir();
my $opt = { 1 => \(my $out = ''), 2 => \(my $err = '') };
diff --git a/t/www_listing.t b/t/www_listing.t
index 63613371..94c1e5bb 100644
--- a/t/www_listing.t
+++ b/t/www_listing.t
@@ -7,14 +7,12 @@ use Test::More;
use PublicInbox::Spawn qw(which);
use PublicInbox::TestCommon;
use PublicInbox::Import;
-require_mods(qw(URI::Escape Plack::Builder Digest::SHA
+require_mods(qw(json URI::Escape Plack::Builder Digest::SHA
IO::Compress::Gzip IO::Uncompress::Gunzip HTTP::Tiny));
require PublicInbox::WwwListing;
require PublicInbox::ManifestJsGz;
-my $json = do {
- no warnings 'once';
- $PublicInbox::ManifestJsGz::json;
-} or plan skip_all => "JSON module missing";
+use PublicInbox::Config;
+my $json = PublicInbox::Config::json();
use_ok 'PublicInbox::Git';
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [PATCH 05/26] lei: use spawn (vfork + execve) for lazy start
2020-12-18 12:09 [PATCH 00/26] lei: basic UI + IPC work Eric Wong
` (3 preceding siblings ...)
2020-12-18 12:09 ` [PATCH 04/26] tests: more common JSON module loading Eric Wong
@ 2020-12-18 12:09 ` Eric Wong
2020-12-18 12:09 ` [PATCH 06/26] lei: refine help/option parsing, implement "init" Eric Wong
` (20 subsequent siblings)
25 siblings, 0 replies; 30+ messages in thread
From: Eric Wong @ 2020-12-18 12:09 UTC (permalink / raw)
To: meta
This allows us to rely on FD_CLOEXEC being set on pipes
from prove(1), so forgetting `daemon-stop' won't cause
tests to hang.
Unfortunately, daemon tests will be slower with this.
---
lib/PublicInbox/LeiDaemon.pm | 12 +++++-------
script/lei | 14 ++++++++++----
2 files changed, 15 insertions(+), 11 deletions(-)
diff --git a/lib/PublicInbox/LeiDaemon.pm b/lib/PublicInbox/LeiDaemon.pm
index b4b1ac59..fd4d00d4 100644
--- a/lib/PublicInbox/LeiDaemon.pm
+++ b/lib/PublicInbox/LeiDaemon.pm
@@ -324,29 +324,27 @@ sub accept_dispatch { # Listener {post_accept} callback
sub noop {}
# lei(1) calls this when it can't connect
-sub lazy_start ($$) {
+sub lazy_start {
my ($path, $err) = @_;
if ($err == ECONNREFUSED) {
unlink($path) or die "unlink($path): $!";
} elsif ($err != ENOENT) {
die "connect($path): $!";
}
+ require IO::FDPass;
my $umask = umask(077) // die("umask(077): $!");
my $l = IO::Socket::UNIX->new(Local => $path,
Listen => 1024,
Type => SOCK_STREAM) or
$err = $!;
umask($umask) or die("umask(restore): $!");
- $l or return $err;
+ $l or return die "bind($path): $err";
my @st = stat($path) or die "stat($path): $!";
my $dev_ino_expect = pack('dd', $st[0], $st[1]); # dev+ino
pipe(my ($eof_r, $eof_w)) or die "pipe: $!";
my $oldset = PublicInbox::Sigfd::block_signals();
my $pid = fork // die "fork: $!";
- if ($pid) {
- PublicInbox::Sigfd::sig_setmask($oldset);
- return; # client will connect to $path
- }
+ return if $pid;
openlog($path, 'pid', 'user');
local $SIG{__DIE__} = sub {
syslog('crit', "@_");
@@ -360,7 +358,7 @@ sub lazy_start ($$) {
open STDERR, '>&STDIN' or die "redirect stderr failed: $!\n";
setsid();
$pid = fork // die "fork: $!";
- exit if $pid;
+ return if $pid;
$0 = "lei-daemon $path";
require PublicInbox::Listener;
require PublicInbox::EOFpipe;
diff --git a/script/lei b/script/lei
index 1b5af3a1..637c1951 100755
--- a/script/lei
+++ b/script/lei
@@ -21,13 +21,19 @@ if (eval { require IO::FDPass; 1 }) { # use daemon to reduce load time
};
my $sock = IO::Socket::UNIX->new(Peer => $path, Type => SOCK_STREAM);
unless ($sock) { # start the daemon if not started
- my $err = $!;
- require PublicInbox::LeiDaemon;
- $err = PublicInbox::LeiDaemon::lazy_start($path, $err);
+ my $err = $! + 0;
+ my $env = { PERL5LIB => join(':', @INC) };
+ my $cmd = [ $^X, qw[-MPublicInbox::LeiDaemon
+ -E PublicInbox::LeiDaemon::lazy_start(@ARGV)],
+ $path, $err ];
+ require PublicInbox::Spawn;
+ waitpid(PublicInbox::Spawn::spawn($cmd, $env), 0);
+ warn "lei-daemon exited with \$?=$?\n" if $?;
+
# try connecting again anyways, unlink+bind may be racy
$sock = IO::Socket::UNIX->new(Peer => $path,
Type => SOCK_STREAM) // die
- "connect($path): $! (bind($path): $err)";
+ "connect($path): $! (after attempted daemon start)";
}
my $pwd = $ENV{PWD};
my $cwd = cwd();
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [PATCH 06/26] lei: refine help/option parsing, implement "init"
2020-12-18 12:09 [PATCH 00/26] lei: basic UI + IPC work Eric Wong
` (4 preceding siblings ...)
2020-12-18 12:09 ` [PATCH 05/26] lei: use spawn (vfork + execve) for lazy start Eric Wong
@ 2020-12-18 12:09 ` Eric Wong
2020-12-18 12:09 ` [PATCH 07/26] t/lei-oneshot: standalone oneshot (non-socket) test Eric Wong
` (19 subsequent siblings)
25 siblings, 0 replies; 30+ messages in thread
From: Eric Wong @ 2020-12-18 12:09 UTC (permalink / raw)
To: meta
There's a bunch of work in here as the foundations are being
fleshed out. One of the UI/UX is to make it easy to keep
built-in help and shell completions consistent
---
lib/PublicInbox/LeiDaemon.pm | 401 ++++++++++++++++++++++++++---------
lib/PublicInbox/LeiStore.pm | 7 +-
script/lei | 2 +-
t/lei.t | 82 +++++--
4 files changed, 378 insertions(+), 114 deletions(-)
diff --git a/lib/PublicInbox/LeiDaemon.pm b/lib/PublicInbox/LeiDaemon.pm
index fd4d00d4..010c1cba 100644
--- a/lib/PublicInbox/LeiDaemon.pm
+++ b/lib/PublicInbox/LeiDaemon.pm
@@ -15,13 +15,18 @@ use POSIX qw(setsid);
use IO::Socket::UNIX;
use IO::Handle ();
use Sys::Syslog qw(syslog openlog);
+use PublicInbox::Config;
use PublicInbox::Syscall qw($SFD_NONBLOCK EPOLLIN EPOLLONESHOT);
use PublicInbox::Sigfd;
use PublicInbox::DS qw(now);
use PublicInbox::Spawn qw(spawn);
-our $quit = sub { exit(shift // 0) };
+use Text::Wrap qw(wrap);
+use File::Path qw(mkpath);
+use File::Spec;
+our $quit = \&CORE::exit;
my $glp = Getopt::Long::Parser->new;
$glp->configure(qw(gnu_getopt no_ignore_case auto_abbrev));
+our %PATH2CFG; # persistent for socket daemon
# TBD: this is a documentation mechanism to show a subcommand
# (may) pass options through to another command:
@@ -30,45 +35,48 @@ sub pass_through { () }
# TODO: generate shell completion + help using %CMD and %OPTDESC
# command => [ positional_args, 1-line description, Getopt::Long option spec ]
our %CMD = ( # sorted in order of importance/use:
-'query' => [ 'SEARCH-TERMS...', 'search for messages matching terms', qw(
+'query' => [ 'SEARCH_TERMS...', 'search for messages matching terms', qw(
save-as=s output|o=s format|f=s dedupe|d=s thread|t augment|a
- limit|n=i sort|s=s reverse|r offset=i remote local! extinbox!
+ limit|n=i sort|s=s@ reverse|r offset=i remote local! extinbox!
since|after=s until|before=s) ],
-'show' => [ '{MID|OID}', 'show a given object (Message-ID or object ID)',
+'show' => [ 'MID|OID', 'show a given object (Message-ID or object ID)',
qw(type=s solve! format|f=s dedupe|d=s thread|t remote local!),
pass_through('git show') ],
-'add-extinbox' => [ 'URL-OR-PATHNAME',
+'add-extinbox' => [ 'URL_OR_PATHNAME',
'add/set priority of a publicinbox|extindex for extra matches',
qw(prio=i) ],
-'ls-extinbox' => [ '[FILTER]', 'list publicinbox|extindex locations',
+'ls-extinbox' => [ '[FILTER...]', 'list publicinbox|extindex locations',
qw(format|f=s z local remote) ],
-'forget-extinbox' => [ '{URL-OR-PATHNAME|--prune}',
+'forget-extinbox' => [ '{URL_OR_PATHNAME|--prune}',
'exclude further results from a publicinbox|extindex',
qw(prune) ],
-'ls-query' => [ '[FILTER]', 'list saved search queries',
+'ls-query' => [ '[FILTER...]', 'list saved search queries',
qw(name-only format|f=s z) ],
'rm-query' => [ 'QUERY_NAME', 'remove a saved search' ],
'mv-query' => [ qw(OLD_NAME NEW_NAME), 'rename a saved search' ],
-'plonk' => [ '{--thread|--from=IDENT}',
+'plonk' => [ '--thread|--from=IDENT',
'exclude mail matching From: or thread from non-Message-ID searches',
- qw(thread|t from|f=s mid=s oid=s) ],
-'mark' => [ 'MESSAGE-FLAGS', 'set/unset flags on message(s) from stdin',
+ qw(thread|t stdin| from|f=s mid=s oid=s) ],
+'mark' => [ 'MESSAGE_FLAGS...',
+ 'set/unset flags on message(s) from stdin',
qw(stdin| oid=s exact by-mid|mid:s) ],
-'forget' => [ '--stdin', 'exclude message(s) on stdin from query results',
- qw(stdin| oid=s exact by-mid|mid:s) ],
+'forget' => [ '[--stdin|--oid=OID|--by-mid=MID]',
+ 'exclude message(s) on stdin from query results',
+ qw(stdin| oid=s exact by-mid|mid:s quiet|q) ],
-'purge-mailsource' => [ '{URL-OR-PATHNAME|--all}',
+'purge-mailsource' => [ '{URL_OR_PATHNAME|--all}',
'remove imported messages from IMAP, Maildirs, and MH',
qw(exact! all jobs:i indexed) ],
# code repos are used for `show' to solve blobs from patch mails
'add-coderepo' => [ 'PATHNAME', 'add or set priority of a git code repo',
qw(prio=i) ],
-'ls-coderepo' => [ '[FILTER]', 'list known code repos', qw(format|f=s z) ],
+'ls-coderepo' => [ '[FILTER_TERMS...]',
+ 'list known code repos', qw(format|f=s z) ],
'forget-coderepo' => [ 'PATHNAME',
'stop using repo to solve blobs from patches',
qw(prune) ],
@@ -76,7 +84,7 @@ our %CMD = ( # sorted in order of importance/use:
'add-watch' => [ '[URL_OR_PATHNAME]',
'watch for new messages and flag changes',
qw(import! flags! interval=s recursive|r exclude=s include=s) ],
-'ls-watch' => [ '[FILTER]', 'list active watches with numbers and status',
+'ls-watch' => [ '[FILTER...]', 'list active watches with numbers and status',
qw(format|f=s z) ],
'pause-watch' => [ '[WATCH_NUMBER_OR_FILTER]', qw(all local remote) ],
'resume-watch' => [ '[WATCH_NUMBER_OR_FILTER]', qw(all local remote) ],
@@ -88,11 +96,13 @@ our %CMD = ( # sorted in order of importance/use:
qw(stdin| limit|n=i offset=i recursive|r exclude=s include=s !flags),
],
-'config' => [ '[ANYTHING...]',
- 'git-config(1) wrapper for ~/.config/lei/config',
+'config' => [ '[...]', 'git-config(1) wrapper for ~/.config/lei/config',
pass_through('git config') ],
-'daemon-stop' => [ undef, 'stop the lei-daemon' ],
-'daemon-pid' => [ undef, 'show the PID of the lei-daemon' ],
+'init' => [ '[PATHNAME]',
+ 'initialize storage, default: ~/.local/share/lei/store',
+ qw(quiet|q) ],
+'daemon-stop' => [ '', 'stop the lei-daemon' ],
+'daemon-pid' => [ '', 'show the PID of the lei-daemon' ],
'help' => [ '[SUBCOMMAND]', 'show help' ],
# XXX do we need this?
@@ -108,36 +118,43 @@ our %CMD = ( # sorted in order of importance/use:
# $spec => [@ALLOWED_VALUES (default is first), $description],
# $spec => $description
# "$SUB_COMMAND TAB $spec" => as above
-my $stdin_formats = [ qw(auto raw mboxrd mboxcl2 mboxcl mboxo),
+my $stdin_formats = [ 'IN|auto|raw|mboxrd|mboxcl2|mboxcl|mboxo',
'specify message input format' ];
-my $ls_format = [ qw(plain json null), 'listing output format' ];
-my $show_format = [ qw(plain raw html mboxrd mboxcl2 mboxcl),
- 'message/object output format' ];
+my $ls_format = [ 'OUT|plain|json|null', 'listing output format' ];
my %OPTDESC = (
+'help|h' => 'show this built-in help',
+'quiet|q' => 'be quiet',
'solve!' => 'do not attempt to reconstruct blobs from emails',
-'save-as=s' => 'save a search terms by given name',
+'save-as=s' => ['NAME', 'save a search terms by given name'],
-'type=s' => [qw(any mid git), 'disambiguate type' ],
+'type=s' => [ 'any|mid|git', 'disambiguate type' ],
-'dedupe|d=s' => [qw(content oid mid), 'deduplication strategy'],
-'thread|t' => 'every message in the same thread as the actual match(es)',
+'dedupe|d=s' => ['STRAT|content|oid|mid',
+ 'deduplication strategy'],
+'show thread|t' => 'display entire thread a message belongs to',
+'query thread|t' =>
+ 'return all messages in the same thread as the actual match(es)',
'augment|a' => 'augment --output destination instead of clobbering',
-'output|o=s' => "destination (e.g. `/path/to/Maildir', or `-' for stdout)",
+'output|o=s' => [ 'DEST',
+ "destination (e.g. `/path/to/Maildir', or `-' for stdout)" ],
+'show format|f=s' => [ 'OUT|plain|raw|html|mboxrd|mboxcl2|mboxcl',
+ 'message/object output format' ],
'mark format|f=s' => $stdin_formats,
'forget format|f=s' => $stdin_formats,
-'query format|f=s' => [qw(maildir mboxrd mboxcl2 mboxcl html oid),
- q[specify output format (default: determined by --output)]],
+'query format|f=s' => [ 'OUT|maildir|mboxrd|mboxcl2|mboxcl|html|oid',
+ 'specify output format, default depends on --output'],
'ls-query format|f=s' => $ls_format,
-'ls-extinbox format|f=s' => $ls_format,
+'ls-extinbox format|f=s' => $ls_format,
-'limit|n=i' => 'integer limit on number of matches (default: 10000)',
-'offset=i' => 'search result offset (default: 0)',
+'limit|n=i' => ['NUM',
+ 'limit on number of matches (default: 10000)' ],
+'offset=i' => ['OFF', 'search result offset (default: 0)'],
-'sort|s=s@' => [qw(internaldate date relevance docid),
- "order of results `--output'-dependent)"],
+'sort|s=s@' => [ 'VAL|internaldate,date,relevance,docid',
+ "order of results `--output'-dependent"],
'prio=i' => 'priority of query source',
@@ -156,7 +173,7 @@ my %OPTDESC = (
'exact' => 'operate on exact header matches only',
'exact!' => 'rely on content match instead of exact header matches',
-'by-mid|mid:s' => 'match only by Message-ID, ignoring contents',
+'by-mid|mid:s' => [ 'MID', 'match only by Message-ID, ignoring contents' ],
'jobs:i' => 'set parallelism level',
); # %OPTDESC
@@ -174,93 +191,282 @@ sub x_it ($$) { # pronounced "exit"
}
}
-sub emit ($$$) {
- my ($client, $channel, $buf) = @_;
- print { $client->{$channel} } $buf or warn "print FD[$channel]: $!";
+sub emit {
+ my ($client, $channel) = @_; # $buf = $_[2]
+ print { $client->{$channel} } $_[2] or die "print FD[$channel]: $!";
}
-sub fail ($$;$) {
- my ($client, $buf, $exit_code) = @_;
+sub err {
+ my ($client, $buf) = @_;
$buf .= "\n" unless $buf =~ /\n\z/s;
emit($client, 2, $buf);
+}
+
+sub qerr { $_[0]->{opt}->{quiet} or err(@_) }
+
+sub fail ($$;$) {
+ my ($client, $buf, $exit_code) = @_;
+ err($client, $buf);
x_it($client, ($exit_code // 1) << 8);
undef;
}
sub _help ($;$) {
- my ($client, $channel) = @_;
- emit($client, $channel //= 1, <<EOF);
-usage: lei COMMAND [OPTIONS]
+ my ($client, $errmsg) = @_;
+ my $cmd = $client->{cmd} // 'COMMAND';
+ my @info = @{$CMD{$cmd} // [ '...', '...' ]};
+ my @top = ($cmd, shift(@info) // ());
+ my $cmd_desc = shift(@info);
+ my @opt_desc;
+ my $lpad = 2;
+ for my $sw (@info) { # qw(prio=s
+ my $desc = $OPTDESC{"$cmd\t$sw"} // $OPTDESC{$sw} // next;
+ my $arg_vals = '';
+ ($arg_vals, $desc) = @$desc if ref($desc) eq 'ARRAY';
+
+ # lower-case is a keyword (e.g. `content', `oid'),
+ # ALL_CAPS is a string description (e.g. `PATH')
+ if ($desc !~ /default/ && $arg_vals =~ /\b([a-z]+)[,\|]/) {
+ $desc .= "\ndefault: `$1'";
+ }
+ my (@vals, @s, @l);
+ my $x = $sw;
+ if ($x =~ s/!\z//) { # solve! => --no-solve
+ $x = "no-$x";
+ } elsif ($x =~ s/:.+//) { # optional args: $x = "mid:s"
+ @vals = (' [', undef, ']');
+ } elsif ($x =~ s/=.+//) { # required arg: $x = "type=s"
+ @vals = (' ', undef);
+ } # else: no args $x = 'thread|t'
+ for (split(/\|/, $x)) { # help|h
+ length($_) > 1 ? push(@l, "--$_") : push(@s, "-$_");
+ }
+ if (!scalar(@vals)) { # no args 'thread|t'
+ } elsif ($arg_vals =~ s/\A([A-Z_]+)\b//) { # "NAME"
+ $vals[1] = $1;
+ } else {
+ $vals[1] = uc(substr($l[0], 2)); # "--type" => "TYPE"
+ }
+ if ($arg_vals =~ /([,\|])/) {
+ my $sep = $1;
+ my @allow = split(/\Q$sep\E/, $arg_vals);
+ my $must = $sep eq '|' ? 'Must' : 'Can';
+ @allow = map { "`$_'" } @allow;
+ my $last = pop @allow;
+ $desc .= "\n$must be one of: " .
+ join(', ', @allow) . " or $last";
+ }
+ my $lhs = join(', ', @s, @l) . join('', @vals);
+ $lhs =~ s/\A--/ --/; # pad if no short options
+ $lpad = length($lhs) if length($lhs) > $lpad;
+ push @opt_desc, $lhs, $desc;
+ }
+ my $msg = $errmsg ? "E: $errmsg\n" : '';
+ $msg .= <<EOF;
+usage: lei @top
+ $cmd_desc
-...
EOF
- x_it($client, $channel == 2 ? 1 << 8 : 0); # stderr => failure
+ $lpad += 2;
+ local $Text::Wrap::columns = 78 - $lpad;
+ my $padding = ' ' x ($lpad + 2);
+ while (my ($lhs, $rhs) = splice(@opt_desc, 0, 2)) {
+ $msg .= ' '.pack("A$lpad", $lhs);
+ $rhs = wrap('', '', $rhs);
+ $rhs =~ s/\n/\n$padding/sg; # LHS pad continuation lines
+ $msg .= $rhs;
+ $msg .= "\n";
+ }
+ my $channel = $errmsg ? 2 : 1;
+ emit($client, $channel, $msg);
+ x_it($client, $errmsg ? 1 << 8 : 0); # stderr => failure
+ undef;
}
-sub assert_args ($$$;$@) {
- my ($client, $argv, $proto, $opt, @spec) = @_;
- $opt //= {};
- push @spec, qw(help|h);
- $glp->getoptionsfromarray($argv, $opt, @spec) or
- return fail($client, 'bad arguments or options');
- if ($opt->{help}) {
- _help($client);
- undef;
- } else {
- my ($nreq, $rest) = split(/;/, $proto);
- $nreq = (($nreq // '') =~ tr/$/$/);
- my $argc = scalar(@$argv);
- my $tot = ($rest // '') eq '@' ? $argc : ($proto =~ tr/$/$/);
- return 1 if $argc <= $tot && $argc >= $nreq;
- _help($client, 2);
- undef
+sub optparse ($$$) {
+ my ($client, $cmd, $argv) = @_;
+ $client->{cmd} = $cmd;
+ my $opt = $client->{opt} = {};
+ my $info = $CMD{$cmd} // [ '[...]', '(undocumented command)' ];
+ my ($proto, $desc, @spec) = @$info;
+ $glp->getoptionsfromarray($argv, $opt, @spec, qw(help|h)) or
+ return _help($client, "bad arguments or options for $cmd");
+ return _help($client) if $opt->{help};
+ my $i = 0;
+ my $POS_ARG = '[A-Z][A-Z0-9_]+';
+ my ($err, $inf);
+ my @args = split(/ /, $proto);
+ for my $var (@args) {
+ if ($var =~ /\A$POS_ARG\.\.\.\z/o) { # >= 1 args;
+ $inf = defined($argv->[$i]) and last;
+ $var =~ s/\.\.\.\z//;
+ $err = "$var not supplied";
+ } elsif ($var =~ /\A$POS_ARG\z/o) { # required arg at $i
+ $argv->[$i++] // ($err = "$var not supplied");
+ } elsif ($var =~ /\.\.\.\]\z/) { # optional args start
+ $inf = 1;
+ last;
+ } elsif ($var =~ /\A\[$POS_ARG\]\z/) { # one optional arg
+ $i++;
+ } elsif ($var =~ /\A.+?\|/) { # required FOO|--stdin
+ my @or = split(/\|/, $var);
+ my $ok;
+ for my $o (@or) {
+ if ($o =~ /\A--([a-z0-9\-]+)/) {
+ $ok = defined($opt->{$1});
+ last;
+ } elsif (defined($argv->[$i])) {
+ $ok = 1;
+ $i++;
+ last;
+ } # else continue looping
+ }
+ my $last = pop @or;
+ $err = join(', ', @or) . " or $last must be set";
+ } else {
+ warn "BUG: can't parse `$var' in $proto";
+ }
+ last if $err;
+ }
+ # warn "inf=$inf ".scalar(@$argv). ' '.scalar(@args)."\n";
+ if (!$inf && scalar(@$argv) > scalar(@args)) {
+ $err //= 'too many arguments';
}
+ $err ? fail($client, "usage: lei $cmd $proto\nE: $err") : 1;
}
sub dispatch {
my ($client, $cmd, @argv) = @_;
- local $SIG{__WARN__} = sub { emit($client, 2, "@_") };
+ local $SIG{__WARN__} = sub { err($client, "@_") };
local $SIG{__DIE__} = 'DEFAULT';
- if (defined $cmd) {
- my $func = "lei_$cmd";
- $func =~ tr/-/_/;
- if (my $cb = __PACKAGE__->can($func)) {
- $client->{cmd} = $cmd;
- $cb->($client, \@argv);
- } elsif (grep(/\A-/, $cmd, @argv)) {
- assert_args($client, [ $cmd, @argv ], '');
- } else {
- fail($client, "`$cmd' is not an lei command");
- }
+ return _help($client, 'no command given') unless defined($cmd);
+ my $func = "lei_$cmd";
+ $func =~ tr/-/_/;
+ if (my $cb = __PACKAGE__->can($func)) {
+ optparse($client, $cmd, \@argv) or return;
+ $cb->($client, @argv);
+ } elsif (grep(/\A-/, $cmd, @argv)) { # --help or -h only
+ my $opt = {};
+ $glp->getoptionsfromarray([$cmd, @argv], $opt, qw(help|h)) or
+ return _help($client, 'bad arguments or options');
+ _help($client);
} else {
- _help($client, 2);
+ fail($client, "`$cmd' is not an lei command");
}
}
-sub lei_daemon_pid {
- my ($client, $argv) = @_;
- assert_args($client, $argv, '') and emit($client, 1, "$$\n");
+sub _lei_cfg ($;$) {
+ my ($client, $creat) = @_;
+ my $env = $client->{env};
+ my $cfg_dir = File::Spec->canonpath(( $env->{XDG_CONFIG_HOME} //
+ ($env->{HOME} // '/nonexistent').'/.config').'/lei');
+ my $f = "$cfg_dir/config";
+ my @st = stat($f);
+ my $cur_st = @st ? pack('dd', $st[10], $st[7]) : ''; # 10:ctime, 7:size
+ if (my $cfg = $PATH2CFG{$f}) { # reuse existing object in common case
+ return ($client->{cfg} = $cfg) if $cur_st eq $cfg->{-st};
+ }
+ if (!@st) {
+ unless ($creat) {
+ delete $client->{cfg};
+ return;
+ }
+ -d $cfg_dir or mkpath($cfg_dir) or die "mkpath($cfg_dir): $!\n";
+ open my $fh, '>>', $f or die "open($f): $!\n";
+ @st = stat($fh) or die "fstat($f): $!\n";
+ $cur_st = pack('dd', $st[10], $st[7]);
+ qerr($client, "I: $f created");
+ }
+ my $cfg = PublicInbox::Config::git_config_dump($f);
+ $cfg->{-st} = $cur_st;
+ $cfg->{'-f'} = $f;
+ $client->{cfg} = $PATH2CFG{$f} = $cfg;
}
-sub lei_DBG_pwd {
- my ($client, $argv) = @_;
- assert_args($client, $argv, '') and
- emit($client, 1, "$client->{env}->{PWD}\n");
+sub _lei_store ($;$) {
+ my ($client, $creat) = @_;
+ my $cfg = _lei_cfg($client, $creat);
+ $cfg->{-lei_store} //= do {
+ require PublicInbox::LeiStore;
+ PublicInbox::SearchIdx::load_xapian_writable();
+ defined(my $dir = $cfg->{'leistore.dir'}) or return;
+ PublicInbox::LeiStore->new($dir, { creat => $creat });
+ };
+}
+
+sub lei_show {
+ my ($client, @argv) = @_;
}
-sub lei_DBG_cwd {
- my ($client, $argv) = @_;
- require Cwd;
- assert_args($client, $argv, '') and emit($client, 1, Cwd::cwd()."\n");
+sub lei_query {
+ my ($client, @argv) = @_;
}
-sub lei_DBG_false { x_it($_[0], 1 << 8) }
+sub lei_mark {
+ my ($client, @argv) = @_;
+}
-sub lei_daemon_stop {
- my ($client, $argv) = @_;
- assert_args($client, $argv, '') and $quit->(0);
+sub lei_config {
+ my ($client, @argv) = @_;
+ my $env = $client->{env};
+ if (defined $env->{GIT_CONFIG}) {
+ my %copy = %$env;
+ delete $copy{GIT_CONFIG};
+ $env = \%copy;
+ }
+ if (my @conflict = (grep(/\A-f=?\z/, @argv),
+ grep(/\A--(?:global|system|
+ file|config-file)=?\z/x, @argv))) {
+ return fail($client, "@conflict not supported by lei config");
+ }
+ my $cfg = _lei_cfg($client, 1);
+ my $cmd = [ qw(git config -f), $cfg->{'-f'}, @argv ];
+ my %rdr = map { $_ => $client->{$_} } (0..2);
+ require PublicInbox::Import;
+ PublicInbox::Import::run_die($cmd, $env, \%rdr);
}
+sub lei_init {
+ my ($client, $dir) = @_;
+ my $cfg = _lei_cfg($client, 1);
+ my $cur = $cfg->{'leistore.dir'};
+ my $env = $client->{env};
+ $dir //= ( $env->{XDG_DATA_HOME} //
+ ($env->{HOME} // '/nonexistent').'/.local/share'
+ ) . '/lei/store';
+ $dir = File::Spec->rel2abs($dir, $env->{PWD}); # PWD is symlink-aware
+ my @cur = stat($cur) if defined($cur);
+ $cur = File::Spec->canonpath($cur) if $cur;
+ my @dir = stat($dir);
+ my $exists = "I: leistore.dir=$cur already initialized" if @dir;
+ if (@cur) {
+ if ($cur eq $dir) {
+ _lei_store($client, 1)->done;
+ return qerr($client, $exists);
+ }
+
+ # some folks like symlinks and bind mounts :P
+ if (@dir && "$cur[0] $cur[1]" eq "$dir[0] $dir[1]") {
+ lei_config($client, 'leistore.dir', $dir);
+ _lei_store($client, 1)->done;
+ return qerr($client, "$exists (as $cur)");
+ }
+ return fail($client, <<"");
+E: leistore.dir=$cur already initialized and it is not $dir
+
+ }
+ lei_config($client, 'leistore.dir', $dir);
+ _lei_store($client, 1)->done;
+ $exists //= "I: leistore.dir=$dir newly initialized";
+ return qerr($client, $exists);
+}
+
+sub lei_daemon_pid {
+ emit($_[0], 1, "$$\n");
+}
+
+sub lei_daemon_stop { $quit->(0) }
+
sub lei_help { _help($_[0]) }
sub reap_exec { # dwaitpid callback
@@ -269,9 +475,9 @@ sub reap_exec { # dwaitpid callback
}
sub lei_git { # support passing through random git commands
- my ($client, $argv) = @_;
- my %opt = map { $_ => $client->{$_} } (0..2);
- my $pid = spawn(['git', @$argv], $client->{env}, \%opt);
+ my ($client, @argv) = @_;
+ my %rdr = map { $_ => $client->{$_} } (0..2);
+ my $pid = spawn(['git', @argv], $client->{env}, \%rdr);
PublicInbox::DS::dwaitpid($pid, \&reap_exec, $client);
}
@@ -360,6 +566,7 @@ sub lazy_start {
$pid = fork // die "fork: $!";
return if $pid;
$0 = "lei-daemon $path";
+ local %PATH2CFG;
require PublicInbox::Listener;
require PublicInbox::EOFpipe;
$l->blocking(0);
@@ -427,6 +634,10 @@ sub lazy_start {
# for users w/o IO::FDPass
sub oneshot {
+ my ($main_pkg) = @_;
+ my $exit = $main_pkg->can('exit'); # caller may override exit()
+ local $quit = $exit if $exit;
+ local %PATH2CFG;
dispatch({
0 => *STDIN{IO},
1 => *STDOUT{IO},
diff --git a/lib/PublicInbox/LeiStore.pm b/lib/PublicInbox/LeiStore.pm
index 56f668b8..b5b49efb 100644
--- a/lib/PublicInbox/LeiStore.pm
+++ b/lib/PublicInbox/LeiStore.pm
@@ -22,7 +22,12 @@ use PublicInbox::LeiSearch;
sub new {
my (undef, $dir, $opt) = @_;
my $eidx = PublicInbox::ExtSearchIdx->new($dir, $opt);
- bless { priv_eidx => $eidx }, __PACKAGE__;
+ my $self = bless { priv_eidx => $eidx }, __PACKAGE__;
+ if ($opt->{creat}) {
+ PublicInbox::SearchIdx::load_xapian_writable();
+ eidx_init($self);
+ }
+ $self;
}
sub git { $_[0]->{priv_eidx}->git } # read-only
diff --git a/script/lei b/script/lei
index 637c1951..fce088e9 100755
--- a/script/lei
+++ b/script/lei
@@ -60,5 +60,5 @@ if (eval { require IO::FDPass; 1 }) { # use daemon to reduce load time
}
} else { # for systems lacking IO::FDPass
require PublicInbox::LeiDaemon;
- PublicInbox::LeiDaemon::oneshot();
+ PublicInbox::LeiDaemon::oneshot(__PACKAGE__);
}
diff --git a/t/lei.t b/t/lei.t
index 02f21322..9fb0ce00 100644
--- a/t/lei.t
+++ b/t/lei.t
@@ -6,23 +6,17 @@ use v5.10.1;
use Test::More;
use PublicInbox::TestCommon;
use PublicInbox::Config;
+use File::Path qw(rmtree);
require_mods(qw(json DBD::SQLite Search::Xapian));
my ($home, $for_destroy) = tmpdir();
my $opt = { 1 => \(my $out = ''), 2 => \(my $err = '') };
+delete local $ENV{XDG_DATA_HOME};
+delete local $ENV{XDG_CONFIG_HOME};
+local $ENV{XDG_RUNTIME_DIR} = "$home/xdg_run";
+local $ENV{HOME} = $home;
+mkdir "$home/xdg_run", 0700 or BAIL_OUT "mkdir: $!";
-SKIP: {
- require_mods('IO::FDPass', 51);
- local $ENV{XDG_RUNTIME_DIR} = "$home/xdg_run";
- mkdir "$home/xdg_run", 0700 or BAIL_OUT "mkdir: $!";
- my $sock = "$ENV{XDG_RUNTIME_DIR}/lei/sock";
-
- ok(run_script([qw(lei daemon-pid)], undef, $opt), 'daemon-pid');
- is($err, '', 'no error from daemon-pid');
- like($out, qr/\A[0-9]+\n\z/s, 'pid returned') or BAIL_OUT;
- chomp(my $pid = $out);
- ok(kill(0, $pid), 'pid is valid');
- ok(-S $sock, 'sock created');
-
+my $test_lei_common = sub {
ok(!run_script([qw(lei)], undef, $opt), 'no args fails');
is($? >> 8, 1, '$? is 1');
is($out, '', 'nothing in stdout');
@@ -35,10 +29,6 @@ SKIP: {
is($err, '', "nothing in stderr (@$arg)");
}
- ok(!run_script([qw(lei DBG-false)], undef, $opt), 'false(1) emulation');
- is($? >> 8, 1, '$? set correctly');
- is($err, '', 'no error from false(1) emulation');
-
for my $arg ([''], ['--halp'], ['halp'], [qw(daemon-pid --halp)]) {
$out = $err = '';
ok(!run_script(['lei', @$arg], undef, $opt), "lei @$arg");
@@ -47,6 +37,62 @@ SKIP: {
is($out, '', 'nothing in stdout');
}
+ # init tests
+ $out = $err = '';
+ my $ok_err_info = sub {
+ my ($msg) = @_;
+ is(grep(!/^I:/, split(/^/, $err)), 0, $msg) or
+ diag "$msg: err=$err";
+ $err = '';
+ };
+ my $home_trash = [ "$home/.local", "$home/.config" ];
+ rmtree($home_trash);
+ ok(run_script([qw(lei init)], undef, $opt), 'init w/o args');
+ $ok_err_info->('after init w/o args');
+ ok(run_script([qw(lei init)], undef, $opt), 'idempotent init w/o args');
+ $ok_err_info->('after idempotent init w/o args');
+
+ ok(!run_script([qw(lei init), "$home/x"], undef, $opt),
+ 'init conflict');
+ is(grep(/^E:/, split(/^/, $err)), 1, 'got error on conflict');
+ ok(!-e "$home/x", 'nothing created on conflict');
+ rmtree($home_trash);
+
+ $err = '';
+ ok(run_script([qw(lei init), "$home/x"], undef, $opt),
+ 'init conflict resolved');
+ $ok_err_info->('init w/ arg');
+ ok(run_script([qw(lei init), "$home/x"], undef, $opt),
+ 'init idempotent with path');
+ $ok_err_info->('init idempotent w/ arg');
+ ok(-d "$home/x", 'created dir');
+ rmtree([ "$home/x", @$home_trash ]);
+
+ $err = '';
+ ok(!run_script([qw(lei init), "$home/x", "$home/2" ], undef, $opt),
+ 'too many args fails');
+ like($err, qr/too many/, 'noted excessive');
+ ok(!-e "$home/x", 'x not created on excessive');
+ for my $d (@$home_trash) {
+ my $base = (split(m!/!, $d))[-1];
+ ok(!-d $d, "$base not created");
+ }
+ is($out, '', 'nothing in stdout');
+};
+
+SKIP: {
+ require_mods('IO::FDPass', 16);
+ my $sock = "$ENV{XDG_RUNTIME_DIR}/lei/sock";
+
+ ok(run_script([qw(lei daemon-pid)], undef, $opt), 'daemon-pid');
+ is($err, '', 'no error from daemon-pid');
+ like($out, qr/\A[0-9]+\n\z/s, 'pid returned') or BAIL_OUT;
+ chomp(my $pid = $out);
+ ok(kill(0, $pid), 'pid is valid');
+ ok(-S $sock, 'sock created');
+
+ $test_lei_common->();
+
$out = '';
ok(run_script([qw(lei daemon-pid)], undef, $opt), 'daemon-pid');
chomp(my $pid_again = $out);
@@ -72,8 +118,10 @@ SKIP: {
tick();
}
ok(!kill(0, $new_pid), 'daemon exits after unlink');
+ $test_lei_common = undef; # success over socket, can't test without
};
require_ok 'PublicInbox::LeiDaemon';
+$test_lei_common->() if $test_lei_common;
done_testing;
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [PATCH 07/26] t/lei-oneshot: standalone oneshot (non-socket) test
2020-12-18 12:09 [PATCH 00/26] lei: basic UI + IPC work Eric Wong
` (5 preceding siblings ...)
2020-12-18 12:09 ` [PATCH 06/26] lei: refine help/option parsing, implement "init" Eric Wong
@ 2020-12-18 12:09 ` Eric Wong
2020-12-18 12:09 ` [PATCH 08/26] lei: ensure we run a restrictive umask Eric Wong
` (18 subsequent siblings)
25 siblings, 0 replies; 30+ messages in thread
From: Eric Wong @ 2020-12-18 12:09 UTC (permalink / raw)
To: meta
We can use the same "local $ENV{FOO}" hack we do with
t/nntpd-v2.t to test the oneshot code path without imposing
an extra script in the users' $PATH.
---
MANIFEST | 1 +
lib/PublicInbox/TestCommon.pm | 2 +-
t/lei-oneshot.t | 25 +++++++++++++++++++++++++
t/lei.t | 32 ++++++++++++++++++++------------
4 files changed, 47 insertions(+), 13 deletions(-)
create mode 100644 t/lei-oneshot.t
diff --git a/MANIFEST b/MANIFEST
index 9eb97d14..898766e7 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -320,6 +320,7 @@ t/indexlevels-mirror.t
t/init.t
t/iso-2202-jp.eml
t/kqnotify.t
+t/lei-oneshot.t
t/lei.t
t/lei_store.t
t/linkify.t
diff --git a/lib/PublicInbox/TestCommon.pm b/lib/PublicInbox/TestCommon.pm
index 2116575b..c236c589 100644
--- a/lib/PublicInbox/TestCommon.pm
+++ b/lib/PublicInbox/TestCommon.pm
@@ -168,7 +168,7 @@ sub run_script_exit {
die RUN_SCRIPT_EXIT;
}
-my %cached_scripts;
+our %cached_scripts;
sub key2sub ($) {
my ($key) = @_;
$cached_scripts{$key} //= do {
diff --git a/t/lei-oneshot.t b/t/lei-oneshot.t
new file mode 100644
index 00000000..848682ee
--- /dev/null
+++ b/t/lei-oneshot.t
@@ -0,0 +1,25 @@
+#!perl -w
+# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use v5.10.1;
+use PublicInbox::TestCommon;
+$PublicInbox::TestCommon::cached_scripts{'lei-oneshot'} //= do {
+ eval <<'EOF';
+package LeiOneshot;
+use strict;
+use subs qw(exit);
+*exit = \&PublicInbox::TestCommon::run_script_exit;
+sub main {
+# the below "line" directive is a magic comment, see perlsyn(1) manpage
+# line 1 "lei-oneshot"
+ require PublicInbox::LeiDaemon;
+ PublicInbox::LeiDaemon::oneshot(__PACKAGE__);
+ 0;
+}
+1;
+EOF
+ LeiOneshot->can('main');
+};
+local $ENV{TEST_LEI_ONESHOT} = '1';
+require './t/lei.t';
diff --git a/t/lei.t b/t/lei.t
index 9fb0ce00..507c7164 100644
--- a/t/lei.t
+++ b/t/lei.t
@@ -8,6 +8,12 @@ use PublicInbox::TestCommon;
use PublicInbox::Config;
use File::Path qw(rmtree);
require_mods(qw(json DBD::SQLite Search::Xapian));
+my $LEI = 'lei';
+my $lei = sub {
+ my ($cmd, $env, $opt) = @_;
+ run_script([$LEI, @$cmd], $env, $opt);
+};
+
my ($home, $for_destroy) = tmpdir();
my $opt = { 1 => \(my $out = ''), 2 => \(my $err = '') };
delete local $ENV{XDG_DATA_HOME};
@@ -17,21 +23,21 @@ local $ENV{HOME} = $home;
mkdir "$home/xdg_run", 0700 or BAIL_OUT "mkdir: $!";
my $test_lei_common = sub {
- ok(!run_script([qw(lei)], undef, $opt), 'no args fails');
+ ok(!$lei->([], undef, $opt), 'no args fails');
is($? >> 8, 1, '$? is 1');
is($out, '', 'nothing in stdout');
like($err, qr/^usage:/sm, 'usage in stderr');
for my $arg (['-h'], ['--help'], ['help'], [qw(daemon-pid --help)]) {
$out = $err = '';
- ok(run_script(['lei', @$arg], undef, $opt), "lei @$arg");
+ ok($lei->($arg, undef, $opt), "lei @$arg");
like($out, qr/^usage:/sm, "usage in stdout (@$arg)");
is($err, '', "nothing in stderr (@$arg)");
}
for my $arg ([''], ['--halp'], ['halp'], [qw(daemon-pid --halp)]) {
$out = $err = '';
- ok(!run_script(['lei', @$arg], undef, $opt), "lei @$arg");
+ ok(!$lei->($arg, undef, $opt), "lei @$arg");
is($? >> 8, 1, '$? set correctly');
isnt($err, '', 'something in stderr');
is($out, '', 'nothing in stdout');
@@ -47,29 +53,27 @@ my $test_lei_common = sub {
};
my $home_trash = [ "$home/.local", "$home/.config" ];
rmtree($home_trash);
- ok(run_script([qw(lei init)], undef, $opt), 'init w/o args');
+ ok($lei->(['init'], undef, $opt), 'init w/o args');
$ok_err_info->('after init w/o args');
- ok(run_script([qw(lei init)], undef, $opt), 'idempotent init w/o args');
+ ok($lei->(['init'], undef, $opt), 'idempotent init w/o args');
$ok_err_info->('after idempotent init w/o args');
- ok(!run_script([qw(lei init), "$home/x"], undef, $opt),
+ ok(!$lei->(['init', "$home/x"], undef, $opt),
'init conflict');
is(grep(/^E:/, split(/^/, $err)), 1, 'got error on conflict');
ok(!-e "$home/x", 'nothing created on conflict');
rmtree($home_trash);
$err = '';
- ok(run_script([qw(lei init), "$home/x"], undef, $opt),
- 'init conflict resolved');
+ ok($lei->(['init', "$home/x"], undef, $opt), 'init conflict resolved');
$ok_err_info->('init w/ arg');
- ok(run_script([qw(lei init), "$home/x"], undef, $opt),
- 'init idempotent with path');
+ ok($lei->(['init', "$home/x"], undef, $opt), 'init idempotent w/ path');
$ok_err_info->('init idempotent w/ arg');
ok(-d "$home/x", 'created dir');
rmtree([ "$home/x", @$home_trash ]);
$err = '';
- ok(!run_script([qw(lei init), "$home/x", "$home/2" ], undef, $opt),
+ ok(!$lei->(['init', "$home/x", "$home/2" ], undef, $opt),
'too many args fails');
like($err, qr/too many/, 'noted excessive');
ok(!-e "$home/x", 'x not created on excessive');
@@ -80,7 +84,9 @@ my $test_lei_common = sub {
is($out, '', 'nothing in stdout');
};
+my $test_lei_oneshot = $ENV{TEST_LEI_ONESHOT};
SKIP: {
+ last SKIP if $test_lei_oneshot;
require_mods('IO::FDPass', 16);
my $sock = "$ENV{XDG_RUNTIME_DIR}/lei/sock";
@@ -118,10 +124,12 @@ SKIP: {
tick();
}
ok(!kill(0, $new_pid), 'daemon exits after unlink');
- $test_lei_common = undef; # success over socket, can't test without
+ # success over socket, can't test without
+ $test_lei_common = undef;
};
require_ok 'PublicInbox::LeiDaemon';
+$LEI = 'lei-oneshot' if $test_lei_oneshot;
$test_lei_common->() if $test_lei_common;
done_testing;
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [PATCH 08/26] lei: ensure we run a restrictive umask
2020-12-18 12:09 [PATCH 00/26] lei: basic UI + IPC work Eric Wong
` (6 preceding siblings ...)
2020-12-18 12:09 ` [PATCH 07/26] t/lei-oneshot: standalone oneshot (non-socket) test Eric Wong
@ 2020-12-18 12:09 ` Eric Wong
2020-12-18 12:09 ` [PATCH 09/26] lei: support `daemon-env' for modifying long-lived env Eric Wong
` (17 subsequent siblings)
25 siblings, 0 replies; 30+ messages in thread
From: Eric Wong @ 2020-12-18 12:09 UTC (permalink / raw)
To: meta
While we configure the LeiStore git repos and DBs to have a
restrictive umask, lei may also write to Maildirs/mboxes/etc.
We will follow mutt behavior when saving files/messages to the FS.
We only want to create files which are only readable by the local
user since this is intended for private mail and could be used
on shared systems.
We may allow passing the umask on a per-command-basis, but it's
probably not worth the effort to support.
---
lib/PublicInbox/LeiDaemon.pm | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/lib/PublicInbox/LeiDaemon.pm b/lib/PublicInbox/LeiDaemon.pm
index 010c1cba..1f170f1d 100644
--- a/lib/PublicInbox/LeiDaemon.pm
+++ b/lib/PublicInbox/LeiDaemon.pm
@@ -538,12 +538,11 @@ sub lazy_start {
die "connect($path): $!";
}
require IO::FDPass;
- my $umask = umask(077) // die("umask(077): $!");
+ umask(077) // die("umask(077): $!");
my $l = IO::Socket::UNIX->new(Local => $path,
Listen => 1024,
Type => SOCK_STREAM) or
$err = $!;
- umask($umask) or die("umask(restore): $!");
$l or return die "bind($path): $err";
my @st = stat($path) or die "stat($path): $!";
my $dev_ino_expect = pack('dd', $st[0], $st[1]); # dev+ino
@@ -638,6 +637,7 @@ sub oneshot {
my $exit = $main_pkg->can('exit'); # caller may override exit()
local $quit = $exit if $exit;
local %PATH2CFG;
+ umask(077) // die("umask(077): $!");
dispatch({
0 => *STDIN{IO},
1 => *STDOUT{IO},
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [PATCH 09/26] lei: support `daemon-env' for modifying long-lived env
2020-12-18 12:09 [PATCH 00/26] lei: basic UI + IPC work Eric Wong
` (7 preceding siblings ...)
2020-12-18 12:09 ` [PATCH 08/26] lei: ensure we run a restrictive umask Eric Wong
@ 2020-12-18 12:09 ` Eric Wong
2020-12-18 12:09 ` [PATCH 10/26] lei_store: simplify git_epoch_max, slightly Eric Wong
` (16 subsequent siblings)
25 siblings, 0 replies; 30+ messages in thread
From: Eric Wong @ 2020-12-18 12:09 UTC (permalink / raw)
To: meta
While lei(1) socket connections can set environment variables
for its running context, it may not completely remove some of
them. The background daemon just inherits whatever env the
client spawning it had. This command ensures the persistent env
can be modified as needed.
Similar to env(1), this supports "-u", "-" (--clear), and
"-0"/"-z" switches. It may be useful to unset or change
or even completely clear the environment independently
of what a socket client feeds us.
"-i" is omitted since "--ignore-environment" seems like a bad
name for a persistent daemon as opposed to a one-shot command.
"-" and --clear (like clearenv(3)) will completely clobber
the environment.
"Lonesome dash" support is added to our option/help parsing
for the "-" shortcut to "--clear".
Getopt::Long doesn't seem to support specs like "clear|" or
"stdin|", but only "", so we do a little pre/post-processing
to merge the cases.
---
lib/PublicInbox/LeiDaemon.pm | 55 ++++++++++++++++++++++++++++++++----
t/lei.t | 31 ++++++++++++++++++++
2 files changed, 80 insertions(+), 6 deletions(-)
diff --git a/lib/PublicInbox/LeiDaemon.pm b/lib/PublicInbox/LeiDaemon.pm
index 1f170f1d..56f4aa7d 100644
--- a/lib/PublicInbox/LeiDaemon.pm
+++ b/lib/PublicInbox/LeiDaemon.pm
@@ -60,7 +60,7 @@ our %CMD = ( # sorted in order of importance/use:
'plonk' => [ '--thread|--from=IDENT',
'exclude mail matching From: or thread from non-Message-ID searches',
- qw(thread|t stdin| from|f=s mid=s oid=s) ],
+ qw(stdin| thread|t from|f=s mid=s oid=s) ],
'mark' => [ 'MESSAGE_FLAGS...',
'set/unset flags on message(s) from stdin',
qw(stdin| oid=s exact by-mid|mid:s) ],
@@ -103,6 +103,8 @@ our %CMD = ( # sorted in order of importance/use:
qw(quiet|q) ],
'daemon-stop' => [ '', 'stop the lei-daemon' ],
'daemon-pid' => [ '', 'show the PID of the lei-daemon' ],
+'daemon-env' => [ '[NAME=VALUE...]', 'set, unset, or show daemon environment',
+ qw(clear| unset|u=s@ z|0) ],
'help' => [ '[SUBCOMMAND]', 'show help' ],
# XXX do we need this?
@@ -175,6 +177,16 @@ my %OPTDESC = (
'by-mid|mid:s' => [ 'MID', 'match only by Message-ID, ignoring contents' ],
'jobs:i' => 'set parallelism level',
+
+# xargs, env, use "-0", git(1) uses "-z". Should we support z|0 everywhere?
+'z' => 'use NUL \\0 instead of newline (CR) to delimit lines',
+'z|0' => 'use NUL \\0 instead of newline (CR) to delimit lines',
+
+# note: no "--ignore-environment" / "-i" support like env(1) since that
+# is one-shot and this is for a persistent daemon:
+'clear|' => 'clear the daemon environment',
+'unset|u=s@' => ['NAME',
+ 'unset matching NAME, may be specified multiple times'],
); # %OPTDESC
sub x_it ($$) { # pronounced "exit"
@@ -257,7 +269,11 @@ sub _help ($;$) {
join(', ', @allow) . " or $last";
}
my $lhs = join(', ', @s, @l) . join('', @vals);
- $lhs =~ s/\A--/ --/; # pad if no short options
+ if ($x =~ /\|\z/) { # "stdin|" or "clear|"
+ $lhs =~ s/\A--/- , --/;
+ } else {
+ $lhs =~ s/\A--/ --/; # pad if no short options
+ }
$lpad = length($lhs) if length($lhs) > $lpad;
push @opt_desc, $lhs, $desc;
}
@@ -289,9 +305,20 @@ sub optparse ($$$) {
my $opt = $client->{opt} = {};
my $info = $CMD{$cmd} // [ '[...]', '(undocumented command)' ];
my ($proto, $desc, @spec) = @$info;
- $glp->getoptionsfromarray($argv, $opt, @spec, qw(help|h)) or
+ push @spec, qw(help|h);
+ my $lone_dash;
+ if ($spec[0] =~ s/\|\z//s) { # "stdin|" or "clear|" allows "-" alias
+ $lone_dash = $spec[0];
+ $opt->{$spec[0]} = \(my $var);
+ push @spec, '' => \$var;
+ }
+ $glp->getoptionsfromarray($argv, $opt, @spec) or
return _help($client, "bad arguments or options for $cmd");
return _help($client) if $opt->{help};
+
+ # "-" aliases "stdin" or "clear"
+ $opt->{$lone_dash} = ${$opt->{$lone_dash}} if defined $lone_dash;
+
my $i = 0;
my $POS_ARG = '[A-Z][A-Z0-9_]+';
my ($err, $inf);
@@ -461,12 +488,28 @@ E: leistore.dir=$cur already initialized and it is not $dir
return qerr($client, $exists);
}
-sub lei_daemon_pid {
- emit($_[0], 1, "$$\n");
-}
+sub lei_daemon_pid { emit($_[0], 1, "$$\n") }
sub lei_daemon_stop { $quit->(0) }
+sub lei_daemon_env {
+ my ($client, @argv) = @_;
+ my $opt = $client->{opt};
+ if (defined $opt->{clear}) {
+ %ENV = ();
+ } elsif (my $u = $opt->{unset}) {
+ delete @ENV{@$u};
+ }
+ if (@argv) {
+ %ENV = (%ENV, map { split(/=/, $_, 2) } @argv);
+ } elsif (!defined($opt->{clear}) && !$opt->{unset}) {
+ my $eor = $opt->{z} ? "\0" : "\n";
+ my $buf = '';
+ while (my ($k, $v) = each %ENV) { $buf .= "$k=$v$eor" }
+ emit($client, 1, $buf)
+ }
+}
+
sub lei_help { _help($_[0]) }
sub reap_exec { # dwaitpid callback
diff --git a/t/lei.t b/t/lei.t
index 507c7164..53268908 100644
--- a/t/lei.t
+++ b/t/lei.t
@@ -20,6 +20,7 @@ delete local $ENV{XDG_DATA_HOME};
delete local $ENV{XDG_CONFIG_HOME};
local $ENV{XDG_RUNTIME_DIR} = "$home/xdg_run";
local $ENV{HOME} = $home;
+local $ENV{FOO} = 'BAR';
mkdir "$home/xdg_run", 0700 or BAIL_OUT "mkdir: $!";
my $test_lei_common = sub {
@@ -104,6 +105,36 @@ SKIP: {
chomp(my $pid_again = $out);
is($pid, $pid_again, 'daemon-pid idempotent');
+ $out = '';
+ ok(run_script([qw(lei daemon-env -0)], undef, $opt), 'show env');
+ is($err, '', 'no errors in env dump');
+ my @env = split(/\0/, $out);
+ is(scalar grep(/\AHOME=\Q$home\E\z/, @env), 1, 'env has HOME');
+ is(scalar grep(/\AFOO=BAR\z/, @env), 1, 'env has FOO=BAR');
+ is(scalar grep(/\AXDG_RUNTIME_DIR=/, @env), 1, 'has XDG_RUNTIME_DIR');
+
+ $out = '';
+ ok(run_script([qw(lei daemon-env -u FOO)], undef, $opt), 'unset');
+ is($out.$err, '', 'no output for unset');
+ ok(run_script([qw(lei daemon-env -0)], undef, $opt), 'show again');
+ is($err, '', 'no errors in env dump');
+ @env = split(/\0/, $out);
+ is(scalar grep(/\AFOO=BAR\z/, @env), 0, 'env unset FOO');
+
+ $out = '';
+ ok(run_script([qw(lei daemon-env -u FOO -u HOME -u XDG_RUNTIME_DIR)],
+ undef, $opt), 'unset multiple');
+ is($out.$err, '', 'no errors output for unset');
+ ok(run_script([qw(lei daemon-env -0)], undef, $opt), 'show again');
+ is($err, '', 'no errors in env dump');
+ @env = split(/\0/, $out);
+ is(scalar grep(/\A(?:HOME|XDG_RUNTIME_DIR)=\z/, @env), 0, 'env unset@');
+ $out = '';
+ ok(run_script([qw(lei daemon-env -)], undef, $opt), 'clear env');
+ is($out.$err, '', 'no output');
+ ok(run_script([qw(lei daemon-env)], undef, $opt), 'env is empty');
+ is($out, '', 'env cleared');
+
ok(run_script([qw(lei daemon-stop)], undef, $opt), 'daemon-stop');
is($out, '', 'no output from daemon-stop');
is($err, '', 'no error from daemon-stop');
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [PATCH 10/26] lei_store: simplify git_epoch_max, slightly
2020-12-18 12:09 [PATCH 00/26] lei: basic UI + IPC work Eric Wong
` (8 preceding siblings ...)
2020-12-18 12:09 ` [PATCH 09/26] lei: support `daemon-env' for modifying long-lived env Eric Wong
@ 2020-12-18 12:09 ` Eric Wong
2020-12-18 12:09 ` [PATCH 11/26] search: simplify initialization, add ->xdb_shards_flat Eric Wong
` (15 subsequent siblings)
25 siblings, 0 replies; 30+ messages in thread
From: Eric Wong @ 2020-12-18 12:09 UTC (permalink / raw)
To: meta
This follows how we detect the max epoch for v2 and shard count
in Xapian.
---
lib/PublicInbox/LeiStore.pm | 15 +++++++--------
1 file changed, 7 insertions(+), 8 deletions(-)
diff --git a/lib/PublicInbox/LeiStore.pm b/lib/PublicInbox/LeiStore.pm
index b5b49efb..d3667d29 100644
--- a/lib/PublicInbox/LeiStore.pm
+++ b/lib/PublicInbox/LeiStore.pm
@@ -18,6 +18,7 @@ use PublicInbox::V2Writable;
use PublicInbox::ContentHash qw(content_hash);
use PublicInbox::MID qw(mids);
use PublicInbox::LeiSearch;
+use List::Util qw(max);
sub new {
my (undef, $dir, $opt) = @_;
@@ -42,15 +43,13 @@ sub git_pfx { "$_[0]->{priv_eidx}->{topdir}/local" };
sub git_epoch_max {
my ($self) = @_;
- my $pfx = $self->git_pfx;
- my $max = 0;
- return $max unless -d $pfx ;
- opendir my $dh, $pfx or die "opendir $pfx: $!\n";
- while (defined(my $git_dir = readdir($dh))) {
- $git_dir =~ m!\A([0-9]+)\.git\z! or next;
- $max = $1 + 0 if $1 > $max;
+ if (opendir(my $dh, $self->git_pfx)) {
+ max(map {
+ substr($_, 0, -4) + 0; # drop ".git" suffix
+ } grep(/\A[0-9]+\.git\z/, readdir($dh))) // 0;
+ } else {
+ $!{ENOENT} ? 0 : die("opendir ${\$self->git_pfx}: $!\n");
}
- $max;
}
sub importer {
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [PATCH 11/26] search: simplify initialization, add ->xdb_shards_flat
2020-12-18 12:09 [PATCH 00/26] lei: basic UI + IPC work Eric Wong
` (9 preceding siblings ...)
2020-12-18 12:09 ` [PATCH 10/26] lei_store: simplify git_epoch_max, slightly Eric Wong
@ 2020-12-18 12:09 ` Eric Wong
2020-12-18 12:09 ` [PATCH 12/26] rename LeiDaemon package to PublicInbox::LEI Eric Wong
` (14 subsequent siblings)
25 siblings, 0 replies; 30+ messages in thread
From: Eric Wong @ 2020-12-18 12:09 UTC (permalink / raw)
To: meta
This reduces differences between v1 and v2 code, and
introduces ->xdb_shards_flat to provide read-only access
to shards without using Xapian::MultiDatabase. This
will allow us to combine shards of several inboxes
AND extindexes for lei.
---
lib/PublicInbox/ExtSearch.pm | 6 ----
lib/PublicInbox/LeiSearch.pm | 5 ++-
lib/PublicInbox/Search.pm | 65 ++++++++++++++---------------------
lib/PublicInbox/SearchIdx.pm | 15 ++++----
lib/PublicInbox/V2Writable.pm | 8 +----
5 files changed, 34 insertions(+), 65 deletions(-)
diff --git a/lib/PublicInbox/ExtSearch.pm b/lib/PublicInbox/ExtSearch.pm
index 410ae958..7ce950bc 100644
--- a/lib/PublicInbox/ExtSearch.pm
+++ b/lib/PublicInbox/ExtSearch.pm
@@ -33,12 +33,6 @@ sub misc {
sub search { $_[0] } # self
-# overrides PublicInbox::Search::_xdb
-sub _xdb {
- my ($self) = @_;
- $self->xdb_sharded;
-}
-
# same as per-inbox ->over, for now...
sub over {
my ($self) = @_;
diff --git a/lib/PublicInbox/LeiSearch.pm b/lib/PublicInbox/LeiSearch.pm
index 9cfd6ea2..66c16e04 100644
--- a/lib/PublicInbox/LeiSearch.pm
+++ b/lib/PublicInbox/LeiSearch.pm
@@ -9,8 +9,7 @@ use PublicInbox::Search;
sub combined_docid ($$) {
my ($self, $num) = @_;
- my $nshard = ($self->{nshard} // 1);
- ($num - 1) * $nshard + 1;
+ ($num - 1) * $self->{nshard} + 1;
}
sub msg_keywords {
@@ -19,7 +18,7 @@ sub msg_keywords {
my $docid = ref($num) ? $num->get_docid : do {
# get combined docid from over.num:
# (not generic Xapian, only works with our sharding scheme)
- my $nshard = $self->{nshard} // 1;
+ my $nshard = $self->{nshard};
($num - 1) * $nshard + $num % $nshard + 1;
};
my %kw;
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index b1d38fb9..bbc5e32f 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -191,41 +191,37 @@ sub xdir ($;$) {
}
}
-sub xdb_sharded {
+# returns all shards as separate Xapian::Database objects w/o combining
+sub xdb_shards_flat ($) {
my ($self) = @_;
- opendir(my $dh, $self->{xpfx}) or return; # not initialized yet
-
- # We need numeric sorting so shard[0] is first for reading
- # Xapian metadata, if needed
- my $last = max(grep(/\A[0-9]+\z/, readdir($dh))) // return;
+ my $xpfx = $self->{xpfx};
my (@xdb, $slow_phrase);
- for (0..$last) {
- my $shard_dir = "$self->{xpfx}/$_";
- if (-d $shard_dir && -r _) {
+ if ($xpfx =~ m/xapian${\SCHEMA_VERSION}\z/) {
+ @xdb = ($X{Database}->new($xpfx));
+ $self->{qp_flags} |= FLAG_PHRASE() if !-f "$xpfx/iamchert";
+ } else {
+ opendir(my $dh, $xpfx) or return (); # not initialized yet
+ # We need numeric sorting so shard[0] is first for reading
+ # Xapian metadata, if needed
+ my $last = max(grep(/\A[0-9]+\z/, readdir($dh))) // return ();
+ for (0..$last) {
+ my $shard_dir = "$self->{xpfx}/$_";
push @xdb, $X{Database}->new($shard_dir);
$slow_phrase ||= -f "$shard_dir/iamchert";
- } else { # gaps from missing epochs throw off mdocid()
- warn "E: $shard_dir missing or unreadable\n";
- return;
}
+ $self->{qp_flags} |= FLAG_PHRASE() if !$slow_phrase;
}
- $self->{qp_flags} |= FLAG_PHRASE() if !$slow_phrase;
- $self->{nshard} = scalar(@xdb);
- my $xdb = shift @xdb;
- $xdb->add_database($_) for @xdb;
- $xdb;
+ @xdb;
}
sub _xdb {
my ($self) = @_;
- my $dir = xdir($self, 1);
$self->{qp_flags} //= $QP_FLAGS;
- if ($self->{ibx_ver} >= 2) {
- xdb_sharded($self);
- } else {
- $self->{qp_flags} |= FLAG_PHRASE() if !-f "$dir/iamchert";
- $X{Database}->new($dir);
- }
+ my @xdb = xdb_shards_flat($self) or return;
+ $self->{nshard} = scalar(@xdb);
+ my $xdb = shift @xdb;
+ $xdb->add_database($_) for @xdb;
+ $xdb;
}
# v2 Xapian docids don't conflict, so they're identical to
@@ -239,7 +235,7 @@ sub mdocid {
sub mset_to_artnums {
my ($self, $mset) = @_;
- my $nshard = $self->{nshard} // 1;
+ my $nshard = $self->{nshard};
[ map { mdocid($nshard, $_) } $mset->items ];
}
@@ -251,25 +247,14 @@ sub xdb ($) {
};
}
-sub xpfx_init ($) {
- my ($self) = @_;
- if ($self->{ibx_ver} == 1) {
- $self->{xpfx} .= '/public-inbox/xapian' . SCHEMA_VERSION;
- } else {
- $self->{xpfx} .= '/xap'.SCHEMA_VERSION;
- }
-}
-
sub new {
my ($class, $ibx) = @_;
ref $ibx or die "BUG: expected PublicInbox::Inbox object: $ibx";
- my $self = bless {
- xpfx => $ibx->{inboxdir}, # for xpfx_init
+ my $xap = $ibx->version > 1 ? 'xap' : 'public-inbox/xapian';
+ bless {
+ xpfx => "$ibx->{inboxdir}/$xap" . SCHEMA_VERSION,
altid => $ibx->{altid},
- ibx_ver => $ibx->version,
}, $class;
- xpfx_init($self);
- $self;
}
sub reopen {
@@ -362,7 +347,7 @@ sub _enquire_once { # retry_reopen callback
sub mset_to_smsg {
my ($self, $ibx, $mset) = @_;
- my $nshard = $self->{nshard} // 1;
+ my $nshard = $self->{nshard};
my $i = 0;
my %order = map { mdocid($nshard, $_) => ++$i } $mset->items;
my @msgs = sort {
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index 548f2114..7e2843e9 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -54,14 +54,11 @@ sub new {
}
}
$ibx = PublicInbox::InboxWritable->new($ibx);
- my $self = bless {
- ibx => $ibx,
- xpfx => $inboxdir, # for xpfx_init
- -altid => $altid,
- ibx_ver => $version,
- indexlevel => $indexlevel,
- }, $class;
- $self->xpfx_init;
+ my $self = PublicInbox::Search->new($ibx);
+ bless $self, $class;
+ $self->{ibx} = $ibx;
+ $self->{-altid} = $altid;
+ $self->{indexlevel} = $indexlevel;
$self->{-set_indexlevel_once} = 1 if $indexlevel eq 'medium';
if ($ibx->{-skip_docdata}) {
$self->{-set_skip_docdata_once} = 1;
@@ -408,7 +405,7 @@ sub add_xapian ($$$$) {
sub _msgmap_init ($) {
my ($self) = @_;
- die "BUG: _msgmap_init is only for v1\n" if $self->{ibx_ver} != 1;
+ die "BUG: _msgmap_init is only for v1\n" if $self->{ibx}->version != 1;
$self->{mm} //= eval {
require PublicInbox::Msgmap;
my $rw = $self->{ibx}->{-no_fsync} ? 2 : 1;
diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm
index e8a5fbd2..7d41b0f6 100644
--- a/lib/PublicInbox/V2Writable.pm
+++ b/lib/PublicInbox/V2Writable.pm
@@ -73,13 +73,7 @@ sub count_shards ($) {
delete $ibx->{search};
$srch->{nshard} // 0
} else { # ExtSearchIdx
- $self->{nshard} // do {
- if ($self->xdb_sharded) {
- $self->{nshard} // die 'BUG: {nshard} unset';
- } else {
- 0;
- }
- }
+ $self->{nshard} ||= scalar($self->xdb_shards_flat);
}
}
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [PATCH 12/26] rename LeiDaemon package to PublicInbox::LEI
2020-12-18 12:09 [PATCH 00/26] lei: basic UI + IPC work Eric Wong
` (10 preceding siblings ...)
2020-12-18 12:09 ` [PATCH 11/26] search: simplify initialization, add ->xdb_shards_flat Eric Wong
@ 2020-12-18 12:09 ` Eric Wong
2020-12-18 12:09 ` [PATCH 13/26] lei: support pass-through for `lei config' Eric Wong
` (13 subsequent siblings)
25 siblings, 0 replies; 30+ messages in thread
From: Eric Wong @ 2020-12-18 12:09 UTC (permalink / raw)
To: meta
"LEI" is an acronym, and ALL CAPS is consistent with existing
PublicInbox::{IMAP,HTTP,NNTP,WWW} naming for top-level modules,
3 of 4 old ones which deal directly with sockets and requests.
---
MANIFEST | 2 +-
lib/PublicInbox/{LeiDaemon.pm => LEI.pm} | 2 +-
script/lei | 8 ++++----
t/lei-oneshot.t | 4 ++--
t/lei.t | 2 +-
5 files changed, 9 insertions(+), 9 deletions(-)
rename lib/PublicInbox/{LeiDaemon.pm => LEI.pm} (99%)
diff --git a/MANIFEST b/MANIFEST
index 898766e7..29b47843 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -159,7 +159,7 @@ lib/PublicInbox/InboxIdle.pm
lib/PublicInbox/InboxWritable.pm
lib/PublicInbox/Isearch.pm
lib/PublicInbox/KQNotify.pm
-lib/PublicInbox/LeiDaemon.pm
+lib/PublicInbox/LEI.pm
lib/PublicInbox/LeiSearch.pm
lib/PublicInbox/LeiStore.pm
lib/PublicInbox/Linkify.pm
diff --git a/lib/PublicInbox/LeiDaemon.pm b/lib/PublicInbox/LEI.pm
similarity index 99%
rename from lib/PublicInbox/LeiDaemon.pm
rename to lib/PublicInbox/LEI.pm
index 56f4aa7d..b5ba1f71 100644
--- a/lib/PublicInbox/LeiDaemon.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -5,7 +5,7 @@
# PublicInbox::Daemon, this is designed exclusively to handle trusted
# local clients with read/write access to the FS and use as many
# system resources as the local user has access to.
-package PublicInbox::LeiDaemon;
+package PublicInbox::LEI;
use strict;
use v5.10.1;
use parent qw(PublicInbox::DS);
diff --git a/script/lei b/script/lei
index fce088e9..e59e4316 100755
--- a/script/lei
+++ b/script/lei
@@ -23,8 +23,8 @@ if (eval { require IO::FDPass; 1 }) { # use daemon to reduce load time
unless ($sock) { # start the daemon if not started
my $err = $! + 0;
my $env = { PERL5LIB => join(':', @INC) };
- my $cmd = [ $^X, qw[-MPublicInbox::LeiDaemon
- -E PublicInbox::LeiDaemon::lazy_start(@ARGV)],
+ my $cmd = [ $^X, qw[-MPublicInbox::LEI
+ -E PublicInbox::LEI::lazy_start(@ARGV)],
$path, $err ];
require PublicInbox::Spawn;
waitpid(PublicInbox::Spawn::spawn($cmd, $env), 0);
@@ -59,6 +59,6 @@ if (eval { require IO::FDPass; 1 }) { # use daemon to reduce load time
die $line;
}
} else { # for systems lacking IO::FDPass
- require PublicInbox::LeiDaemon;
- PublicInbox::LeiDaemon::oneshot(__PACKAGE__);
+ require PublicInbox::LEI;
+ PublicInbox::LEI::oneshot(__PACKAGE__);
}
diff --git a/t/lei-oneshot.t b/t/lei-oneshot.t
index 848682ee..3b8e412d 100644
--- a/t/lei-oneshot.t
+++ b/t/lei-oneshot.t
@@ -13,8 +13,8 @@ use subs qw(exit);
sub main {
# the below "line" directive is a magic comment, see perlsyn(1) manpage
# line 1 "lei-oneshot"
- require PublicInbox::LeiDaemon;
- PublicInbox::LeiDaemon::oneshot(__PACKAGE__);
+ require PublicInbox::LEI;
+ PublicInbox::LEI::oneshot(__PACKAGE__);
0;
}
1;
diff --git a/t/lei.t b/t/lei.t
index 53268908..7ecadf7d 100644
--- a/t/lei.t
+++ b/t/lei.t
@@ -159,7 +159,7 @@ SKIP: {
$test_lei_common = undef;
};
-require_ok 'PublicInbox::LeiDaemon';
+require_ok 'PublicInbox::LEI';
$LEI = 'lei-oneshot' if $test_lei_oneshot;
$test_lei_common->() if $test_lei_common;
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [PATCH 13/26] lei: support pass-through for `lei config'
2020-12-18 12:09 [PATCH 00/26] lei: basic UI + IPC work Eric Wong
` (11 preceding siblings ...)
2020-12-18 12:09 ` [PATCH 12/26] rename LeiDaemon package to PublicInbox::LEI Eric Wong
@ 2020-12-18 12:09 ` Eric Wong
2020-12-18 12:09 ` [PATCH 14/26] lei: help: show actual paths being operated on Eric Wong
` (12 subsequent siblings)
25 siblings, 0 replies; 30+ messages in thread
From: Eric Wong @ 2020-12-18 12:09 UTC (permalink / raw)
To: meta
This will be a handy wrapper for "git config" for manipulating
~/.config/lei/config. Since we'll have many commands, start
breaking up t/lei.t into more distinct sections for
ease-of-testing.
---
lib/PublicInbox/LEI.pm | 32 ++++++++++++-------------
t/lei.t | 54 +++++++++++++++++++++++++++++-------------
2 files changed, 53 insertions(+), 33 deletions(-)
diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index b5ba1f71..dbd2875d 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -24,13 +24,16 @@ use Text::Wrap qw(wrap);
use File::Path qw(mkpath);
use File::Spec;
our $quit = \&CORE::exit;
-my $glp = Getopt::Long::Parser->new;
-$glp->configure(qw(gnu_getopt no_ignore_case auto_abbrev));
+my $GLP = Getopt::Long::Parser->new;
+$GLP->configure(qw(gnu_getopt no_ignore_case auto_abbrev));
+my $GLP_PASS = Getopt::Long::Parser->new;
+$GLP_PASS->configure(qw(gnu_getopt no_ignore_case auto_abbrev pass_through));
+
our %PATH2CFG; # persistent for socket daemon
# TBD: this is a documentation mechanism to show a subcommand
# (may) pass options through to another command:
-sub pass_through { () }
+sub pass_through { $GLP_PASS }
# TODO: generate shell completion + help using %CMD and %OPTDESC
# command => [ positional_args, 1-line description, Getopt::Long option spec ]
@@ -97,7 +100,8 @@ our %CMD = ( # sorted in order of importance/use:
],
'config' => [ '[...]', 'git-config(1) wrapper for ~/.config/lei/config',
- pass_through('git config') ],
+ qw(config-file|system|global|file|f=s), # conflict detection
+ pass_through('git config') ],
'init' => [ '[PATHNAME]',
'initialize storage, default: ~/.local/share/lei/store',
qw(quiet|q) ],
@@ -231,7 +235,7 @@ sub _help ($;$) {
my $cmd_desc = shift(@info);
my @opt_desc;
my $lpad = 2;
- for my $sw (@info) { # qw(prio=s
+ for my $sw (grep { !ref($_) } @info) { # ("prio=s", "z", $GLP_PASS)
my $desc = $OPTDESC{"$cmd\t$sw"} // $OPTDESC{$sw} // next;
my $arg_vals = '';
($arg_vals, $desc) = @$desc if ref($desc) eq 'ARRAY';
@@ -305,6 +309,7 @@ sub optparse ($$$) {
my $opt = $client->{opt} = {};
my $info = $CMD{$cmd} // [ '[...]', '(undocumented command)' ];
my ($proto, $desc, @spec) = @$info;
+ my $glp = ref($spec[-1]) ? pop(@spec) : $GLP; # or $GLP_PASS
push @spec, qw(help|h);
my $lone_dash;
if ($spec[0] =~ s/\|\z//s) { # "stdin|" or "clear|" allows "-" alias
@@ -374,7 +379,7 @@ sub dispatch {
$cb->($client, @argv);
} elsif (grep(/\A-/, $cmd, @argv)) { # --help or -h only
my $opt = {};
- $glp->getoptionsfromarray([$cmd, @argv], $opt, qw(help|h)) or
+ $GLP->getoptionsfromarray([$cmd, @argv], $opt, qw(help|h)) or
return _help($client, 'bad arguments or options');
_help($client);
} else {
@@ -402,7 +407,7 @@ sub _lei_cfg ($;$) {
open my $fh, '>>', $f or die "open($f): $!\n";
@st = stat($fh) or die "fstat($f): $!\n";
$cur_st = pack('dd', $st[10], $st[7]);
- qerr($client, "I: $f created");
+ qerr($client, "I: $f created") if $client->{cmd} ne 'config';
}
my $cfg = PublicInbox::Config::git_config_dump($f);
$cfg->{-st} = $cur_st;
@@ -435,17 +440,10 @@ sub lei_mark {
sub lei_config {
my ($client, @argv) = @_;
+ $client->{opt}->{'config-file'} and return fail $client,
+ "config file switches not supported by `lei config'";
my $env = $client->{env};
- if (defined $env->{GIT_CONFIG}) {
- my %copy = %$env;
- delete $copy{GIT_CONFIG};
- $env = \%copy;
- }
- if (my @conflict = (grep(/\A-f=?\z/, @argv),
- grep(/\A--(?:global|system|
- file|config-file)=?\z/x, @argv))) {
- return fail($client, "@conflict not supported by lei config");
- }
+ delete local $env->{GIT_CONFIG};
my $cfg = _lei_cfg($client, 1);
my $cmd = [ qw(git config -f), $cfg->{'-f'}, @argv ];
my %rdr = map { $_ => $client->{$_} } (0..2);
diff --git a/t/lei.t b/t/lei.t
index 7ecadf7d..b0943962 100644
--- a/t/lei.t
+++ b/t/lei.t
@@ -22,8 +22,13 @@ local $ENV{XDG_RUNTIME_DIR} = "$home/xdg_run";
local $ENV{HOME} = $home;
local $ENV{FOO} = 'BAR';
mkdir "$home/xdg_run", 0700 or BAIL_OUT "mkdir: $!";
+my $home_trash = [ "$home/.local", "$home/.config" ];
+my $cleanup = sub {
+ rmtree([@$home_trash, @_]);
+ $out = $err = '';
+};
-my $test_lei_common = sub {
+my $test_help = sub {
ok(!$lei->([], undef, $opt), 'no args fails');
is($? >> 8, 1, '$? is 1');
is($out, '', 'nothing in stdout');
@@ -43,17 +48,17 @@ my $test_lei_common = sub {
isnt($err, '', 'something in stderr');
is($out, '', 'nothing in stdout');
}
+};
- # init tests
- $out = $err = '';
- my $ok_err_info = sub {
- my ($msg) = @_;
- is(grep(!/^I:/, split(/^/, $err)), 0, $msg) or
- diag "$msg: err=$err";
- $err = '';
- };
- my $home_trash = [ "$home/.local", "$home/.config" ];
- rmtree($home_trash);
+my $ok_err_info = sub {
+ my ($msg) = @_;
+ is(grep(!/^I:/, split(/^/, $err)), 0, $msg) or
+ diag "$msg: err=$err";
+ $err = '';
+};
+
+my $test_init = sub {
+ $cleanup->();
ok($lei->(['init'], undef, $opt), 'init w/o args');
$ok_err_info->('after init w/o args');
ok($lei->(['init'], undef, $opt), 'idempotent init w/o args');
@@ -63,17 +68,15 @@ my $test_lei_common = sub {
'init conflict');
is(grep(/^E:/, split(/^/, $err)), 1, 'got error on conflict');
ok(!-e "$home/x", 'nothing created on conflict');
- rmtree($home_trash);
+ $cleanup->();
- $err = '';
ok($lei->(['init', "$home/x"], undef, $opt), 'init conflict resolved');
$ok_err_info->('init w/ arg');
ok($lei->(['init', "$home/x"], undef, $opt), 'init idempotent w/ path');
$ok_err_info->('init idempotent w/ arg');
ok(-d "$home/x", 'created dir');
- rmtree([ "$home/x", @$home_trash ]);
+ $cleanup->("$home/x");
- $err = '';
ok(!$lei->(['init', "$home/x", "$home/2" ], undef, $opt),
'too many args fails');
like($err, qr/too many/, 'noted excessive');
@@ -82,7 +85,26 @@ my $test_lei_common = sub {
my $base = (split(m!/!, $d))[-1];
ok(!-d $d, "$base not created");
}
- is($out, '', 'nothing in stdout');
+ is($out, '', 'nothing in stdout on init failure');
+};
+
+my $test_config = sub {
+ $cleanup->();
+ ok($lei->([qw(config a.b c)], undef, $opt), 'config set var');
+ is($out.$err, '', 'no output on var set');
+ ok($lei->([qw(config -l)], undef, $opt), 'config -l');
+ is($err, '', 'no errors on listing');
+ is($out, "a.b=c\n", 'got expected output');
+ ok(!$lei->([qw(config -f), "$home/.config/f", qw(x.y z)], undef, $opt),
+ 'config set var with -f fails');
+ like($err, qr/not supported/, 'not supported noted');
+ ok(!-f "$home/config/f", 'no file created');
+};
+
+my $test_lei_common = sub {
+ $test_help->();
+ $test_config->();
+ $test_init->();
};
my $test_lei_oneshot = $ENV{TEST_LEI_ONESHOT};
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [PATCH 14/26] lei: help: show actual paths being operated on
2020-12-18 12:09 [PATCH 00/26] lei: basic UI + IPC work Eric Wong
` (12 preceding siblings ...)
2020-12-18 12:09 ` [PATCH 13/26] lei: support pass-through for `lei config' Eric Wong
@ 2020-12-18 12:09 ` Eric Wong
2020-12-18 12:09 ` [PATCH 15/26] lei: rename $client => $self and bless Eric Wong
` (11 subsequent siblings)
25 siblings, 0 replies; 30+ messages in thread
From: Eric Wong @ 2020-12-18 12:09 UTC (permalink / raw)
To: meta
This allows us to respect XDG_* environment variables
to override HOME.
We'll also make the $lei wrapper easier-to-use by auto-clearing
$out/$err and reducing [] needed for common cases.
---
lib/PublicInbox/LEI.pm | 42 +++++++++++++++++++++++++++---------------
t/lei.t | 27 ++++++++++++++++++++++-----
2 files changed, 49 insertions(+), 20 deletions(-)
diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index dbd2875d..667ef765 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -35,6 +35,20 @@ our %PATH2CFG; # persistent for socket daemon
# (may) pass options through to another command:
sub pass_through { $GLP_PASS }
+sub _store_path ($) {
+ my ($env) = @_;
+ File::Spec->rel2abs(($env->{XDG_DATA_HOME} //
+ ($env->{HOME} // '/nonexistent').'/.local/share')
+ .'/lei/store', $env->{PWD});
+}
+
+sub _config_path ($) {
+ my ($env) = @_;
+ File::Spec->rel2abs(($env->{XDG_CONFIG_HOME} //
+ ($env->{HOME} // '/nonexistent').'/.config')
+ .'/lei/config', $env->{PWD});
+}
+
# TODO: generate shell completion + help using %CMD and %OPTDESC
# command => [ positional_args, 1-line description, Getopt::Long option spec ]
our %CMD = ( # sorted in order of importance/use:
@@ -99,12 +113,13 @@ our %CMD = ( # sorted in order of importance/use:
qw(stdin| limit|n=i offset=i recursive|r exclude=s include=s !flags),
],
-'config' => [ '[...]', 'git-config(1) wrapper for ~/.config/lei/config',
- qw(config-file|system|global|file|f=s), # conflict detection
+'config' => [ '[...]', sub {
+ 'git-config(1) wrapper for '._config_path($_[0]);
+ }, qw(config-file|system|global|file|f=s), # for conflict detection
pass_through('git config') ],
-'init' => [ '[PATHNAME]',
- 'initialize storage, default: ~/.local/share/lei/store',
- qw(quiet|q) ],
+'init' => [ '[PATHNAME]', sub {
+ 'initialize storage, default: '._store_path($_[0]);
+ }, qw(quiet|q) ],
'daemon-stop' => [ '', 'stop the lei-daemon' ],
'daemon-pid' => [ '', 'show the PID of the lei-daemon' ],
'daemon-env' => [ '[NAME=VALUE...]', 'set, unset, or show daemon environment',
@@ -233,9 +248,10 @@ sub _help ($;$) {
my @info = @{$CMD{$cmd} // [ '...', '...' ]};
my @top = ($cmd, shift(@info) // ());
my $cmd_desc = shift(@info);
+ $cmd_desc = $cmd_desc->($client->{env}) if ref($cmd_desc) eq 'CODE';
my @opt_desc;
my $lpad = 2;
- for my $sw (grep { !ref($_) } @info) { # ("prio=s", "z", $GLP_PASS)
+ for my $sw (grep { !ref } @info) { # ("prio=s", "z", $GLP_PASS)
my $desc = $OPTDESC{"$cmd\t$sw"} // $OPTDESC{$sw} // next;
my $arg_vals = '';
($arg_vals, $desc) = @$desc if ref($desc) eq 'ARRAY';
@@ -307,8 +323,8 @@ sub optparse ($$$) {
my ($client, $cmd, $argv) = @_;
$client->{cmd} = $cmd;
my $opt = $client->{opt} = {};
- my $info = $CMD{$cmd} // [ '[...]', '(undocumented command)' ];
- my ($proto, $desc, @spec) = @$info;
+ my $info = $CMD{$cmd} // [ '[...]' ];
+ my ($proto, undef, @spec) = @$info;
my $glp = ref($spec[-1]) ? pop(@spec) : $GLP; # or $GLP_PASS
push @spec, qw(help|h);
my $lone_dash;
@@ -389,10 +405,7 @@ sub dispatch {
sub _lei_cfg ($;$) {
my ($client, $creat) = @_;
- my $env = $client->{env};
- my $cfg_dir = File::Spec->canonpath(( $env->{XDG_CONFIG_HOME} //
- ($env->{HOME} // '/nonexistent').'/.config').'/lei');
- my $f = "$cfg_dir/config";
+ my $f = _config_path($client->{env});
my @st = stat($f);
my $cur_st = @st ? pack('dd', $st[10], $st[7]) : ''; # 10:ctime, 7:size
if (my $cfg = $PATH2CFG{$f}) { # reuse existing object in common case
@@ -403,6 +416,7 @@ sub _lei_cfg ($;$) {
delete $client->{cfg};
return;
}
+ my (undef, $cfg_dir, undef) = File::Spec->splitpath($f);
-d $cfg_dir or mkpath($cfg_dir) or die "mkpath($cfg_dir): $!\n";
open my $fh, '>>', $f or die "open($f): $!\n";
@st = stat($fh) or die "fstat($f): $!\n";
@@ -456,9 +470,7 @@ sub lei_init {
my $cfg = _lei_cfg($client, 1);
my $cur = $cfg->{'leistore.dir'};
my $env = $client->{env};
- $dir //= ( $env->{XDG_DATA_HOME} //
- ($env->{HOME} // '/nonexistent').'/.local/share'
- ) . '/lei/store';
+ $dir //= _store_path($env);
$dir = File::Spec->rel2abs($dir, $env->{PWD}); # PWD is symlink-aware
my @cur = stat($cur) if defined($cur);
$cur = File::Spec->canonpath($cur) if $cur;
diff --git a/t/lei.t b/t/lei.t
index b0943962..bdf6cc1c 100644
--- a/t/lei.t
+++ b/t/lei.t
@@ -9,13 +9,18 @@ use PublicInbox::Config;
use File::Path qw(rmtree);
require_mods(qw(json DBD::SQLite Search::Xapian));
my $LEI = 'lei';
+my $opt = { 1 => \(my $out = ''), 2 => \(my $err = '') };
my $lei = sub {
my ($cmd, $env, $opt) = @_;
+ $out = $err = '';
+ if (!ref($cmd)) {
+ ($env, $opt) = grep { (!defined) || ref } @_;
+ $cmd = [ grep { defined } @_ ];
+ }
run_script([$LEI, @$cmd], $env, $opt);
};
my ($home, $for_destroy) = tmpdir();
-my $opt = { 1 => \(my $out = ''), 2 => \(my $err = '') };
delete local $ENV{XDG_DATA_HOME};
delete local $ENV{XDG_CONFIG_HOME};
local $ENV{XDG_RUNTIME_DIR} = "$home/xdg_run";
@@ -23,10 +28,7 @@ local $ENV{HOME} = $home;
local $ENV{FOO} = 'BAR';
mkdir "$home/xdg_run", 0700 or BAIL_OUT "mkdir: $!";
my $home_trash = [ "$home/.local", "$home/.config" ];
-my $cleanup = sub {
- rmtree([@$home_trash, @_]);
- $out = $err = '';
-};
+my $cleanup = sub { rmtree([@$home_trash, @_]) };
my $test_help = sub {
ok(!$lei->([], undef, $opt), 'no args fails');
@@ -48,6 +50,21 @@ my $test_help = sub {
isnt($err, '', 'something in stderr');
is($out, '', 'nothing in stdout');
}
+ ok($lei->(qw(init -h), undef, $opt), 'init -h');
+ like($out, qr! \Q$home\E/\.local/share/lei/store\b!,
+ 'actual path shown in init -h');
+ ok($lei->(qw(init -h), { XDG_DATA_HOME => '/XDH' }, $opt),
+ 'init with XDG_DATA_HOME');
+ like($out, qr! /XDH/lei/store\b!, 'XDG_DATA_HOME in init -h');
+ is($err, '', 'no errors from init -h');
+
+ ok($lei->(qw(config -h), undef, $opt), 'config-h');
+ like($out, qr! \Q$home\E/\.config/lei/config\b!,
+ 'actual path shown in config -h');
+ ok($lei->(qw(config -h), { XDG_CONFIG_HOME => '/XDC' }, $opt),
+ 'config with XDG_CONFIG_HOME');
+ like($out, qr! /XDC/lei/config\b!, 'XDG_CONFIG_HOME in config -h');
+ is($err, '', 'no errors from config -h');
};
my $ok_err_info = sub {
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [PATCH 15/26] lei: rename $client => $self and bless
2020-12-18 12:09 [PATCH 00/26] lei: basic UI + IPC work Eric Wong
` (13 preceding siblings ...)
2020-12-18 12:09 ` [PATCH 14/26] lei: help: show actual paths being operated on Eric Wong
@ 2020-12-18 12:09 ` Eric Wong
2020-12-18 12:09 ` [PATCH 16/26] lei: micro-optimize startup time Eric Wong
` (10 subsequent siblings)
25 siblings, 0 replies; 30+ messages in thread
From: Eric Wong @ 2020-12-18 12:09 UTC (permalink / raw)
To: meta
lei will get bigger, so follow existing OO conventions to make
it easy to call methods in PublicInbox::LEI from other packages.
---
lib/PublicInbox/LEI.pm | 146 ++++++++++++++++++++---------------------
1 file changed, 73 insertions(+), 73 deletions(-)
diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index 667ef765..f5824c59 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -209,12 +209,12 @@ my %OPTDESC = (
); # %OPTDESC
sub x_it ($$) { # pronounced "exit"
- my ($client, $code) = @_;
+ my ($self, $code) = @_;
if (my $sig = ($code & 127)) {
- kill($sig, $client->{pid} // $$);
+ kill($sig, $self->{pid} // $$);
} else {
$code >>= 8;
- if (my $sock = $client->{sock}) {
+ if (my $sock = $self->{sock}) {
say $sock "exit=$code";
} else { # for oneshot
$quit->($code);
@@ -223,32 +223,32 @@ sub x_it ($$) { # pronounced "exit"
}
sub emit {
- my ($client, $channel) = @_; # $buf = $_[2]
- print { $client->{$channel} } $_[2] or die "print FD[$channel]: $!";
+ my ($self, $channel) = @_; # $buf = $_[2]
+ print { $self->{$channel} } $_[2] or die "print FD[$channel]: $!";
}
sub err {
- my ($client, $buf) = @_;
+ my ($self, $buf) = @_;
$buf .= "\n" unless $buf =~ /\n\z/s;
- emit($client, 2, $buf);
+ emit($self, 2, $buf);
}
sub qerr { $_[0]->{opt}->{quiet} or err(@_) }
sub fail ($$;$) {
- my ($client, $buf, $exit_code) = @_;
- err($client, $buf);
- x_it($client, ($exit_code // 1) << 8);
+ my ($self, $buf, $exit_code) = @_;
+ err($self, $buf);
+ x_it($self, ($exit_code // 1) << 8);
undef;
}
sub _help ($;$) {
- my ($client, $errmsg) = @_;
- my $cmd = $client->{cmd} // 'COMMAND';
+ my ($self, $errmsg) = @_;
+ my $cmd = $self->{cmd} // 'COMMAND';
my @info = @{$CMD{$cmd} // [ '...', '...' ]};
my @top = ($cmd, shift(@info) // ());
my $cmd_desc = shift(@info);
- $cmd_desc = $cmd_desc->($client->{env}) if ref($cmd_desc) eq 'CODE';
+ $cmd_desc = $cmd_desc->($self->{env}) if ref($cmd_desc) eq 'CODE';
my @opt_desc;
my $lpad = 2;
for my $sw (grep { !ref } @info) { # ("prio=s", "z", $GLP_PASS)
@@ -314,15 +314,15 @@ EOF
$msg .= "\n";
}
my $channel = $errmsg ? 2 : 1;
- emit($client, $channel, $msg);
- x_it($client, $errmsg ? 1 << 8 : 0); # stderr => failure
+ emit($self, $channel, $msg);
+ x_it($self, $errmsg ? 1 << 8 : 0); # stderr => failure
undef;
}
sub optparse ($$$) {
- my ($client, $cmd, $argv) = @_;
- $client->{cmd} = $cmd;
- my $opt = $client->{opt} = {};
+ my ($self, $cmd, $argv) = @_;
+ $self->{cmd} = $cmd;
+ my $opt = $self->{opt} = {};
my $info = $CMD{$cmd} // [ '[...]' ];
my ($proto, undef, @spec) = @$info;
my $glp = ref($spec[-1]) ? pop(@spec) : $GLP; # or $GLP_PASS
@@ -334,8 +334,8 @@ sub optparse ($$$) {
push @spec, '' => \$var;
}
$glp->getoptionsfromarray($argv, $opt, @spec) or
- return _help($client, "bad arguments or options for $cmd");
- return _help($client) if $opt->{help};
+ return _help($self, "bad arguments or options for $cmd");
+ return _help($self) if $opt->{help};
# "-" aliases "stdin" or "clear"
$opt->{$lone_dash} = ${$opt->{$lone_dash}} if defined $lone_dash;
@@ -380,40 +380,40 @@ sub optparse ($$$) {
if (!$inf && scalar(@$argv) > scalar(@args)) {
$err //= 'too many arguments';
}
- $err ? fail($client, "usage: lei $cmd $proto\nE: $err") : 1;
+ $err ? fail($self, "usage: lei $cmd $proto\nE: $err") : 1;
}
sub dispatch {
- my ($client, $cmd, @argv) = @_;
- local $SIG{__WARN__} = sub { err($client, "@_") };
+ my ($self, $cmd, @argv) = @_;
+ local $SIG{__WARN__} = sub { err($self, "@_") };
local $SIG{__DIE__} = 'DEFAULT';
- return _help($client, 'no command given') unless defined($cmd);
+ return _help($self, 'no command given') unless defined($cmd);
my $func = "lei_$cmd";
$func =~ tr/-/_/;
if (my $cb = __PACKAGE__->can($func)) {
- optparse($client, $cmd, \@argv) or return;
- $cb->($client, @argv);
+ optparse($self, $cmd, \@argv) or return;
+ $cb->($self, @argv);
} elsif (grep(/\A-/, $cmd, @argv)) { # --help or -h only
my $opt = {};
$GLP->getoptionsfromarray([$cmd, @argv], $opt, qw(help|h)) or
- return _help($client, 'bad arguments or options');
- _help($client);
+ return _help($self, 'bad arguments or options');
+ _help($self);
} else {
- fail($client, "`$cmd' is not an lei command");
+ fail($self, "`$cmd' is not an lei command");
}
}
sub _lei_cfg ($;$) {
- my ($client, $creat) = @_;
- my $f = _config_path($client->{env});
+ my ($self, $creat) = @_;
+ my $f = _config_path($self->{env});
my @st = stat($f);
my $cur_st = @st ? pack('dd', $st[10], $st[7]) : ''; # 10:ctime, 7:size
if (my $cfg = $PATH2CFG{$f}) { # reuse existing object in common case
- return ($client->{cfg} = $cfg) if $cur_st eq $cfg->{-st};
+ return ($self->{cfg} = $cfg) if $cur_st eq $cfg->{-st};
}
if (!@st) {
unless ($creat) {
- delete $client->{cfg};
+ delete $self->{cfg};
return;
}
my (undef, $cfg_dir, undef) = File::Spec->splitpath($f);
@@ -421,17 +421,17 @@ sub _lei_cfg ($;$) {
open my $fh, '>>', $f or die "open($f): $!\n";
@st = stat($fh) or die "fstat($f): $!\n";
$cur_st = pack('dd', $st[10], $st[7]);
- qerr($client, "I: $f created") if $client->{cmd} ne 'config';
+ qerr($self, "I: $f created") if $self->{cmd} ne 'config';
}
my $cfg = PublicInbox::Config::git_config_dump($f);
$cfg->{-st} = $cur_st;
$cfg->{'-f'} = $f;
- $client->{cfg} = $PATH2CFG{$f} = $cfg;
+ $self->{cfg} = $PATH2CFG{$f} = $cfg;
}
sub _lei_store ($;$) {
- my ($client, $creat) = @_;
- my $cfg = _lei_cfg($client, $creat);
+ my ($self, $creat) = @_;
+ my $cfg = _lei_cfg($self, $creat);
$cfg->{-lei_store} //= do {
require PublicInbox::LeiStore;
PublicInbox::SearchIdx::load_xapian_writable();
@@ -441,35 +441,35 @@ sub _lei_store ($;$) {
}
sub lei_show {
- my ($client, @argv) = @_;
+ my ($self, @argv) = @_;
}
sub lei_query {
- my ($client, @argv) = @_;
+ my ($self, @argv) = @_;
}
sub lei_mark {
- my ($client, @argv) = @_;
+ my ($self, @argv) = @_;
}
sub lei_config {
- my ($client, @argv) = @_;
- $client->{opt}->{'config-file'} and return fail $client,
+ my ($self, @argv) = @_;
+ $self->{opt}->{'config-file'} and return fail $self,
"config file switches not supported by `lei config'";
- my $env = $client->{env};
+ my $env = $self->{env};
delete local $env->{GIT_CONFIG};
- my $cfg = _lei_cfg($client, 1);
+ my $cfg = _lei_cfg($self, 1);
my $cmd = [ qw(git config -f), $cfg->{'-f'}, @argv ];
- my %rdr = map { $_ => $client->{$_} } (0..2);
+ my %rdr = map { $_ => $self->{$_} } (0..2);
require PublicInbox::Import;
PublicInbox::Import::run_die($cmd, $env, \%rdr);
}
sub lei_init {
- my ($client, $dir) = @_;
- my $cfg = _lei_cfg($client, 1);
+ my ($self, $dir) = @_;
+ my $cfg = _lei_cfg($self, 1);
my $cur = $cfg->{'leistore.dir'};
- my $env = $client->{env};
+ my $env = $self->{env};
$dir //= _store_path($env);
$dir = File::Spec->rel2abs($dir, $env->{PWD}); # PWD is symlink-aware
my @cur = stat($cur) if defined($cur);
@@ -478,24 +478,24 @@ sub lei_init {
my $exists = "I: leistore.dir=$cur already initialized" if @dir;
if (@cur) {
if ($cur eq $dir) {
- _lei_store($client, 1)->done;
- return qerr($client, $exists);
+ _lei_store($self, 1)->done;
+ return qerr($self, $exists);
}
# some folks like symlinks and bind mounts :P
if (@dir && "$cur[0] $cur[1]" eq "$dir[0] $dir[1]") {
- lei_config($client, 'leistore.dir', $dir);
- _lei_store($client, 1)->done;
- return qerr($client, "$exists (as $cur)");
+ lei_config($self, 'leistore.dir', $dir);
+ _lei_store($self, 1)->done;
+ return qerr($self, "$exists (as $cur)");
}
- return fail($client, <<"");
+ return fail($self, <<"");
E: leistore.dir=$cur already initialized and it is not $dir
}
- lei_config($client, 'leistore.dir', $dir);
- _lei_store($client, 1)->done;
+ lei_config($self, 'leistore.dir', $dir);
+ _lei_store($self, 1)->done;
$exists //= "I: leistore.dir=$dir newly initialized";
- return qerr($client, $exists);
+ return qerr($self, $exists);
}
sub lei_daemon_pid { emit($_[0], 1, "$$\n") }
@@ -503,8 +503,8 @@ sub lei_daemon_pid { emit($_[0], 1, "$$\n") }
sub lei_daemon_stop { $quit->(0) }
sub lei_daemon_env {
- my ($client, @argv) = @_;
- my $opt = $client->{opt};
+ my ($self, @argv) = @_;
+ my $opt = $self->{opt};
if (defined $opt->{clear}) {
%ENV = ();
} elsif (my $u = $opt->{unset}) {
@@ -516,29 +516,29 @@ sub lei_daemon_env {
my $eor = $opt->{z} ? "\0" : "\n";
my $buf = '';
while (my ($k, $v) = each %ENV) { $buf .= "$k=$v$eor" }
- emit($client, 1, $buf)
+ emit($self, 1, $buf)
}
}
sub lei_help { _help($_[0]) }
sub reap_exec { # dwaitpid callback
- my ($client, $pid) = @_;
- x_it($client, $?);
+ my ($self, $pid) = @_;
+ x_it($self, $?);
}
sub lei_git { # support passing through random git commands
- my ($client, @argv) = @_;
- my %rdr = map { $_ => $client->{$_} } (0..2);
- my $pid = spawn(['git', @argv], $client->{env}, \%rdr);
- PublicInbox::DS::dwaitpid($pid, \&reap_exec, $client);
+ my ($self, @argv) = @_;
+ my %rdr = map { $_ => $self->{$_} } (0..2);
+ my $pid = spawn(['git', @argv], $self->{env}, \%rdr);
+ PublicInbox::DS::dwaitpid($pid, \&reap_exec, $self);
}
sub accept_dispatch { # Listener {post_accept} callback
my ($sock) = @_; # ignore other
$sock->blocking(1);
$sock->autoflush(1);
- my $client = { sock => $sock };
+ my $self = bless { sock => $sock }, __PACKAGE__;
vec(my $rin = '', fileno($sock), 1) = 1;
# `say $sock' triggers "die" in lei(1)
for my $i (0..2) {
@@ -547,7 +547,7 @@ sub accept_dispatch { # Listener {post_accept} callback
if ($fd >= 0) {
my $rdr = ($fd == 0 ? '<&=' : '>&=');
if (open(my $fh, $rdr, $fd)) {
- $client->{$i} = $fh;
+ $self->{$i} = $fh;
} else {
say $sock "open($rdr$fd) (FD=$i): $!";
return;
@@ -571,9 +571,9 @@ sub accept_dispatch { # Listener {post_accept} callback
};
my %env = map { split(/=/, $_, 2) } split(/\0/, $env);
if (chdir($env{PWD})) {
- $client->{env} = \%env;
- $client->{pid} = $client_pid;
- eval { dispatch($client, split(/\]\0\[/, $argv)) };
+ $self->{env} = \%env;
+ $self->{pid} = $client_pid;
+ eval { dispatch($self, split(/\]\0\[/, $argv)) };
say $sock $@ if $@;
} else {
say $sock "chdir($env{PWD}): $!"; # implicit close
@@ -691,12 +691,12 @@ sub oneshot {
local $quit = $exit if $exit;
local %PATH2CFG;
umask(077) // die("umask(077): $!");
- dispatch({
+ dispatch((bless {
0 => *STDIN{IO},
1 => *STDOUT{IO},
2 => *STDERR{IO},
env => \%ENV
- }, @ARGV);
+ }, __PACKAGE__), @ARGV);
}
1;
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [PATCH 16/26] lei: micro-optimize startup time
2020-12-18 12:09 [PATCH 00/26] lei: basic UI + IPC work Eric Wong
` (14 preceding siblings ...)
2020-12-18 12:09 ` [PATCH 15/26] lei: rename $client => $self and bless Eric Wong
@ 2020-12-18 12:09 ` Eric Wong
2020-12-18 12:09 ` [PATCH 17/26] lei_store: relax GIT_COMMITTER_IDENT check Eric Wong
` (9 subsequent siblings)
25 siblings, 0 replies; 30+ messages in thread
From: Eric Wong @ 2020-12-18 12:09 UTC (permalink / raw)
To: meta
We'll use lower-level Socket and avoid IO::Socket::UNIX,
use Cwd::fastcwd(*), avoid IO::Handle->autoflush by
using the select operator, and reuse buffer for reading
the socket while avoiding unnecessary $/ localization
in a tiny script.
All these things adds up to ~5-10 ms savings on my loaded
system.
(*) caveats about fastcwd won't apply since lei won't work
in removed directories.
---
lib/PublicInbox/LEI.pm | 13 ++++++-------
lib/PublicInbox/TestCommon.pm | 1 +
script/lei | 33 +++++++++++++++++----------------
3 files changed, 24 insertions(+), 23 deletions(-)
diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index f5824c59..5399fade 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -10,9 +10,9 @@ use strict;
use v5.10.1;
use parent qw(PublicInbox::DS);
use Getopt::Long ();
+use Socket qw(AF_UNIX SOCK_STREAM pack_sockaddr_un);
use Errno qw(EAGAIN ECONNREFUSED ENOENT);
use POSIX qw(setsid);
-use IO::Socket::UNIX;
use IO::Handle ();
use Sys::Syslog qw(syslog openlog);
use PublicInbox::Config;
@@ -585,18 +585,17 @@ sub noop {}
# lei(1) calls this when it can't connect
sub lazy_start {
my ($path, $err) = @_;
+ require IO::FDPass; # require this early so caller sees it
if ($err == ECONNREFUSED) {
unlink($path) or die "unlink($path): $!";
} elsif ($err != ENOENT) {
+ $! = $err; # allow interpolation to stringify in die
die "connect($path): $!";
}
- require IO::FDPass;
umask(077) // die("umask(077): $!");
- my $l = IO::Socket::UNIX->new(Local => $path,
- Listen => 1024,
- Type => SOCK_STREAM) or
- $err = $!;
- $l or return die "bind($path): $err";
+ socket(my $l, AF_UNIX, SOCK_STREAM, 0) or die "socket: $!";
+ bind($l, pack_sockaddr_un($path)) or die "bind($path): $!";
+ listen($l, 1024) or die "listen $!";
my @st = stat($path) or die "stat($path): $!";
my $dev_ino_expect = pack('dd', $st[0], $st[1]); # dev+ino
pipe(my ($eof_r, $eof_w)) or die "pipe: $!";
diff --git a/lib/PublicInbox/TestCommon.pm b/lib/PublicInbox/TestCommon.pm
index c236c589..338e760c 100644
--- a/lib/PublicInbox/TestCommon.pm
+++ b/lib/PublicInbox/TestCommon.pm
@@ -261,6 +261,7 @@ sub run_script ($;$$) {
my $orig_io = _prepare_redirects($fhref);
_run_sub($sub, $key, \@argv);
_undo_redirects($orig_io);
+ select STDOUT;
}
# slurp the redirects back into user-supplied strings
diff --git a/script/lei b/script/lei
index e59e4316..2b041fb4 100755
--- a/script/lei
+++ b/script/lei
@@ -3,8 +3,7 @@
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use v5.10.1;
-use Cwd qw(cwd);
-use IO::Socket::UNIX;
+use Socket qw(AF_UNIX SOCK_STREAM pack_sockaddr_un);
if (eval { require IO::FDPass; 1 }) { # use daemon to reduce load time
my $path = do {
@@ -13,14 +12,15 @@ if (eval { require IO::FDPass; 1 }) { # use daemon to reduce load time
require File::Spec;
$runtime_dir = File::Spec->tmpdir."/lei-$<";
}
- unless (-d $runtime_dir && -w _) {
+ unless (-d $runtime_dir) {
require File::Path;
File::Path::mkpath($runtime_dir, 0, 0700);
}
"$runtime_dir/sock";
};
- my $sock = IO::Socket::UNIX->new(Peer => $path, Type => SOCK_STREAM);
- unless ($sock) { # start the daemon if not started
+ my $addr = pack_sockaddr_un($path);
+ socket(my $sock, AF_UNIX, SOCK_STREAM, 0) or die "socket: $!";
+ unless (connect($sock, $addr)) { # start the daemon if not started
my $err = $! + 0;
my $env = { PERL5LIB => join(':', @INC) };
my $cmd = [ $^X, qw[-MPublicInbox::LEI
@@ -31,13 +31,14 @@ if (eval { require IO::FDPass; 1 }) { # use daemon to reduce load time
warn "lei-daemon exited with \$?=$?\n" if $?;
# try connecting again anyways, unlink+bind may be racy
- $sock = IO::Socket::UNIX->new(Peer => $path,
- Type => SOCK_STREAM) // die
+ connect($sock, $addr) or die
"connect($path): $! (after attempted daemon start)";
}
- my $pwd = $ENV{PWD};
- my $cwd = cwd();
- if ($pwd) { # prefer ENV{PWD} if it's a symlink to real cwd
+ require Cwd;
+ my $cwd = Cwd::fastcwd() // die "fastcwd: $!";
+ my $pwd = $ENV{PWD} // '';
+ if ($pwd eq $cwd) { # likely, all good
+ } elsif ($pwd) { # prefer ENV{PWD} if it's a symlink to real cwd
my @st_cwd = stat($cwd) or die "stat(cwd=$cwd): $!\n";
my @st_pwd = stat($pwd);
# make sure st_dev/st_ino match for {PWD} to be valid
@@ -47,16 +48,16 @@ if (eval { require IO::FDPass; 1 }) { # use daemon to reduce load time
$pwd = $cwd;
}
local $ENV{PWD} = $pwd;
- $sock->autoflush(1);
- IO::FDPass::send(fileno($sock), $_) for (0..2);
my $buf = "$$\0\0>" . join("]\0[", @ARGV) . "\0\0>";
while (my ($k, $v) = each %ENV) { $buf .= "$k=$v\0" }
$buf .= "\0\0";
+ select $sock;
+ $| = 1; # unbuffer selected $sock
+ IO::FDPass::send(fileno($sock), $_) for (0..2);
print $sock $buf or die "print(sock, buf): $!";
- local $/ = "\n";
- while (my $line = <$sock>) {
- $line =~ /\Aexit=([0-9]+)\n\z/ and exit($1 + 0);
- die $line;
+ while ($buf = <$sock>) {
+ $buf =~ /\Aexit=([0-9]+)\n\z/ and exit($1 + 0);
+ die $buf;
}
} else { # for systems lacking IO::FDPass
require PublicInbox::LEI;
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [PATCH 17/26] lei_store: relax GIT_COMMITTER_IDENT check
2020-12-18 12:09 [PATCH 00/26] lei: basic UI + IPC work Eric Wong
` (15 preceding siblings ...)
2020-12-18 12:09 ` [PATCH 16/26] lei: micro-optimize startup time Eric Wong
@ 2020-12-18 12:09 ` Eric Wong
2020-12-18 12:09 ` [PATCH 18/26] lei_store: keyword extraction from mbox and Maildir Eric Wong
` (8 subsequent siblings)
25 siblings, 0 replies; 30+ messages in thread
From: Eric Wong @ 2020-12-18 12:09 UTC (permalink / raw)
To: meta
It's pretty meaningless, since probably nobody notices committer
info we extract author info from individual emails, anyways.
---
lib/PublicInbox/LeiStore.pm | 13 +++++++++----
1 file changed, 9 insertions(+), 4 deletions(-)
diff --git a/lib/PublicInbox/LeiStore.pm b/lib/PublicInbox/LeiStore.pm
index d3667d29..c95df785 100644
--- a/lib/PublicInbox/LeiStore.pm
+++ b/lib/PublicInbox/LeiStore.pm
@@ -52,6 +52,14 @@ sub git_epoch_max {
}
}
+sub git_ident ($) {
+ my ($git) = @_;
+ chomp(my $i = $git->qx(qw(var GIT_COMMITTER_IDENT)));
+ warn "$git->{git_dir} GIT_COMMITTER_IDENT failed\n" if $?;
+ $i =~ /\A(.+) <([^>]+)> [0-9]+ [-\+]?[0-9]+$/ ? ($1, $2) :
+ ('lei user', 'x@example.com')
+}
+
sub importer {
my ($self) = @_;
my $max;
@@ -79,10 +87,7 @@ sub importer {
$max++;
next;
}
- chomp(my $i = $git->qx(qw(var GIT_COMMITTER_IDENT)));
- die "$git->{git_dir} GIT_COMMITTER_IDENT failed\n" if $?;
- my ($n, $e) = ($i =~ /\A(.+) <([^>]+)> [0-9]+ [-\+]?[0-9]+$/g)
- or die "could not extract name/email from `$i'\n";
+ my ($n, $e) = git_ident($git);
$self->{im} = $im = PublicInbox::Import->new($git, $n, $e);
$im->{bytes_added} = int($packed_bytes / $self->packing_factor);
$im->{lock_path} = undef;
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [PATCH 18/26] lei_store: keyword extraction from mbox and Maildir
2020-12-18 12:09 [PATCH 00/26] lei: basic UI + IPC work Eric Wong
` (16 preceding siblings ...)
2020-12-18 12:09 ` [PATCH 17/26] lei_store: relax GIT_COMMITTER_IDENT check Eric Wong
@ 2020-12-18 12:09 ` Eric Wong
2020-12-18 12:09 ` [PATCH 19/26] on_destroy: generic localized END Eric Wong
` (7 subsequent siblings)
25 siblings, 0 replies; 30+ messages in thread
From: Eric Wong @ 2020-12-18 12:09 UTC (permalink / raw)
To: meta
Dovecot, mutt, and likely much other software support mbox
Status/X-Status headers. Ensure we have a way to extract these
headers as JMAP-compatible keywords before removing them for git
storage.
->add_eml now accepts setting keywords at import time,
and will probably be called like this:
$lst->add_eml($eml, $lst->mbox_keywords($eml));
$lst->add_eml($eml, $lst->maildir_keywords($fn));
---
lib/PublicInbox/LeiStore.pm | 23 ++++++++++++++++++++++-
t/lei_store.t | 14 ++++++++++++++
2 files changed, 36 insertions(+), 1 deletion(-)
diff --git a/lib/PublicInbox/LeiStore.pm b/lib/PublicInbox/LeiStore.pm
index c95df785..553adbc8 100644
--- a/lib/PublicInbox/LeiStore.pm
+++ b/lib/PublicInbox/LeiStore.pm
@@ -162,8 +162,27 @@ sub remove_eml_keywords {
\@docids;
}
+# cf: https://doc.dovecot.org/configuration_manual/mail_location/mbox/
+my %status2kw = (F => 'flagged', A => 'answered', R => 'seen', T => 'draft');
+# O (old/non-recent), and D (deleted) aren't in JMAP,
+# so probably won't be supported by us.
+sub mbox_keywords {
+ my $eml = $_[-1];
+ my $s = "@{[$eml->header_raw('X-Status'),$eml->header_raw('Status')]}";
+ my %kw;
+ $s =~ s/([FART])/$kw{$status2kw{$1}} = 1/sge;
+ sort(keys %kw);
+}
+
+# cf: https://cr.yp.to/proto/maildir.html
+my %c2kw = ('D' => 'draft', F => 'flagged', R => 'answered', S => 'seen');
+sub maildir_keywords {
+ $_[-1] =~ /:2,([A-Z]+)\z/i ?
+ sort(map { $c2kw{$_} // () } split(//, $1)) : ();
+}
+
sub add_eml {
- my ($self, $eml) = @_;
+ my ($self, $eml, @kw) = @_;
my $eidx = eidx_init($self);
my $oidx = $eidx->{oidx};
my $smsg = bless { -oidx => $oidx }, 'PublicInbox::Smsg';
@@ -178,6 +197,7 @@ sub add_eml {
my $idx = $eidx->idx_shard($docid);
$oidx->add_xref3($docid, -1, $smsg->{blob}, '.');
$idx->shard_add_eidx_info($docid, '.', $eml); # List-Id
+ $idx->shard_add_keywords($docid, @kw) if @kw;
}
} else {
$smsg->{num} = $oidx->adj_counter('eidx_docid', '+');
@@ -185,6 +205,7 @@ sub add_eml {
$oidx->add_xref3($smsg->{num}, -1, $smsg->{blob}, '.');
my $idx = $eidx->idx_shard($smsg->{num});
$idx->index_raw($msgref, $eml, $smsg);
+ $idx->shard_add_keywords($smsg->{num}, @kw) if @kw;
}
$smsg->{blob}
}
diff --git a/t/lei_store.t b/t/lei_store.t
index c18a9620..03ab5af6 100644
--- a/t/lei_store.t
+++ b/t/lei_store.t
@@ -19,6 +19,20 @@ like($oid, qr/\A[0-9a-f]+\z/, 'add returned OID');
my $eml = eml_load('t/data/0001.patch');
is($lst->add_eml($eml), undef, 'idempotent');
$lst->done;
+is_deeply([$lst->mbox_keywords($eml)], [], 'no keywords');
+$eml->header_set('Status', 'RO');
+is_deeply([$lst->mbox_keywords($eml)], ['seen'], 'seen extracted');
+$eml->header_set('X-Status', 'A');
+is_deeply([$lst->mbox_keywords($eml)], [qw(answered seen)],
+ 'seen+answered extracted');
+$eml->header_set($_) for qw(Status X-Status);
+
+is_deeply([$lst->maildir_keywords('/foo:2,')], [], 'Maildir no keywords');
+is_deeply([$lst->maildir_keywords('/foo:2,S')], ['seen'], 'Maildir seen');
+is_deeply([$lst->maildir_keywords('/foo:2,RS')], ['answered', 'seen'],
+ 'Maildir answered + seen');
+is_deeply([$lst->maildir_keywords('/foo:2,RSZ')], ['answered', 'seen'],
+ 'Maildir answered + seen w/o Z');
{
my $es = $lst->search;
my $msgs = $es->over->query_xover(0, 1000);
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [PATCH 19/26] on_destroy: generic localized END
2020-12-18 12:09 [PATCH 00/26] lei: basic UI + IPC work Eric Wong
` (17 preceding siblings ...)
2020-12-18 12:09 ` [PATCH 18/26] lei_store: keyword extraction from mbox and Maildir Eric Wong
@ 2020-12-18 12:09 ` Eric Wong
2020-12-18 12:09 ` [PATCH 20/26] lei: restore default __DIE__ handler for event loop Eric Wong
` (6 subsequent siblings)
25 siblings, 0 replies; 30+ messages in thread
From: Eric Wong @ 2020-12-18 12:09 UTC (permalink / raw)
To: meta
This is a localized version of the process-wide END{}, but runs
at the end of variable scope. A subroutine ref and arguments
may be passed, which allows us to avoid anonymous subs and
problems they cause.
It's similar to `defer' or `ensure' in other languages; Perl can
rely on deterministic destructors due to refcounting.
---
MANIFEST | 2 ++
lib/PublicInbox/OnDestroy.pm | 16 ++++++++++++++++
t/on_destroy.t | 25 +++++++++++++++++++++++++
3 files changed, 43 insertions(+)
create mode 100644 lib/PublicInbox/OnDestroy.pm
create mode 100644 t/on_destroy.t
diff --git a/MANIFEST b/MANIFEST
index 29b47843..8e870c22 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -181,6 +181,7 @@ lib/PublicInbox/NNTP.pm
lib/PublicInbox/NNTPD.pm
lib/PublicInbox/NNTPdeflate.pm
lib/PublicInbox/NewsWWW.pm
+lib/PublicInbox/OnDestroy.pm
lib/PublicInbox/Over.pm
lib/PublicInbox/OverIdx.pm
lib/PublicInbox/ProcessPipe.pm
@@ -343,6 +344,7 @@ t/nntpd-v2.t
t/nntpd.t
t/nodatacow.t
t/nulsubject.t
+t/on_destroy.t
t/over.t
t/plack-2-txt-bodies.eml
t/plack-attached-patch.eml
diff --git a/lib/PublicInbox/OnDestroy.pm b/lib/PublicInbox/OnDestroy.pm
new file mode 100644
index 00000000..841f87d4
--- /dev/null
+++ b/lib/PublicInbox/OnDestroy.pm
@@ -0,0 +1,16 @@
+# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+package PublicInbox::OnDestroy;
+
+sub new {
+ shift; # ($class, $cb, @args)
+ bless [ @_ ], __PACKAGE__;
+}
+
+sub DESTROY {
+ my ($cb, @args) = @{$_[0]};
+ $cb->(@args) if $cb;
+}
+
+1;
diff --git a/t/on_destroy.t b/t/on_destroy.t
new file mode 100644
index 00000000..8b85b48e
--- /dev/null
+++ b/t/on_destroy.t
@@ -0,0 +1,25 @@
+#!perl -w
+use strict;
+use v5.10.1;
+use Test::More;
+require_ok 'PublicInbox::OnDestroy';
+my @x;
+my $od = PublicInbox::OnDestroy->new(sub { push @x, 'hi' });
+is_deeply(\@x, [], 'not called, yet');
+undef $od;
+is_deeply(\@x, [ 'hi' ], 'no args works');
+$od = PublicInbox::OnDestroy->new(sub { $x[0] = $_[0] }, 'bye');
+is_deeply(\@x, [ 'hi' ], 'nothing changed while alive');
+undef $od;
+is_deeply(\@x, [ 'bye' ], 'arg passed');
+$od = PublicInbox::OnDestroy->new(sub { @x = @_ }, qw(x y));
+undef $od;
+is_deeply(\@x, [ 'x', 'y' ], '2 args passed');
+
+if (my $nr = $ENV{TEST_LEAK_NR}) {
+ for (0..$nr) {
+ $od = PublicInbox::OnDestroy->new(sub { @x = @_ }, qw(x y));
+ }
+}
+
+done_testing;
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [PATCH 20/26] lei: restore default __DIE__ handler for event loop
2020-12-18 12:09 [PATCH 00/26] lei: basic UI + IPC work Eric Wong
` (18 preceding siblings ...)
2020-12-18 12:09 ` [PATCH 19/26] on_destroy: generic localized END Eric Wong
@ 2020-12-18 12:09 ` Eric Wong
2020-12-18 12:09 ` [PATCH 21/26] lei: drop $SIG{__DIE__}, add oneshot fallbacks Eric Wong
` (5 subsequent siblings)
25 siblings, 0 replies; 30+ messages in thread
From: Eric Wong @ 2020-12-18 12:09 UTC (permalink / raw)
To: meta
The kqueue code paths will trigger exceptions which are caught
by eval{}, so we can't be calling exit() from the __DIE__
handler and expect eval to catch it.
We only need the __DIE__ handler to deal with fork or open
failures at startup (since stderr is pointed to /dev/null).
After that we can rely on OnDestroy writing errors to syslog
when it goes out of scope.
---
lib/PublicInbox/LEI.pm | 16 ++++++++++------
1 file changed, 10 insertions(+), 6 deletions(-)
diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index 5399fade..95b48095 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -20,6 +20,7 @@ use PublicInbox::Syscall qw($SFD_NONBLOCK EPOLLIN EPOLLONESHOT);
use PublicInbox::Sigfd;
use PublicInbox::DS qw(now);
use PublicInbox::Spawn qw(spawn);
+use PublicInbox::OnDestroy;
use Text::Wrap qw(wrap);
use File::Path qw(mkpath);
use File::Spec;
@@ -386,7 +387,6 @@ sub optparse ($$$) {
sub dispatch {
my ($self, $cmd, @argv) = @_;
local $SIG{__WARN__} = sub { err($self, "@_") };
- local $SIG{__DIE__} = 'DEFAULT';
return _help($self, 'no command given') unless defined($cmd);
my $func = "lei_$cmd";
$func =~ tr/-/_/;
@@ -602,12 +602,12 @@ sub lazy_start {
my $oldset = PublicInbox::Sigfd::block_signals();
my $pid = fork // die "fork: $!";
return if $pid;
+ require PublicInbox::Listener;
+ require PublicInbox::EOFpipe;
openlog($path, 'pid', 'user');
local $SIG{__DIE__} = sub {
syslog('crit', "@_");
- exit $! if $!;
- exit $? >> 8 if $? >> 8;
- exit 255;
+ die; # calls the default __DIE__ handler
};
local $SIG{__WARN__} = sub { syslog('warning', "@_") };
open(STDIN, '+<', '/dev/null') or die "redirect stdin failed: $!\n";
@@ -616,10 +616,13 @@ sub lazy_start {
setsid();
$pid = fork // die "fork: $!";
return if $pid;
+ $SIG{__DIE__} = 'DEFAULT';
+ my $on_destroy = PublicInbox::OnDestroy->new(sub {
+ my ($owner_pid) = @_;
+ syslog('crit', "$@") if $@ && $$ == $owner_pid;
+ }, $$);
$0 = "lei-daemon $path";
local %PATH2CFG;
- require PublicInbox::Listener;
- require PublicInbox::EOFpipe;
$l->blocking(0);
$eof_w->blocking(0);
$eof_r->blocking(0);
@@ -680,6 +683,7 @@ sub lazy_start {
$n; # true: continue, false: stop
});
PublicInbox::DS->EventLoop;
+ $@ = undef if $on_destroy; # quiet OnDestroy if we got here
exit($exit_code // 0);
}
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [PATCH 21/26] lei: drop $SIG{__DIE__}, add oneshot fallbacks
2020-12-18 12:09 [PATCH 00/26] lei: basic UI + IPC work Eric Wong
` (19 preceding siblings ...)
2020-12-18 12:09 ` [PATCH 20/26] lei: restore default __DIE__ handler for event loop Eric Wong
@ 2020-12-18 12:09 ` Eric Wong
2020-12-18 12:09 ` [PATCH 22/26] lei: start working on bash completion Eric Wong
` (4 subsequent siblings)
25 siblings, 0 replies; 30+ messages in thread
From: Eric Wong @ 2020-12-18 12:09 UTC (permalink / raw)
To: meta
We'll force stdout+stderr to be a pipe the spawning client
controls, thus there's no need to lose error reporting by
prematurely redirecting stdout+stderr to /dev/null.
We can now rely exclusively on OnDestroy to write to syslog() on
uncaught die failures.
Also support falling back to oneshot mode on socket and cwd
failures, since some commands may still be useful if the current
working directory goes missing :P
---
lib/PublicInbox/LEI.pm | 67 ++++++++++++++++++++----------------------
script/lei | 39 +++++++++++++++---------
t/lei.t | 31 +++++++++++++++++--
3 files changed, 86 insertions(+), 51 deletions(-)
diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index 95b48095..fd412324 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -12,7 +12,7 @@ use parent qw(PublicInbox::DS);
use Getopt::Long ();
use Socket qw(AF_UNIX SOCK_STREAM pack_sockaddr_un);
use Errno qw(EAGAIN ECONNREFUSED ENOENT);
-use POSIX qw(setsid);
+use POSIX ();
use IO::Handle ();
use Sys::Syslog qw(syslog openlog);
use PublicInbox::Config;
@@ -584,60 +584,44 @@ sub noop {}
# lei(1) calls this when it can't connect
sub lazy_start {
- my ($path, $err) = @_;
- require IO::FDPass; # require this early so caller sees it
- if ($err == ECONNREFUSED) {
+ my ($path, $errno) = @_;
+ if ($errno == ECONNREFUSED) {
unlink($path) or die "unlink($path): $!";
- } elsif ($err != ENOENT) {
- $! = $err; # allow interpolation to stringify in die
+ } elsif ($errno != ENOENT) {
+ $! = $errno; # allow interpolation to stringify in die
die "connect($path): $!";
}
umask(077) // die("umask(077): $!");
socket(my $l, AF_UNIX, SOCK_STREAM, 0) or die "socket: $!";
bind($l, pack_sockaddr_un($path)) or die "bind($path): $!";
- listen($l, 1024) or die "listen $!";
+ listen($l, 1024) or die "listen: $!";
my @st = stat($path) or die "stat($path): $!";
my $dev_ino_expect = pack('dd', $st[0], $st[1]); # dev+ino
pipe(my ($eof_r, $eof_w)) or die "pipe: $!";
my $oldset = PublicInbox::Sigfd::block_signals();
- my $pid = fork // die "fork: $!";
- return if $pid;
+ require IO::FDPass;
require PublicInbox::Listener;
require PublicInbox::EOFpipe;
- openlog($path, 'pid', 'user');
- local $SIG{__DIE__} = sub {
- syslog('crit', "@_");
- die; # calls the default __DIE__ handler
- };
- local $SIG{__WARN__} = sub { syslog('warning', "@_") };
- open(STDIN, '+<', '/dev/null') or die "redirect stdin failed: $!\n";
- open STDOUT, '>&STDIN' or die "redirect stdout failed: $!\n";
- open STDERR, '>&STDIN' or die "redirect stderr failed: $!\n";
- setsid();
- $pid = fork // die "fork: $!";
+ (-p STDOUT && -p STDERR) or die "E: stdout+stderr must be pipes\n";
+ open(STDIN, '+<', '/dev/null') or die "redirect stdin failed: $!";
+ POSIX::setsid() > 0 or die "setsid: $!";
+ my $pid = fork // die "fork: $!";
return if $pid;
- $SIG{__DIE__} = 'DEFAULT';
- my $on_destroy = PublicInbox::OnDestroy->new(sub {
- my ($owner_pid) = @_;
- syslog('crit', "$@") if $@ && $$ == $owner_pid;
- }, $$);
$0 = "lei-daemon $path";
local %PATH2CFG;
- $l->blocking(0);
- $eof_w->blocking(0);
- $eof_r->blocking(0);
- my $listener = PublicInbox::Listener->new($l, \&accept_dispatch, $l);
+ $_->blocking(0) for ($l, $eof_r, $eof_w);
+ $l = PublicInbox::Listener->new($l, \&accept_dispatch, $l);
my $exit_code;
local $quit = sub {
$exit_code //= shift;
- my $tmp = $listener or exit($exit_code);
+ my $listener = $l or exit($exit_code);
unlink($path) if defined($path);
- syswrite($eof_w, '.');
- $l = $listener = $path = undef;
- $tmp->close if $tmp; # DS::close
+ # closing eof_w triggers \&noop wakeup
+ $eof_w = $l = $path = undef;
+ $listener->close; # DS::close
PublicInbox::DS->SetLoopTimeout(1000);
};
- PublicInbox::EOFpipe->new($eof_r, sub {}, undef);
+ PublicInbox::EOFpipe->new($eof_r, \&noop, undef);
my $sig = {
CHLD => \&PublicInbox::DS::enqueue_reap,
QUIT => $quit,
@@ -682,8 +666,21 @@ sub lazy_start {
}
$n; # true: continue, false: stop
});
+
+ # STDIN was redirected to /dev/null above, closing STDOUT and
+ # STDERR will cause the calling `lei' client process to finish
+ # reading <$daemon> pipe.
+ open STDOUT, '>&STDIN' or die "redirect stdout failed: $!";
+ openlog($path, 'pid', 'user');
+ local $SIG{__WARN__} = sub { syslog('warning', "@_") };
+ my $owner_pid = $$;
+ my $on_destroy = PublicInbox::OnDestroy->new(sub {
+ syslog('crit', "$@") if $@ && $$ == $owner_pid;
+ });
+ open STDERR, '>&STDIN' or die "redirect stderr failed: $!";
+ # $daemon pipe to `lei' closed, main loop begins:
PublicInbox::DS->EventLoop;
- $@ = undef if $on_destroy; # quiet OnDestroy if we got here
+ @$on_destroy = (); # cancel on_destroy if we get here
exit($exit_code // 0);
}
diff --git a/script/lei b/script/lei
index 2b041fb4..ceaf1e00 100755
--- a/script/lei
+++ b/script/lei
@@ -4,8 +4,8 @@
use strict;
use v5.10.1;
use Socket qw(AF_UNIX SOCK_STREAM pack_sockaddr_un);
-
-if (eval { require IO::FDPass; 1 }) { # use daemon to reduce load time
+if (my ($sock, $pwd) = eval {
+ require IO::FDPass; # will try to use a daemon to reduce load time
my $path = do {
my $runtime_dir = ($ENV{XDG_RUNTIME_DIR} // '') . '/lei';
if ($runtime_dir eq '/lei') {
@@ -21,32 +21,41 @@ if (eval { require IO::FDPass; 1 }) { # use daemon to reduce load time
my $addr = pack_sockaddr_un($path);
socket(my $sock, AF_UNIX, SOCK_STREAM, 0) or die "socket: $!";
unless (connect($sock, $addr)) { # start the daemon if not started
- my $err = $! + 0;
- my $env = { PERL5LIB => join(':', @INC) };
my $cmd = [ $^X, qw[-MPublicInbox::LEI
-E PublicInbox::LEI::lazy_start(@ARGV)],
- $path, $err ];
+ $path, $! + 0 ];
+ my $env = { PERL5LIB => join(':', @INC) };
+ pipe(my ($daemon, $w)) or die "pipe: $!";
+ my $opt = { 1 => $w, 2 => $w };
require PublicInbox::Spawn;
- waitpid(PublicInbox::Spawn::spawn($cmd, $env), 0);
- warn "lei-daemon exited with \$?=$?\n" if $?;
+ my $pid = PublicInbox::Spawn::spawn($cmd, $env, $opt);
+ $opt = $w = undef;
+ while (<$daemon>) { warn $_ } # EOF when STDERR is redirected
+ waitpid($pid, 0) or warn <<"";
+lei-daemon could not start, PID:$pid exited with \$?=$?
# try connecting again anyways, unlink+bind may be racy
- connect($sock, $addr) or die
- "connect($path): $! (after attempted daemon start)";
+ unless (connect($sock, $addr)) {
+ die <<"";
+connect($path): $! (after attempted daemon start)
+Falling back to (slow) one-shot mode
+
+ }
}
require Cwd;
- my $cwd = Cwd::fastcwd() // die "fastcwd: $!";
+ my $cwd = Cwd::fastcwd() // die "fastcwd(PWD=".($ENV{PWD}//'').": $!";
my $pwd = $ENV{PWD} // '';
- if ($pwd eq $cwd) { # likely, all good
- } elsif ($pwd) { # prefer ENV{PWD} if it's a symlink to real cwd
- my @st_cwd = stat($cwd) or die "stat(cwd=$cwd): $!\n";
- my @st_pwd = stat($pwd);
+ if ($pwd ne $cwd) { # prefer ENV{PWD} if it's a symlink to real cwd
+ my @st_cwd = stat($cwd) or die "stat(cwd=$cwd): $!";
+ my @st_pwd = stat($pwd); # PWD invalid, use cwd
# make sure st_dev/st_ino match for {PWD} to be valid
$pwd = $cwd if (!@st_pwd || $st_pwd[1] != $st_cwd[1] ||
$st_pwd[0] != $st_cwd[0]);
} else {
$pwd = $cwd;
}
+ ($sock, $pwd);
+}) { # IO::FDPass, $sock, $pwd are all available:
local $ENV{PWD} = $pwd;
my $buf = "$$\0\0>" . join("]\0[", @ARGV) . "\0\0>";
while (my ($k, $v) = each %ENV) { $buf .= "$k=$v\0" }
@@ -60,6 +69,8 @@ if (eval { require IO::FDPass; 1 }) { # use daemon to reduce load time
die $buf;
}
} else { # for systems lacking IO::FDPass
+ # don't warn about IO::FDPass since it's not commonly installed
+ warn $@ if $@ && index($@, 'IO::FDPass') < 0;
require PublicInbox::LEI;
PublicInbox::LEI::oneshot(__PACKAGE__);
}
diff --git a/t/lei.t b/t/lei.t
index bdf6cc1c..cce90fff 100644
--- a/t/lei.t
+++ b/t/lei.t
@@ -127,7 +127,7 @@ my $test_lei_common = sub {
my $test_lei_oneshot = $ENV{TEST_LEI_ONESHOT};
SKIP: {
last SKIP if $test_lei_oneshot;
- require_mods('IO::FDPass', 16);
+ require_mods(qw(IO::FDPass Cwd), 41);
my $sock = "$ENV{XDG_RUNTIME_DIR}/lei/sock";
ok(run_script([qw(lei daemon-pid)], undef, $opt), 'daemon-pid');
@@ -188,7 +188,34 @@ SKIP: {
chomp(my $new_pid = $out);
ok(kill(0, $new_pid), 'new pid is running');
ok(-S $sock, 'sock exists again');
- unlink $sock or BAIL_OUT "unlink $!";
+
+ if ('socket inaccessible') {
+ chmod 0000, $sock or BAIL_OUT "chmod 0000: $!";
+ $out = $err = '';
+ ok(run_script([qw(lei help)], undef, $opt),
+ 'connect fail, one-shot fallback works');
+ like($err, qr/\bconnect\(/, 'connect error noted');
+ like($out, qr/^usage: /, 'help output works');
+ chmod 0700, $sock or BAIL_OUT "chmod 0700: $!";
+ }
+ if ('oneshot on cwd gone') {
+ my $cwd = Cwd::fastcwd() or BAIL_OUT "fastcwd: $!";
+ my $d = "$home/to-be-removed";
+ mkdir $d or BAIL_OUT "mkdir($d) $!";
+ chdir $d or BAIL_OUT "chdir($d) $!";
+ if (rmdir($d)) {
+ $out = $err = '';
+ ok(run_script([qw(lei help)], undef, $opt),
+ 'cwd fail, one-shot fallback works');
+ } else {
+ $err = "rmdir=$!";
+ }
+ chdir $cwd or BAIL_OUT "chdir($cwd) $!";
+ like($err, qr/cwd\(/, 'cwd error noted');
+ like($out, qr/^usage: /, 'help output still works');
+ }
+
+ unlink $sock or BAIL_OUT "unlink($sock) $!";
for (0..100) {
kill('CHLD', $new_pid) or last;
tick();
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [PATCH 22/26] lei: start working on bash completion
2020-12-18 12:09 [PATCH 00/26] lei: basic UI + IPC work Eric Wong
` (20 preceding siblings ...)
2020-12-18 12:09 ` [PATCH 21/26] lei: drop $SIG{__DIE__}, add oneshot fallbacks Eric Wong
@ 2020-12-18 12:09 ` Eric Wong
2020-12-18 12:09 ` [PATCH 23/26] build: add lei.sh + "make symlink-install" target Eric Wong
` (3 subsequent siblings)
25 siblings, 0 replies; 30+ messages in thread
From: Eric Wong @ 2020-12-18 12:09 UTC (permalink / raw)
To: meta
Much work still needs to be done, but that goes for this
entire project :P
---
MANIFEST | 1 +
contrib/completion/lei-completion.bash | 11 +++++
lib/PublicInbox/LEI.pm | 61 +++++++++++++++++++++++++-
3 files changed, 72 insertions(+), 1 deletion(-)
create mode 100644 contrib/completion/lei-completion.bash
diff --git a/MANIFEST b/MANIFEST
index 8e870c22..1834e7bb 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -62,6 +62,7 @@ ci/README
ci/deps.perl
ci/profiles.sh
ci/run.sh
+contrib/completion/lei-completion.bash
contrib/css/216dark.css
contrib/css/216light.css
contrib/css/README
diff --git a/contrib/completion/lei-completion.bash b/contrib/completion/lei-completion.bash
new file mode 100644
index 00000000..67cdd3ed
--- /dev/null
+++ b/contrib/completion/lei-completion.bash
@@ -0,0 +1,11 @@
+# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# preliminary bash completion support for lei (Local Email Interface)
+# Needs a lot of work, see `lei__complete' in lib/PublicInbox::LEI.pm
+_lei() {
+ COMPREPLY=($(compgen -W "$(lei _complete ${COMP_WORDS[@]})" \
+ -- "${COMP_WORDS[COMP_CWORD]}"))
+ return 0
+}
+complete -o filenames -o bashdefault -F _lei lei
diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index fd412324..7004e9d7 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -132,7 +132,11 @@ our %CMD = ( # sorted in order of importance/use:
'reorder-local-store-and-break-history' => [ '[REFNAME]',
'rewrite git history in an attempt to improve compression',
- 'gc!' ]
+ 'gc!' ],
+
+# internal commands are prefixed with '_'
+'_complete' => [ '[...]', 'internal shell completion helper',
+ pass_through('everything') ],
); # @CMD
# switch descriptions, try to keep consistent across commands
@@ -209,6 +213,10 @@ my %OPTDESC = (
'unset matching NAME, may be specified multiple times'],
); # %OPTDESC
+my %CONFIG_KEYS = (
+ 'leistore.dir' => 'top-level storage location',
+);
+
sub x_it ($$) { # pronounced "exit"
my ($self, $code) = @_;
if (my $sig = ($code & 127)) {
@@ -223,6 +231,8 @@ sub x_it ($$) { # pronounced "exit"
}
}
+sub puts ($;@) { print { shift->{1} } map { "$_\n" } @_ }
+
sub emit {
my ($self, $channel) = @_; # $buf = $_[2]
print { $self->{$channel} } $_[2] or die "print FD[$channel]: $!";
@@ -522,6 +532,55 @@ sub lei_daemon_env {
sub lei_help { _help($_[0]) }
+# Shell completion helper. Used by lei-completion.bash and hopefully
+# other shells. Try to do as much here as possible to avoid redundancy
+# and improve maintainability.
+sub lei__complete {
+ my ($self, @argv) = @_; # argv = qw(lei and any other args...)
+ shift @argv; # ignore "lei", the entire command is sent
+ @argv or return puts $self, grep(!/^_/, keys %CMD);
+ my $cmd = shift @argv;
+ my $info = $CMD{$cmd} // do { # filter matching commands
+ @argv or puts $self, grep(/\A\Q$cmd\E/, keys %CMD);
+ return;
+ };
+ my ($proto, undef, @spec) = @$info;
+ my $cur = pop @argv;
+ my $re = defined($cur) ? qr/\A\Q$cur\E/ : qr/./;
+ if (substr($cur // '-', 0, 1) eq '-') { # --switches
+ # gross special case since the only git-config options
+ # Consider moving to a table if we need more special cases
+ # we use Getopt::Long for are the ones we reject, so these
+ # are the ones we don't reject:
+ if ($cmd eq 'config') {
+ puts $self, grep(/$re/, keys %CONFIG_KEYS);
+ @spec = qw(add z|null get get-all unset unset-all
+ replace-all get-urlmatch
+ remove-section rename-section
+ name-only list|l edit|e
+ get-color-name get-colorbool);
+ # fall-through
+ }
+ # TODO: arg support
+ puts $self, grep(/$re/, map { # generate short/long names
+ my $eq = '';
+ if (s/=.+\z//) { # required arg, e.g. output|o=i
+ $eq = '=';
+ } elsif (s/:.+\z//) { # optional arg, e.g. mid:s
+ } else { # negation: solve! => no-solve|solve
+ s/\A(.+)!\z/no-$1|$1/;
+ }
+ map {
+ length > 1 ? "--$_$eq" : "-$_"
+ } split(/\|/, $_, -1) # help|h
+ } grep { !ref } @spec); # filter out $GLP_PASS ref
+ } elsif ($cmd eq 'config' && !@argv && !$CONFIG_KEYS{$cur}) {
+ puts $self, grep(/$re/, keys %CONFIG_KEYS);
+ }
+ # TODO: URLs, pathnames, OIDs, MIDs, etc... See optparse() for
+ # proto parsing.
+}
+
sub reap_exec { # dwaitpid callback
my ($self, $pid) = @_;
x_it($self, $?);
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [PATCH 23/26] build: add lei.sh + "make symlink-install" target
2020-12-18 12:09 [PATCH 00/26] lei: basic UI + IPC work Eric Wong
` (21 preceding siblings ...)
2020-12-18 12:09 ` [PATCH 22/26] lei: start working on bash completion Eric Wong
@ 2020-12-18 12:09 ` Eric Wong
2020-12-18 12:09 ` [PATCH 24/26] lei: support for -$DIGIT and -$SIG CLI switches Eric Wong
` (2 subsequent siblings)
25 siblings, 0 replies; 30+ messages in thread
From: Eric Wong @ 2020-12-18 12:09 UTC (permalink / raw)
To: meta
This could've been done ages ago, but I rarely invoked
public-inbox-* commands from an interactive terminal
like I would with lei.
---
MANIFEST | 1 +
Makefile.PL | 11 +++++++++++
lei.sh | 7 +++++++
3 files changed, 19 insertions(+)
create mode 100755 lei.sh
diff --git a/MANIFEST b/MANIFEST
index 1834e7bb..e2d4ef72 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -102,6 +102,7 @@ examples/unsubscribe-psgi@.service
examples/unsubscribe.milter
examples/unsubscribe.psgi
examples/varnish-4.vcl
+lei.sh
lib/PublicInbox/Address.pm
lib/PublicInbox/AddressPP.pm
lib/PublicInbox/Admin.pm
diff --git a/Makefile.PL b/Makefile.PL
index 8e710df2..a1a9161f 100644
--- a/Makefile.PL
+++ b/Makefile.PL
@@ -223,5 +223,16 @@ Makefile.PL : MANIFEST
touch -r MANIFEST \$@
\$(PERLRUN) \$@
+# Install symlinks to ~/bin (which is hopefuly in PATH) which point to
+# this source tree.
+# prefix + bindir matches git.git Makefile:
+prefix = \$(HOME)
+bindir = \$(prefix)/bin
+symlink-install :
+ mkdir -p \$(bindir)
+ lei=\$\$(realpath lei.sh) && cd \$(bindir) && \\
+ for x in \$(EXE_FILES); do \\
+ ln -sf "\$\$lei" \$\$(basename "\$\$x"); \\
+ done
EOF
}
diff --git a/lei.sh b/lei.sh
new file mode 100755
index 00000000..f1510a73
--- /dev/null
+++ b/lei.sh
@@ -0,0 +1,7 @@
+#!/bin/sh -e
+# symlink this file to a directory in PATH to run lei (or anything in script/*)
+# without needing perms to install globally. Used by "make symlink-install"
+p=$(realpath "$0" || readlink "$0") # neither is POSIX, but common
+p=$(dirname "$p") c=$(basename "$0") # both are POSIX
+exec ${PERL-perl} -w -I"$p"/lib "$p"/script/"${c%.sh}" "$@"
+: this script is too short to copyright
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [PATCH 24/26] lei: support for -$DIGIT and -$SIG CLI switches
2020-12-18 12:09 [PATCH 00/26] lei: basic UI + IPC work Eric Wong
` (22 preceding siblings ...)
2020-12-18 12:09 ` [PATCH 23/26] build: add lei.sh + "make symlink-install" target Eric Wong
@ 2020-12-18 12:09 ` Eric Wong
2020-12-18 12:09 ` [PATCH 25/26] lei: revise output routines Eric Wong
2020-12-18 12:09 ` [PATCH 26/26] lei: extinbox: start implementing in config file Eric Wong
25 siblings, 0 replies; 30+ messages in thread
From: Eric Wong @ 2020-12-18 12:09 UTC (permalink / raw)
To: meta
I'm a bit spoiled by using single-dash digit options
from common tools: ("git log -$DIGIT", "kill -9",
"tail -1", ...), so we'll support it for limiting
query results.
But first, make it easier to send arbitrary signals to
the daemon via "daemon-kill". "daemon-stop" is redundant,
now, and removed, since the default for "daemon-kill" is
SIGTERM to match kill(1) behavior.
---
lib/PublicInbox/LEI.pm | 55 +++++++++++++++++++++++++++++-------------
t/lei.t | 18 +++++++++++---
2 files changed, 52 insertions(+), 21 deletions(-)
diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index 7004e9d7..c28c9b59 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -36,6 +36,21 @@ our %PATH2CFG; # persistent for socket daemon
# (may) pass options through to another command:
sub pass_through { $GLP_PASS }
+my $OPT;
+sub opt_dash {
+ my ($spec, $re_str) = @_; # 'limit|n=i', '([0-9]+)'
+ my ($key) = ($spec =~ m/\A([a-z]+)/g);
+ my $cb = sub { # Getopt::Long "<>" catch-all handler
+ my ($arg) = @_;
+ if ($arg =~ /\A-($re_str)\z/) {
+ $OPT->{$key} = $1;
+ } else {
+ die "bad argument for --$key: $arg\n";
+ }
+ };
+ ($spec, '<>' => $cb, $GLP_PASS)
+}
+
sub _store_path ($) {
my ($env) = @_;
File::Spec->rel2abs(($env->{XDG_DATA_HOME} //
@@ -55,8 +70,8 @@ sub _config_path ($) {
our %CMD = ( # sorted in order of importance/use:
'query' => [ 'SEARCH_TERMS...', 'search for messages matching terms', qw(
save-as=s output|o=s format|f=s dedupe|d=s thread|t augment|a
- limit|n=i sort|s=s@ reverse|r offset=i remote local! extinbox!
- since|after=s until|before=s) ],
+ sort|s=s@ reverse|r offset=i remote local! extinbox!
+ since|after=s until|before=s), opt_dash('limit|n=i', '[0-9]+') ],
'show' => [ 'MID|OID', 'show a given object (Message-ID or object ID)',
qw(type=s solve! format|f=s dedupe|d=s thread|t remote local!),
@@ -111,7 +126,7 @@ our %CMD = ( # sorted in order of importance/use:
'import' => [ '{URL_OR_PATHNAME|--stdin}',
'one-shot import/update from URL or filesystem',
- qw(stdin| limit|n=i offset=i recursive|r exclude=s include=s !flags),
+ qw(stdin| offset=i recursive|r exclude=s include=s !flags),
],
'config' => [ '[...]', sub {
@@ -121,7 +136,8 @@ our %CMD = ( # sorted in order of importance/use:
'init' => [ '[PATHNAME]', sub {
'initialize storage, default: '._store_path($_[0]);
}, qw(quiet|q) ],
-'daemon-stop' => [ '', 'stop the lei-daemon' ],
+'daemon-kill' => [ '[-SIGNAL]', 'signal the lei-daemon',
+ opt_dash('signal|s=s', '[0-9]+|(?:[A-Z][A-Z0-9]+)') ],
'daemon-pid' => [ '', 'show the PID of the lei-daemon' ],
'daemon-env' => [ '[NAME=VALUE...]', 'set, unset, or show daemon environment',
qw(clear| unset|u=s@ z|0) ],
@@ -175,8 +191,7 @@ my %OPTDESC = (
'ls-query format|f=s' => $ls_format,
'ls-extinbox format|f=s' => $ls_format,
-'limit|n=i' => ['NUM',
- 'limit on number of matches (default: 10000)' ],
+'limit|n=i@' => ['NUM', 'limit on number of matches (default: 10000)' ],
'offset=i' => ['OFF', 'search result offset (default: 0)'],
'sort|s=s@' => [ 'VAL|internaldate,date,relevance,docid',
@@ -211,6 +226,8 @@ my %OPTDESC = (
'clear|' => 'clear the daemon environment',
'unset|u=s@' => ['NAME',
'unset matching NAME, may be specified multiple times'],
+
+'signal|s=s' => [ 'SIG', 'signal to send lei-daemon (default: TERM)' ],
); # %OPTDESC
my %CONFIG_KEYS = (
@@ -333,23 +350,23 @@ EOF
sub optparse ($$$) {
my ($self, $cmd, $argv) = @_;
$self->{cmd} = $cmd;
- my $opt = $self->{opt} = {};
+ $OPT = $self->{opt} = {};
my $info = $CMD{$cmd} // [ '[...]' ];
my ($proto, undef, @spec) = @$info;
- my $glp = ref($spec[-1]) ? pop(@spec) : $GLP; # or $GLP_PASS
+ my $glp = ref($spec[-1]) eq ref($GLP) ? pop(@spec) : $GLP;
push @spec, qw(help|h);
my $lone_dash;
if ($spec[0] =~ s/\|\z//s) { # "stdin|" or "clear|" allows "-" alias
$lone_dash = $spec[0];
- $opt->{$spec[0]} = \(my $var);
+ $OPT->{$spec[0]} = \(my $var);
push @spec, '' => \$var;
}
- $glp->getoptionsfromarray($argv, $opt, @spec) or
+ $glp->getoptionsfromarray($argv, $OPT, @spec) or
return _help($self, "bad arguments or options for $cmd");
- return _help($self) if $opt->{help};
+ return _help($self) if $OPT->{help};
# "-" aliases "stdin" or "clear"
- $opt->{$lone_dash} = ${$opt->{$lone_dash}} if defined $lone_dash;
+ $OPT->{$lone_dash} = ${$OPT->{$lone_dash}} if defined $lone_dash;
my $i = 0;
my $POS_ARG = '[A-Z][A-Z0-9_]+';
@@ -365,14 +382,14 @@ sub optparse ($$$) {
} elsif ($var =~ /\.\.\.\]\z/) { # optional args start
$inf = 1;
last;
- } elsif ($var =~ /\A\[$POS_ARG\]\z/) { # one optional arg
+ } elsif ($var =~ /\A\[-?$POS_ARG\]\z/) { # one optional arg
$i++;
} elsif ($var =~ /\A.+?\|/) { # required FOO|--stdin
my @or = split(/\|/, $var);
my $ok;
for my $o (@or) {
if ($o =~ /\A--([a-z0-9\-]+)/) {
- $ok = defined($opt->{$1});
+ $ok = defined($OPT->{$1});
last;
} elsif (defined($argv->[$i])) {
$ok = 1;
@@ -510,7 +527,11 @@ E: leistore.dir=$cur already initialized and it is not $dir
sub lei_daemon_pid { emit($_[0], 1, "$$\n") }
-sub lei_daemon_stop { $quit->(0) }
+sub lei_daemon_kill {
+ my ($self) = @_;
+ my $sig = $self->{opt}->{signal} // 'TERM';
+ kill($sig, $$) or fail($self, "kill($sig, $$): $!");
+}
sub lei_daemon_env {
my ($self, @argv) = @_;
@@ -538,7 +559,7 @@ sub lei_help { _help($_[0]) }
sub lei__complete {
my ($self, @argv) = @_; # argv = qw(lei and any other args...)
shift @argv; # ignore "lei", the entire command is sent
- @argv or return puts $self, grep(!/^_/, keys %CMD);
+ @argv or return puts $self, grep(!/^_/, keys %CMD), qw(--help -h);
my $cmd = shift @argv;
my $info = $CMD{$cmd} // do { # filter matching commands
@argv or puts $self, grep(/\A\Q$cmd\E/, keys %CMD);
@@ -573,7 +594,7 @@ sub lei__complete {
map {
length > 1 ? "--$_$eq" : "-$_"
} split(/\|/, $_, -1) # help|h
- } grep { !ref } @spec); # filter out $GLP_PASS ref
+ } grep { $OPTDESC{"$cmd\t$_"} || $OPTDESC{$_} } @spec);
} elsif ($cmd eq 'config' && !@argv && !$CONFIG_KEYS{$cur}) {
puts $self, grep(/$re/, keys %CONFIG_KEYS);
}
diff --git a/t/lei.t b/t/lei.t
index cce90fff..30f9d2b6 100644
--- a/t/lei.t
+++ b/t/lei.t
@@ -127,7 +127,7 @@ my $test_lei_common = sub {
my $test_lei_oneshot = $ENV{TEST_LEI_ONESHOT};
SKIP: {
last SKIP if $test_lei_oneshot;
- require_mods(qw(IO::FDPass Cwd), 41);
+ require_mods(qw(IO::FDPass Cwd), 46);
my $sock = "$ENV{XDG_RUNTIME_DIR}/lei/sock";
ok(run_script([qw(lei daemon-pid)], undef, $opt), 'daemon-pid');
@@ -174,9 +174,9 @@ SKIP: {
ok(run_script([qw(lei daemon-env)], undef, $opt), 'env is empty');
is($out, '', 'env cleared');
- ok(run_script([qw(lei daemon-stop)], undef, $opt), 'daemon-stop');
- is($out, '', 'no output from daemon-stop');
- is($err, '', 'no error from daemon-stop');
+ ok(run_script([qw(lei daemon-kill)], undef, $opt), 'daemon-kill');
+ is($out, '', 'no output from daemon-kill');
+ is($err, '', 'no error from daemon-kill');
for (0..100) {
kill(0, $pid) or last;
tick();
@@ -189,6 +189,16 @@ SKIP: {
ok(kill(0, $new_pid), 'new pid is running');
ok(-S $sock, 'sock exists again');
+ $out = $err = '';
+ for my $sig (qw(-0 -CHLD)) {
+ ok(run_script([qw(lei daemon-kill), $sig ], undef, $opt),
+ "handles $sig");
+ }
+ is($out.$err, '', 'no output on innocuous signals');
+ ok(run_script([qw(lei daemon-pid)], undef, $opt), 'daemon-pid');
+ chomp $out;
+ is($out, $new_pid, 'PID unchanged after -0/-CHLD');
+
if ('socket inaccessible') {
chmod 0000, $sock or BAIL_OUT "chmod 0000: $!";
$out = $err = '';
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [PATCH 25/26] lei: revise output routines
2020-12-18 12:09 [PATCH 00/26] lei: basic UI + IPC work Eric Wong
` (23 preceding siblings ...)
2020-12-18 12:09 ` [PATCH 24/26] lei: support for -$DIGIT and -$SIG CLI switches Eric Wong
@ 2020-12-18 12:09 ` Eric Wong
2020-12-18 12:09 ` [PATCH 26/26] lei: extinbox: start implementing in config file Eric Wong
25 siblings, 0 replies; 30+ messages in thread
From: Eric Wong @ 2020-12-18 12:09 UTC (permalink / raw)
To: meta
Drop emit(), since we hard code the channel (client FD) 99% of
the time and use prototypes to avoid parentheses because my
hands are tired.
---
lib/PublicInbox/LEI.pm | 23 ++++++++---------------
1 file changed, 8 insertions(+), 15 deletions(-)
diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index c28c9b59..97c5d91b 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -250,18 +250,13 @@ sub x_it ($$) { # pronounced "exit"
sub puts ($;@) { print { shift->{1} } map { "$_\n" } @_ }
-sub emit {
- my ($self, $channel) = @_; # $buf = $_[2]
- print { $self->{$channel} } $_[2] or die "print FD[$channel]: $!";
-}
+sub out ($;@) { print { shift->{1} } @_ }
-sub err {
- my ($self, $buf) = @_;
- $buf .= "\n" unless $buf =~ /\n\z/s;
- emit($self, 2, $buf);
+sub err ($;@) {
+ print { shift->{2} } @_, (substr($_[-1], -1, 1) eq "\n" ? () : "\n");
}
-sub qerr { $_[0]->{opt}->{quiet} or err(@_) }
+sub qerr ($;@) { $_[0]->{opt}->{quiet} or err(shift, @_) }
sub fail ($$;$) {
my ($self, $buf, $exit_code) = @_;
@@ -341,8 +336,7 @@ EOF
$msg .= $rhs;
$msg .= "\n";
}
- my $channel = $errmsg ? 2 : 1;
- emit($self, $channel, $msg);
+ print { $self->{$errmsg ? 2 : 1} } $msg;
x_it($self, $errmsg ? 1 << 8 : 0); # stderr => failure
undef;
}
@@ -404,7 +398,6 @@ sub optparse ($$$) {
}
last if $err;
}
- # warn "inf=$inf ".scalar(@$argv). ' '.scalar(@args)."\n";
if (!$inf && scalar(@$argv) > scalar(@args)) {
$err //= 'too many arguments';
}
@@ -413,7 +406,7 @@ sub optparse ($$$) {
sub dispatch {
my ($self, $cmd, @argv) = @_;
- local $SIG{__WARN__} = sub { err($self, "@_") };
+ local $SIG{__WARN__} = sub { err($self, @_) };
return _help($self, 'no command given') unless defined($cmd);
my $func = "lei_$cmd";
$func =~ tr/-/_/;
@@ -525,7 +518,7 @@ E: leistore.dir=$cur already initialized and it is not $dir
return qerr($self, $exists);
}
-sub lei_daemon_pid { emit($_[0], 1, "$$\n") }
+sub lei_daemon_pid { puts shift, $$ }
sub lei_daemon_kill {
my ($self) = @_;
@@ -547,7 +540,7 @@ sub lei_daemon_env {
my $eor = $opt->{z} ? "\0" : "\n";
my $buf = '';
while (my ($k, $v) = each %ENV) { $buf .= "$k=$v$eor" }
- emit($self, 1, $buf)
+ out $self, $buf;
}
}
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [PATCH 26/26] lei: extinbox: start implementing in config file
2020-12-18 12:09 [PATCH 00/26] lei: basic UI + IPC work Eric Wong
` (24 preceding siblings ...)
2020-12-18 12:09 ` [PATCH 25/26] lei: revise output routines Eric Wong
@ 2020-12-18 12:09 ` Eric Wong
2020-12-18 20:23 ` Eric Wong
2020-12-27 20:02 ` [PATCH 27/26] lei_xsearch: cross-(inbox|extindex) search Eric Wong
25 siblings, 2 replies; 30+ messages in thread
From: Eric Wong @ 2020-12-18 12:09 UTC (permalink / raw)
To: meta
They need to be indexed by MiscIdx, but MiscIdx
still needs more work to support faster config
loading when dealing with ~100K data sources.
---
lib/PublicInbox/LEI.pm | 19 ++++-----
lib/PublicInbox/LeiExtinbox.pm | 52 ++++++++++++++++++++++++
t/lei.t | 72 ++++++++++++++++++++++++++++++++--
3 files changed, 130 insertions(+), 13 deletions(-)
create mode 100644 lib/PublicInbox/LeiExtinbox.pm
diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index 97c5d91b..b254e2c5 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -8,7 +8,7 @@
package PublicInbox::LEI;
use strict;
use v5.10.1;
-use parent qw(PublicInbox::DS);
+use parent qw(PublicInbox::DS PublicInbox::LeiExtinbox);
use Getopt::Long ();
use Socket qw(AF_UNIX SOCK_STREAM pack_sockaddr_un);
use Errno qw(EAGAIN ECONNREFUSED ENOENT);
@@ -79,12 +79,12 @@ our %CMD = ( # sorted in order of importance/use:
'add-extinbox' => [ 'URL_OR_PATHNAME',
'add/set priority of a publicinbox|extindex for extra matches',
- qw(prio=i) ],
+ qw(boost=i quiet|q) ],
'ls-extinbox' => [ '[FILTER...]', 'list publicinbox|extindex locations',
- qw(format|f=s z local remote) ],
+ qw(format|f=s z|0 local remote quiet|q) ],
'forget-extinbox' => [ '{URL_OR_PATHNAME|--prune}',
'exclude further results from a publicinbox|extindex',
- qw(prune) ],
+ qw(prune quiet|q) ],
'ls-query' => [ '[FILTER...]', 'list saved search queries',
qw(name-only format|f=s z) ],
@@ -107,7 +107,7 @@ our %CMD = ( # sorted in order of importance/use:
# code repos are used for `show' to solve blobs from patch mails
'add-coderepo' => [ 'PATHNAME', 'add or set priority of a git code repo',
- qw(prio=i) ],
+ qw(boost=i) ],
'ls-coderepo' => [ '[FILTER_TERMS...]',
'list known code repos', qw(format|f=s z) ],
'forget-coderepo' => [ 'PATHNAME',
@@ -197,7 +197,7 @@ my %OPTDESC = (
'sort|s=s@' => [ 'VAL|internaldate,date,relevance,docid',
"order of results `--output'-dependent"],
-'prio=i' => 'priority of query source',
+'boost=i' => 'increase/decrease priority of results (default: 0)',
'local' => 'limit operations to the local filesystem',
'local!' => 'exclude results from the local filesystem',
@@ -217,8 +217,7 @@ my %OPTDESC = (
'by-mid|mid:s' => [ 'MID', 'match only by Message-ID, ignoring contents' ],
'jobs:i' => 'set parallelism level',
-# xargs, env, use "-0", git(1) uses "-z". Should we support z|0 everywhere?
-'z' => 'use NUL \\0 instead of newline (CR) to delimit lines',
+# xargs, env, use "-0", git(1) uses "-z". We support z|0 everywhere
'z|0' => 'use NUL \\0 instead of newline (CR) to delimit lines',
# note: no "--ignore-environment" / "-i" support like env(1) since that
@@ -455,7 +454,9 @@ sub _lei_store ($;$) {
$cfg->{-lei_store} //= do {
require PublicInbox::LeiStore;
PublicInbox::SearchIdx::load_xapian_writable();
- defined(my $dir = $cfg->{'leistore.dir'}) or return;
+ my $dir = $cfg->{'leistore.dir'};
+ $dir //= _store_path($self->{env}) if $creat;
+ return unless $dir;
PublicInbox::LeiStore->new($dir, { creat => $creat });
};
}
diff --git a/lib/PublicInbox/LeiExtinbox.pm b/lib/PublicInbox/LeiExtinbox.pm
new file mode 100644
index 00000000..2f52b115
--- /dev/null
+++ b/lib/PublicInbox/LeiExtinbox.pm
@@ -0,0 +1,52 @@
+# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# *-extinbox commands of lei
+package PublicInbox::LeiExtinbox;
+use strict;
+use v5.10.1;
+use parent qw(Exporter);
+our @EXPORT = qw(lei_ls_extinbox lei_add_extinbox lei_forget_extinbox);
+
+sub lei_ls_extinbox {
+ my ($self, @argv) = @_;
+ my $stor = $self->_lei_store(0);
+ my $cfg = $self->_lei_cfg(0);
+ my $out = $self->{1};
+ my ($OFS, $ORS) = $self->{opt}->{z} ? ("\0", "\0\0") : (" ", "\n");
+ my (%boost, @loc);
+ for my $sec (grep(/\Aextinbox\./, @{$cfg->{-section_order}})) {
+ my $loc = substr($sec, length('extinbox.'));
+ $boost{$loc} = $cfg->{"$sec.boost"};
+ push @loc, $loc;
+ }
+ my $out = $self->{1};
+ use sort 'stable';
+ # highest boost first, but stable for alphabetic tie break
+ for (sort { $boost{$b} <=> $boost{$a} } sort keys %boost) {
+ # TODO: use miscidx and show docid so forget/set is easier
+ print $out $_, $OFS, 'boost=', $boost{$_}, $ORS;
+ }
+}
+
+sub lei_add_extinbox {
+ my ($self, $url_or_dir) = @_;
+ my $cfg = $self->_lei_cfg(1);
+ if ($url_or_dir !~ m!\Ahttps?://!) {
+ $url_or_dir = File::Spec->canonpath($url_or_dir);
+ }
+ my $new_boost = $self->{opt}->{boost} // 0;
+ my $key = "extinbox.$url_or_dir.boost";
+ my $cur_boost = $cfg->{$key};
+ return if defined($cur_boost) && $cur_boost == $new_boost; # idempotent
+ $self->lei_config($key, $new_boost);
+ my $stor = $self->_lei_store(1);
+ # TODO: add to MiscIdx
+ $stor->done;
+}
+
+sub lei_forget_extinbox {
+ # TODO
+}
+
+1;
diff --git a/t/lei.t b/t/lei.t
index 30f9d2b6..a95a0efc 100644
--- a/t/lei.t
+++ b/t/lei.t
@@ -7,17 +7,18 @@ use Test::More;
use PublicInbox::TestCommon;
use PublicInbox::Config;
use File::Path qw(rmtree);
+require_git 2.6;
require_mods(qw(json DBD::SQLite Search::Xapian));
my $LEI = 'lei';
my $opt = { 1 => \(my $out = ''), 2 => \(my $err = '') };
my $lei = sub {
- my ($cmd, $env, $opt) = @_;
+ my ($cmd, $env, $xopt) = @_;
$out = $err = '';
if (!ref($cmd)) {
- ($env, $opt) = grep { (!defined) || ref } @_;
- $cmd = [ grep { defined } @_ ];
+ ($env, $xopt) = grep { (!defined) || ref } @_;
+ $cmd = [ grep { defined && !ref } @_ ];
}
- run_script([$LEI, @$cmd], $env, $opt);
+ run_script([$LEI, @$cmd], $env, $xopt // $opt);
};
my ($home, $for_destroy) = tmpdir();
@@ -29,6 +30,8 @@ local $ENV{FOO} = 'BAR';
mkdir "$home/xdg_run", 0700 or BAIL_OUT "mkdir: $!";
my $home_trash = [ "$home/.local", "$home/.config" ];
my $cleanup = sub { rmtree([@$home_trash, @_]) };
+my $config_file = "$home/.config/lei/config";
+my $store_dir = "$home/.local/share/lei";
my $test_help = sub {
ok(!$lei->([], undef, $opt), 'no args fails');
@@ -118,10 +121,71 @@ my $test_config = sub {
ok(!-f "$home/config/f", 'no file created');
};
+my $setup_publicinboxes = sub {
+ state $done = '';
+ return if $done eq $home;
+ use PublicInbox::InboxWritable;
+ for my $V (1, 2) {
+ run_script([qw(-init -Lmedium), "-V$V", "t$V",
+ '--newsgroup', "t.$V",
+ "$home/t$V", "http://example.com/t$V",
+ "t$V\@example.com" ]) or BAIL_OUT "init v$V";
+ }
+ my $cfg = PublicInbox::Config->new;
+ my $seen = 0;
+ $cfg->each_inbox(sub {
+ my ($ibx) = @_;
+ my $im = PublicInbox::InboxWritable->new($ibx)->importer(0);
+ my $V = $ibx->version;
+ my @eml = glob('t/*.eml');
+ push(@eml, 't/data/0001.patch') if $V == 2;
+ for (@eml) {
+ next if $_ eq 't/psgi_v2-old.eml'; # dup mid
+ $im->add(eml_load($_)) or BAIL_OUT "v$V add $_";
+ $seen++;
+ }
+ $im->done;
+ if ($V == 1) {
+ run_script(['-index', $ibx->{inboxdir}]) or
+ BAIL_OUT 'index v1';
+ }
+ });
+ $done = $home;
+ $seen || BAIL_OUT 'no imports';
+};
+
+my $test_extinbox = sub {
+ $setup_publicinboxes->();
+ $cleanup->();
+ $lei->('ls-extinbox');
+ is($out.$err, '', 'ls-extinbox no output, yet');
+ ok(!-e $config_file && !-e $store_dir,
+ 'nothing created by ls-extinbox');
+
+ my $cfg = PublicInbox::Config->new;
+ $cfg->each_inbox(sub {
+ my ($ibx) = @_;
+ ok($lei->(qw(add-extinbox -q), $ibx->{inboxdir}),
+ 'added extinbox');
+ is($out.$err, '', 'no output');
+ });
+ ok(-s $config_file && -e $store_dir,
+ 'add-extinbox created config + store');
+ my $lcfg = PublicInbox::Config->new($config_file);
+ $cfg->each_inbox(sub {
+ my ($ibx) = @_;
+ is($lcfg->{"extinbox.$ibx->{inboxdir}.boost"}, 0,
+ "configured boost on $ibx->{name}");
+ });
+ $lei->('ls-extinbox');
+ like($out, qr/boost=0\n/s, 'ls-extinbox has output');
+};
+
my $test_lei_common = sub {
$test_help->();
$test_config->();
$test_init->();
+ $test_extinbox->();
};
my $test_lei_oneshot = $ENV{TEST_LEI_ONESHOT};
^ permalink raw reply related [flat|nested] 30+ messages in thread
* Re: [PATCH 26/26] lei: extinbox: start implementing in config file
2020-12-18 12:09 ` [PATCH 26/26] lei: extinbox: start implementing in config file Eric Wong
@ 2020-12-18 20:23 ` Eric Wong
2020-12-27 20:02 ` [PATCH 27/26] lei_xsearch: cross-(inbox|extindex) search Eric Wong
1 sibling, 0 replies; 30+ messages in thread
From: Eric Wong @ 2020-12-18 20:23 UTC (permalink / raw)
To: meta
Will squash these changes in before pushing:
diff --git a/MANIFEST b/MANIFEST
index e2d4ef72..f0847e3c 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -162,6 +162,7 @@ lib/PublicInbox/InboxWritable.pm
lib/PublicInbox/Isearch.pm
lib/PublicInbox/KQNotify.pm
lib/PublicInbox/LEI.pm
+lib/PublicInbox/LeiExtinbox.pm
lib/PublicInbox/LeiSearch.pm
lib/PublicInbox/LeiStore.pm
lib/PublicInbox/Linkify.pm
diff --git a/lib/PublicInbox/LeiExtinbox.pm b/lib/PublicInbox/LeiExtinbox.pm
index 2f52b115..c2de7735 100644
--- a/lib/PublicInbox/LeiExtinbox.pm
+++ b/lib/PublicInbox/LeiExtinbox.pm
@@ -20,7 +20,6 @@ sub lei_ls_extinbox {
$boost{$loc} = $cfg->{"$sec.boost"};
push @loc, $loc;
}
- my $out = $self->{1};
use sort 'stable';
# highest boost first, but stable for alphabetic tie break
for (sort { $boost{$b} <=> $boost{$a} } sort keys %boost) {
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [PATCH 27/26] lei_xsearch: cross-(inbox|extindex) search
2020-12-18 12:09 ` [PATCH 26/26] lei: extinbox: start implementing in config file Eric Wong
2020-12-18 20:23 ` Eric Wong
@ 2020-12-27 20:02 ` Eric Wong
1 sibling, 0 replies; 30+ messages in thread
From: Eric Wong @ 2020-12-27 20:02 UTC (permalink / raw)
To: meta
While a single extindex combines multiple inboxes into a single
search index, extindex still requires up-front indexing on items
which can be searched. XSearch has no on-disk footprint itself
and uses Xapian DBs of existing publicinbox and extindex
("extinbox") exclusively.
XSearch still suffers from the multi-shard Xapian scalability
problems which led to the creation of extindex, but I expect the
number of shards to remain relatively low.
I envision users hosting public-inbox instances on their
workstations will only have two extindex combined by this, one
read-only extindex for serving public archives, and one
read-write extindex managed by LeiStore for private mail.
---
Note: this depends on {relevance} == 2 support at
https://public-inbox.org/meta/20201227193829.9408-3-e@80x24.org/
MANIFEST | 2 +
lib/PublicInbox/LeiSearch.pm | 14 +++----
lib/PublicInbox/LeiXSearch.pm | 72 ++++++++++++++++++++++++++++++++++
lib/PublicInbox/Search.pm | 19 ++++-----
t/lei_xsearch.t | 73 +++++++++++++++++++++++++++++++++++
5 files changed, 160 insertions(+), 20 deletions(-)
create mode 100644 lib/PublicInbox/LeiXSearch.pm
create mode 100644 t/lei_xsearch.t
diff --git a/MANIFEST b/MANIFEST
index 656c707e..a5ff81cf 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -165,6 +165,7 @@ lib/PublicInbox/LEI.pm
lib/PublicInbox/LeiExtinbox.pm
lib/PublicInbox/LeiSearch.pm
lib/PublicInbox/LeiStore.pm
+lib/PublicInbox/LeiXSearch.pm
lib/PublicInbox/Linkify.pm
lib/PublicInbox/Listener.pm
lib/PublicInbox/Lock.pm
@@ -327,6 +328,7 @@ t/kqnotify.t
t/lei-oneshot.t
t/lei.t
t/lei_store.t
+t/lei_xsearch.t
t/linkify.t
t/main-bin/spamc
t/mda-mime.eml
diff --git a/lib/PublicInbox/LeiSearch.pm b/lib/PublicInbox/LeiSearch.pm
index 66c16e04..0b962b11 100644
--- a/lib/PublicInbox/LeiSearch.pm
+++ b/lib/PublicInbox/LeiSearch.pm
@@ -7,20 +7,18 @@ use v5.10.1;
use parent qw(PublicInbox::ExtSearch);
use PublicInbox::Search;
-sub combined_docid ($$) {
+# get combined docid from over.num:
+# (not generic Xapian, only works with our sharding scheme)
+sub num2docid ($$) {
my ($self, $num) = @_;
- ($num - 1) * $self->{nshard} + 1;
+ my $nshard = $self->{nshard};
+ ($num - 1) * $nshard + $num % $nshard + 1;
}
sub msg_keywords {
my ($self, $num) = @_; # num_or_mitem
my $xdb = $self->xdb; # set {nshard};
- my $docid = ref($num) ? $num->get_docid : do {
- # get combined docid from over.num:
- # (not generic Xapian, only works with our sharding scheme)
- my $nshard = $self->{nshard};
- ($num - 1) * $nshard + $num % $nshard + 1;
- };
+ my $docid = ref($num) ? $num->get_docid : num2docid($self, $num);
my %kw;
eval {
my $end = $xdb->termlist_end($docid);
diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm
new file mode 100644
index 00000000..1a81b14a
--- /dev/null
+++ b/lib/PublicInbox/LeiXSearch.pm
@@ -0,0 +1,72 @@
+# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# Combine any combination of PublicInbox::Search,
+# PublicInbox::ExtSearch, and PublicInbox::LeiSearch objects
+# into one Xapian DB
+package PublicInbox::LeiXSearch;
+use strict;
+use v5.10.1;
+use parent qw(PublicInbox::LeiSearch);
+
+sub new {
+ my ($class) = @_;
+ PublicInbox::Search::load_xapian();
+ bless {
+ qp_flags => $PublicInbox::Search::QP_FLAGS |
+ PublicInbox::Search::FLAG_PURE_NOT(),
+ }, $class
+}
+
+sub attach_extinbox {
+ my ($self, $ibxish) = @_; # ibxish = ExtSearch or Inbox
+ if (!$ibxish->can('over')) {
+ push @{$self->{remotes}}, $ibxish
+ }
+ if (delete $self->{xdb}) { # XXX: do we need this?
+ # clobber existing {xdb} if amending
+ my $expect = delete $self->{nshard};
+ my $shards = delete $self->{shards_flat};
+ scalar(@$shards) == $expect or die
+ "BUG: {nshard}$expect != shards=".scalar(@$shards);
+
+ my $prev = {};
+ for my $old_ibxish (@{$self->{shard2ibx}}) {
+ next if $prev == $old_ibxish;
+ $prev = $old_ibxish;
+ my @shards = $old_ibxish->search->xdb_shards_flat;
+ push @{$self->{shards_flat}}, @shards;
+ }
+ my $nr = scalar(@{$self->{shards_flat}});
+ $nr == $expect or die
+ "BUG: reloaded $nr shards, expected $expect"
+ }
+ my @shards = $ibxish->search->xdb_shards_flat;
+ push @{$self->{shards_flat}}, @shards;
+ push(@{$self->{shard2ibx}}, $ibxish) for (@shards);
+}
+
+# called by PublicInbox::Search::xdb
+sub xdb_shards_flat { @{$_[0]->{shards_flat}} }
+
+# like over->get_art
+sub smsg_for {
+ my ($self, $mitem) = @_;
+ # cf. https://trac.xapian.org/wiki/FAQ/MultiDatabaseDocumentID
+ my $nshard = $self->{nshard};
+ my $docid = $mitem->get_docid;
+ my $shard = ($docid - 1) % $nshard;
+ my $num = int(($docid - 1) / $nshard) + 1;
+ my $smsg = $self->{shard2ibx}->[$shard]->over->get_art($num);
+ $smsg->{docid} = $docid;
+ $smsg;
+}
+
+sub recent {
+ my ($self, $qstr, $opt) = @_;
+ $opt //= {};
+ $opt->{relevance} //= -2;
+ $self->mset($qstr //= 'bytes:1..', $opt);
+}
+
+1;
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index 4fae0c66..58653c9e 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -195,6 +195,7 @@ sub xdb_shards_flat ($) {
my ($self) = @_;
my $xpfx = $self->{xpfx};
my (@xdb, $slow_phrase);
+ load_xapian();
if ($xpfx =~ m/xapian${\SCHEMA_VERSION}\z/) {
@xdb = ($X{Database}->new($xpfx));
$self->{qp_flags} |= FLAG_PHRASE() if !-f "$xpfx/iamchert";
@@ -213,16 +214,6 @@ sub xdb_shards_flat ($) {
@xdb;
}
-sub _xdb {
- my ($self) = @_;
- $self->{qp_flags} //= $QP_FLAGS;
- my @xdb = xdb_shards_flat($self) or return;
- $self->{nshard} = scalar(@xdb);
- my $xdb = shift @xdb;
- $xdb->add_database($_) for @xdb;
- $xdb;
-}
-
# v2 Xapian docids don't conflict, so they're identical to
# NNTP article numbers and IMAP UIDs.
# https://trac.xapian.org/wiki/FAQ/MultiDatabaseDocumentID
@@ -241,8 +232,12 @@ sub mset_to_artnums {
sub xdb ($) {
my ($self) = @_;
$self->{xdb} //= do {
- load_xapian();
- $self->_xdb;
+ $self->{qp_flags} //= $QP_FLAGS;
+ my @xdb = $self->xdb_shards_flat or return;
+ $self->{nshard} = scalar(@xdb);
+ my $xdb = shift @xdb;
+ $xdb->add_database($_) for @xdb;
+ $xdb;
};
}
diff --git a/t/lei_xsearch.t b/t/lei_xsearch.t
new file mode 100644
index 00000000..c41213bd
--- /dev/null
+++ b/t/lei_xsearch.t
@@ -0,0 +1,73 @@
+#!perl -w
+# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use v5.10.1;
+use Test::More;
+use List::Util qw(shuffle max);
+use PublicInbox::TestCommon;
+use PublicInbox::ExtSearchIdx;
+use PublicInbox::Eml;
+use PublicInbox::InboxWritable;
+require_mods(qw(DBD::SQLite Search::Xapian));
+require_git 2.6;
+require_ok 'PublicInbox::LeiXSearch';
+my ($home, $for_destroy) = tmpdir();
+my @ibx;
+for my $V (1..2) {
+ for my $i (3..6) {
+ my $ibx = PublicInbox::InboxWritable->new({
+ inboxdir => "$home/v$V-$i",
+ name => "test-v$V-$i",
+ version => $V,
+ indexlevel => 'medium',
+ -primary_address => "v$V-$i\@example.com",
+ }, { nproc => int(rand(8)) + 1 });
+ push @ibx, $ibx;
+ my $im = $ibx->importer(0);
+ for my $j (0..9) {
+ my $eml = PublicInbox::Eml->new(<<EOF);
+From: x\@example.com
+To: $ibx->{-primary_address}
+Date: Fri, 02 Oct 1993 0$V:0$i:0$j +0000
+Subject: v${V}i${i}j$j
+Message-ID: <v${V}i${i}j$j\@example>
+
+${V}er ${i}on j$j
+EOF
+ $im->add($eml);
+ }
+ $im->done;
+ }
+}
+my $first = shift @ibx; is($first->{name}, 'test-v1-3', 'first plucked');
+my $last = pop @ibx; is($last->{name}, 'test-v2-6', 'last plucked');
+my $eidx = PublicInbox::ExtSearchIdx->new("$home/eidx");
+$eidx->attach_inbox($first);
+$eidx->attach_inbox($last);
+$eidx->eidx_sync({fsync => 0});
+my $es = PublicInbox::ExtSearch->new("$home/eidx");
+my $lxs = PublicInbox::LeiXSearch->new;
+for my $ibxish (shuffle($es, @ibx)) {
+ $lxs->attach_extinbox($ibxish);
+}
+my $nr = $lxs->xdb->get_doccount;
+my $mset = $lxs->mset('d:19931002..19931003', { limit => $nr });
+is($mset->size, $nr, 'got all messages');
+my @msgs;
+for my $mi ($mset->items) {
+ if (my $smsg = $lxs->smsg_for($mi)) {
+ push @msgs, $smsg;
+ } else {
+ diag "E: ${\$mi->get_docid} missing";
+ }
+}
+is(scalar(@msgs), $nr, 'smsgs retrieved for all');
+
+$mset = $lxs->recent(undef, { limit => 1 });
+is($mset->size, 1, 'one result');
+my $max = max(map { $_->{docid} } @msgs);
+is($lxs->smsg_for(($mset->items)[0])->{docid}, $max,
+ 'got highest docid');
+
+done_testing;
^ permalink raw reply related [flat|nested] 30+ messages in thread