* [PATCH 01/16] lei: switch to use SEQPACKET socketpair instead of pipe
2021-02-02 11:46 [PATCH 00/16] lei: -I/--include and more Eric Wong
@ 2021-02-02 11:46 ` Eric Wong
2021-02-02 11:46 ` [PATCH 02/16] lei_query: default to 10000 messages as documented Eric Wong
` (14 subsequent siblings)
15 siblings, 0 replies; 17+ messages in thread
From: Eric Wong @ 2021-02-02 11:46 UTC (permalink / raw)
To: meta
This will allow us to send larger messages and to accumulate
progress reports in the main daemon.
---
MANIFEST | 2 +-
lib/PublicInbox/LEI.pm | 8 ++--
lib/PublicInbox/LeiXSearch.pm | 27 ++++++------
lib/PublicInbox/OpPipe.pm | 41 ------------------
lib/PublicInbox/PktOp.pm | 79 +++++++++++++++++++++++++++++++++++
5 files changed, 98 insertions(+), 59 deletions(-)
delete mode 100644 lib/PublicInbox/OpPipe.pm
create mode 100644 lib/PublicInbox/PktOp.pm
diff --git a/MANIFEST b/MANIFEST
index 017dc7f2..bcb9d08e 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -205,9 +205,9 @@ lib/PublicInbox/NNTPD.pm
lib/PublicInbox/NNTPdeflate.pm
lib/PublicInbox/NewsWWW.pm
lib/PublicInbox/OnDestroy.pm
-lib/PublicInbox/OpPipe.pm
lib/PublicInbox/Over.pm
lib/PublicInbox/OverIdx.pm
+lib/PublicInbox/PktOp.pm
lib/PublicInbox/ProcessPipe.pm
lib/PublicInbox/Qspawn.pm
lib/PublicInbox/Reply.pm
diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index 17ad18b9..737db1e1 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -306,7 +306,7 @@ sub qerr ($;@) { $_[0]->{opt}->{quiet} or err(shift, @_) }
sub fail ($$;$) {
my ($self, $buf, $exit_code) = @_;
err($self, $buf) if defined $buf;
- syswrite($self->{op_pipe}, '!') if $self->{op_pipe}; # fail_handler
+ send($self->{pkt_op}, '!', MSG_EOR) if $self->{pkt_op}; # fail_handler
x_it($self, ($exit_code // 1) << 8);
undef;
}
@@ -369,14 +369,14 @@ sub io_restore ($$) {
sub note_sigpipe { # triggers sigpipe_handler
my ($self, $fd) = @_;
close(delete($self->{$fd})); # explicit close silences Perl warning
- syswrite($self->{op_pipe}, '|') if $self->{op_pipe};
+ send($self->{pkt_op}, '|', MSG_EOR) if $self->{pkt_op};
x_it($self, 13);
}
sub atfork_child_wq {
my ($self, $wq) = @_;
io_restore($self, $wq);
- -p $self->{op_pipe} or die 'BUG: {op_pipe} expected';
+ -S $self->{pkt_op} or die 'BUG: {pkt_op} expected';
io_restore($self->{l2m}, $wq);
%PATH2CFG = ();
undef $errors_log;
@@ -408,7 +408,7 @@ sub atfork_parent_wq {
$self->{env} = $env;
delete @$lei{qw(3 -lei_store cfg old_1 pgr lxs)}; # keep l2m
my @io = (delete(@$lei{qw(0 1 2)}),
- io_extract($lei, qw(sock op_pipe startq)));
+ io_extract($lei, qw(sock pkt_op startq)));
my $l2m = $lei->{l2m};
if ($l2m && $l2m != $wq) { # $wq == lxs
if (my $wq_s1 = $l2m->{-wq_s1}) {
diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm
index f630e79a..e577ab09 100644
--- a/lib/PublicInbox/LeiXSearch.pm
+++ b/lib/PublicInbox/LeiXSearch.pm
@@ -9,10 +9,11 @@ use strict;
use v5.10.1;
use parent qw(PublicInbox::LeiSearch PublicInbox::IPC);
use PublicInbox::DS qw(dwaitpid);
-use PublicInbox::OpPipe;
+use PublicInbox::PktOp;
use PublicInbox::Import;
use File::Temp 0.19 (); # 0.19 for ->newdir
use File::Spec ();
+use Socket qw(MSG_EOR);
use PublicInbox::Search qw(xap_terms);
use PublicInbox::Spawn qw(popen_rd spawn which);
use PublicInbox::MID qw(mids);
@@ -353,7 +354,8 @@ sub query_prepare { # called by wq_do
delete $lei->{l2m}->{-wq_s1};
eval { $lei->{l2m}->do_augment($lei) };
$lei->fail($@) if $@;
- syswrite($lei->{op_pipe}, '.') == 1 or die "do_post_augment trigger: $!"
+ send($lei->{pkt_op}, '.', MSG_EOR) == 1 or
+ die "do_post_augment trigger: $!"
}
sub fail_handler ($;$$) {
@@ -380,20 +382,19 @@ sub do_query {
fcntl($lei->{startq}, 1031, 4096) if $^O eq 'linux';
$zpipe = $l2m->pre_augment($lei);
}
- pipe(my $done, $lei->{op_pipe}) or die "pipe $!";
+ my $in_loop = exists $lei->{sock};
+ my $ops = {
+ '|' => [ \&sigpipe_handler, $lei ],
+ '!' => [ \&fail_handler, $lei ],
+ '.' => [ \&do_post_augment, $lei, $zpipe, $au_done ],
+ '' => [ \&query_done, $lei ],
+ };
+ (my $op, $lei->{pkt_op}) = PublicInbox::PktOp->pair($ops, $in_loop);
my ($lei_ipc, @io) = $lei->atfork_parent_wq($self);
- delete($lei->{op_pipe});
+ delete($lei->{pkt_op});
$lei->event_step_init; # wait for shutdowns
- my $done_op = {
- '' => [ \&query_done, $lei ],
- '|' => [ \&sigpipe_handler, $lei ],
- '!' => [ \&fail_handler, $lei ]
- };
- my $in_loop = exists $lei->{sock};
- $done = PublicInbox::OpPipe->new($done, $done_op, $in_loop);
if ($l2m) {
- $done_op->{'.'} = [ \&do_post_augment, $lei, $zpipe, $au_done ];
$self->wq_do('query_prepare', \@io, $lei_ipc);
$io[1] = $zpipe->[1] if $zpipe;
}
@@ -401,7 +402,7 @@ sub do_query {
$self->wq_close(1);
unless ($in_loop) {
# for the $lei_ipc->atfork_child_wq PIPE handler:
- while ($done->{sock}) { $done->event_step }
+ while ($op->{sock}) { $op->event_step }
}
}
diff --git a/lib/PublicInbox/OpPipe.pm b/lib/PublicInbox/OpPipe.pm
deleted file mode 100644
index 295a8aa5..00000000
--- a/lib/PublicInbox/OpPipe.pm
+++ /dev/null
@@ -1,41 +0,0 @@
-# Copyright (C) 2021 all contributors <meta@public-inbox.org>
-# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
-
-# bytecode dispatch pipe, reads a byte, runs a sub
-# byte => [ sub, @operands ]
-package PublicInbox::OpPipe;
-use strict;
-use v5.10.1;
-use parent qw(PublicInbox::DS);
-use PublicInbox::Syscall qw(EPOLLIN);
-
-sub new {
- my ($cls, $rd, $op_map, $in_loop) = @_;
- my $self = bless { sock => $rd, op_map => $op_map }, $cls;
- # 1031: F_SETPIPE_SZ, 4096: page size
- fcntl($rd, 1031, 4096) if $^O eq 'linux';
- if ($in_loop) { # iff using DS->EventLoop
- $rd->blocking(0);
- $self->SUPER::new($rd, EPOLLIN);
- }
- $self;
-}
-
-sub event_step {
- my ($self) = @_;
- my $rd = $self->{sock};
- my $byte;
- until (defined(sysread($rd, $byte, 1))) {
- return if $!{EAGAIN};
- next if $!{EINTR};
- die "read \$rd: $!";
- }
- my $op = $self->{op_map}->{$byte} or die "BUG: unknown byte `$byte'";
- if ($byte eq '') { # close on EOF
- $rd->blocking ? delete($self->{sock}) : $self->close;
- }
- my ($sub, @args) = @$op;
- $sub->(@args);
-}
-
-1;
diff --git a/lib/PublicInbox/PktOp.pm b/lib/PublicInbox/PktOp.pm
new file mode 100644
index 00000000..d5b95a73
--- /dev/null
+++ b/lib/PublicInbox/PktOp.pm
@@ -0,0 +1,79 @@
+# Copyright (C) 2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# op dispatch socket, reads a message, runs a sub
+# There may be multiple producers, but (for now) only one consumer
+# Used for lei_xsearch and maybe other things
+# "literal" => [ sub, @operands ]
+# /regexp/ => [ sub, @operands ]
+package PublicInbox::PktOp;
+use strict;
+use v5.10.1;
+use parent qw(PublicInbox::DS);
+use Errno qw(EAGAIN EINTR);
+use PublicInbox::Syscall qw(EPOLLIN EPOLLET);
+use Socket qw(AF_UNIX MSG_EOR SOCK_SEQPACKET);
+
+sub new {
+ my ($cls, $r, $ops, $in_loop) = @_;
+ my $self = bless { sock => $r, ops => $ops, re => [] }, $cls;
+ if (ref($ops) eq 'ARRAY') {
+ my %ops;
+ for my $op (@$ops) {
+ if (ref($op->[0])) {
+ push @{$self->{re}}, $op;
+ } else {
+ $ops{$op->[0]} = $op->[1];
+ }
+ }
+ $self->{ops} = \%ops;
+ }
+ if ($in_loop) { # iff using DS->EventLoop
+ $r->blocking(0);
+ $self->SUPER::new($r, EPOLLIN|EPOLLET);
+ }
+ $self;
+}
+
+# returns a blessed object as the consumer, and a GLOB/IO for the producer
+sub pair {
+ my ($cls, $ops, $in_loop) = @_;
+ my ($c, $p);
+ socketpair($c, $p, AF_UNIX, SOCK_SEQPACKET, 0) or die "socketpair: $!";
+ (new($cls, $c, $ops, $in_loop), $p);
+}
+
+sub close {
+ my ($self) = @_;
+ my $c = $self->{sock} or return;
+ $c->blocking ? delete($self->{sock}) : $self->SUPER::close;
+}
+
+sub event_step {
+ my ($self) = @_;
+ my $c = $self->{sock};
+ my $msg;
+ do {
+ my $n = recv($c, $msg, 128, 0);
+ unless (defined $n) {
+ return if $! == EAGAIN;
+ next if $! == EINTR;
+ $self->close;
+ die "recv: $!";
+ }
+ my $op = $self->{ops}->{$msg};
+ unless ($op) {
+ for my $re_op (@{$self->{re}}) {
+ $msg =~ $re_op->[0] or next;
+ $op = $re_op->[1];
+ last;
+ }
+ }
+ die "BUG: unknown message: `$msg'" unless $op;
+ my ($sub, @args) = @$op;
+ $sub->(@args);
+ return $self->close if $msg eq ''; # close on EOF
+ } while (1);
+}
+
+1;
^ permalink raw reply related [flat|nested] 17+ messages in thread
* [PATCH 02/16] lei_query: default to 10000 messages as documented
2021-02-02 11:46 [PATCH 00/16] lei: -I/--include and more Eric Wong
2021-02-02 11:46 ` [PATCH 01/16] lei: switch to use SEQPACKET socketpair instead of pipe Eric Wong
@ 2021-02-02 11:46 ` Eric Wong
2021-02-02 11:46 ` [PATCH 03/16] lei q: emit progress and counting via PktOp Eric Wong
` (13 subsequent siblings)
15 siblings, 0 replies; 17+ messages in thread
From: Eric Wong @ 2021-02-02 11:46 UTC (permalink / raw)
To: meta
Otherwise, we were only getting 50 matches without (-t)
thread expansion.
---
lib/PublicInbox/LeiQuery.pm | 1 +
1 file changed, 1 insertion(+)
diff --git a/lib/PublicInbox/LeiQuery.pm b/lib/PublicInbox/LeiQuery.pm
index 953d1fc2..dea04c13 100644
--- a/lib/PublicInbox/LeiQuery.pm
+++ b/lib/PublicInbox/LeiQuery.pm
@@ -51,6 +51,7 @@ sub lei_q {
my %mset_opt = map { $_ => $opt->{$_} } qw(thread limit offset);
$mset_opt{asc} = $opt->{'reverse'} ? 1 : 0;
+ $mset_opt{limit} //= 10000;
$mset_opt{qstr} = join(' ', map {;
# Consider spaces in argv to be for phrase search in Xapian.
# In other words, the users should need only care about
^ permalink raw reply related [flat|nested] 17+ messages in thread
* [PATCH 03/16] lei q: emit progress and counting via PktOp
2021-02-02 11:46 [PATCH 00/16] lei: -I/--include and more Eric Wong
2021-02-02 11:46 ` [PATCH 01/16] lei: switch to use SEQPACKET socketpair instead of pipe Eric Wong
2021-02-02 11:46 ` [PATCH 02/16] lei_query: default to 10000 messages as documented Eric Wong
@ 2021-02-02 11:46 ` Eric Wong
2021-02-02 11:46 ` [PATCH 04/16] lei q: support --only, --include and --exclude Eric Wong
` (12 subsequent siblings)
15 siblings, 0 replies; 17+ messages in thread
From: Eric Wong @ 2021-02-02 11:46 UTC (permalink / raw)
To: meta
Sometimes it can be confusing when "lei q" finishes writing to a
Maildir|mbox and the user does not know if it did anything. So show
some per-external progress and stats.
These can be disabled via the new --quiet/-q switch.
We differ slightly from mairix(1) here, as we use stderr
instead of stdout for reporting totals (and we support
parallel queries from various sources).
---
lib/PublicInbox/IPC.pm | 23 +++++++++-------
lib/PublicInbox/LEI.pm | 2 +-
lib/PublicInbox/LeiXSearch.pm | 51 ++++++++++++++++++++++++++---------
lib/PublicInbox/PktOp.pm | 36 +++++++++----------------
t/lei.t | 8 +++---
xt/lei-sigpipe.t | 2 +-
6 files changed, 71 insertions(+), 51 deletions(-)
diff --git a/lib/PublicInbox/IPC.pm b/lib/PublicInbox/IPC.pm
index 689f32d0..50de1bed 100644
--- a/lib/PublicInbox/IPC.pm
+++ b/lib/PublicInbox/IPC.pm
@@ -10,6 +10,7 @@
package PublicInbox::IPC;
use strict;
use v5.10.1;
+use parent qw(Exporter);
use Carp qw(confess croak);
use PublicInbox::DS qw(dwaitpid);
use PublicInbox::Spawn;
@@ -18,6 +19,7 @@ use PublicInbox::WQWorker;
use Socket qw(AF_UNIX MSG_EOR SOCK_STREAM);
my $SEQPACKET = eval { Socket::SOCK_SEQPACKET() }; # portable enough?
use constant PIPE_BUF => $^O eq 'linux' ? 4096 : POSIX::_POSIX_PIPE_BUF();
+our @EXPORT_OK = qw(ipc_freeze ipc_thaw);
my $WQ_MAX_WORKERS = 4096;
my ($enc, $dec);
# ->imports at BEGIN turns sereal_*_with_object into custom ops on 5.14+
@@ -33,12 +35,13 @@ BEGIN {
};
if ($enc && $dec) { # should be custom ops
- *freeze = sub ($) { sereal_encode_with_object $enc, $_[0] };
- *thaw = sub ($) { sereal_decode_with_object $dec, $_[0], my $ret };
+ *ipc_freeze = sub ($) { sereal_encode_with_object $enc, $_[0] };
+ *ipc_thaw = sub ($) { sereal_decode_with_object $dec, $_[0], my $ret };
} else {
eval { # some distros have Storable as a separate package from Perl
require Storable;
- Storable->import(qw(freeze thaw));
+ *ipc_freeze = \&Storable::freeze;
+ *ipc_thaw = \&Storable::thaw;
$enc = 1;
} // warn("Storable (part of Perl) missing: $@\n");
}
@@ -56,12 +59,12 @@ sub _get_rec ($) {
chop($len) eq "\n" or croak "no LF byte in $len";
defined(my $n = read($r, my $buf, $len)) or croak "read error: $!";
$n == $len or croak "short read: $n != $len";
- thaw($buf);
+ ipc_thaw($buf);
}
sub _pack_rec ($) {
my ($ref) = @_;
- my $buf = freeze($ref);
+ my $buf = ipc_freeze($ref);
length($buf) . "\n" . $buf;
}
@@ -275,7 +278,7 @@ sub recv_and_run {
$n = length($buf);
}
# Sereal dies on truncated data, Storable returns undef
- my $args = thaw($buf) // die "thaw error on buffer of size: $n";
+ my $args = ipc_thaw($buf) // die "thaw error on buffer of size: $n";
undef $buf;
my $sub = shift @$args;
eval { $self->$sub(@$args) };
@@ -301,15 +304,15 @@ sub wq_do { # always async
my ($self, $sub, $ios, @args) = @_;
if (my $s1 = $self->{-wq_s1}) { # run in worker
my $fds = [ map { fileno($_) } @$ios ];
- my $n = $send_cmd->($s1, $fds, freeze([$sub, @args]), MSG_EOR);
+ my $buf = ipc_freeze([$sub, @args]);
+ my $n = $send_cmd->($s1, $fds, $buf, MSG_EOR);
return if defined($n); # likely
croak "sendmsg: $! (check RLIMIT_NOFILE)" if $!{ETOOMANYREFS};
croak "sendmsg: $!" if !$!{EMSGSIZE};
socketpair(my $r, my $w, AF_UNIX, SOCK_STREAM, 0) or
croak "socketpair: $!";
- my $buf = freeze([$sub, @args]);
$n = $send_cmd->($s1, [ fileno($r) ],
- freeze(['do_sock_stream', length($buf)]),
+ ipc_freeze(['do_sock_stream', length($buf)]),
MSG_EOR) // croak "sendmsg: $!";
undef $r;
$n = $send_cmd->($w, $fds, $buf, 0) // croak "sendmsg: $!";
@@ -461,6 +464,6 @@ sub DESTROY {
}
# Sereal doesn't have dclone
-sub deep_clone { thaw(freeze($_[-1])) }
+sub deep_clone { ipc_thaw(ipc_freeze($_[-1])) }
1;
diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index 737db1e1..6c2515dc 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -104,7 +104,7 @@ our %CMD = ( # sorted in order of importance/use:
'q' => [ 'SEARCH_TERMS...', 'search for messages matching terms', qw(
save-as=s output|mfolder|o=s format|f=s dedupe|d=s thread|t augment|a
sort|s=s reverse|r offset=i remote! local! external! pretty
- mua-cmd|mua=s no-torsocks torsocks=s verbose|v
+ mua-cmd|mua=s no-torsocks torsocks=s verbose|v quiet|q
received-after=s received-before=s sent-after=s sent-since=s),
PublicInbox::LeiQuery::curl_opt(), opt_dash('limit|n=i', '[0-9]+') ],
diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm
index e577ab09..95862306 100644
--- a/lib/PublicInbox/LeiXSearch.pm
+++ b/lib/PublicInbox/LeiXSearch.pm
@@ -8,12 +8,11 @@ package PublicInbox::LeiXSearch;
use strict;
use v5.10.1;
use parent qw(PublicInbox::LeiSearch PublicInbox::IPC);
-use PublicInbox::DS qw(dwaitpid);
-use PublicInbox::PktOp;
+use PublicInbox::DS qw(dwaitpid now);
+use PublicInbox::PktOp qw(pkt_do);
use PublicInbox::Import;
use File::Temp 0.19 (); # 0.19 for ->newdir
use File::Spec ();
-use Socket qw(MSG_EOR);
use PublicInbox::Search qw(xap_terms);
use PublicInbox::Spawn qw(popen_rd spawn which);
use PublicInbox::MID qw(mids);
@@ -97,7 +96,7 @@ sub over {}
sub _mset_more ($$) {
my ($mset, $mo) = @_;
my $size = $mset->size;
- $size && (($mo->{offset} += $size) < ($mo->{limit} // 10000));
+ $size >= $mo->{limit} && (($mo->{offset} += $size) < $mo->{limit});
}
# $startq will EOF when query_prepare is done augmenting and allow
@@ -115,16 +114,15 @@ sub query_thread_mset { # for --thread
my $startq = delete $lei->{startq};
my ($srch, $over) = ($ibxish->search, $ibxish->over);
- unless ($srch && $over) {
- my $desc = $ibxish->{inboxdir} // $ibxish->{topdir};
- warn "$desc not indexed by Xapian\n";
- return;
- }
+ my $desc = $ibxish->{inboxdir} // $ibxish->{topdir};
+ return warn("$desc not indexed by Xapian\n") unless ($srch && $over);
my $mo = { %{$lei->{mset_opt}} };
my $mset;
my $each_smsg = $lei->{ovv}->ovv_each_smsg_cb($lei, $ibxish);
do {
$mset = $srch->mset($mo->{qstr}, $mo);
+ pkt_do($lei->{pkt_op}, 'mset_progress', $desc, $mset->size,
+ $mset->get_matches_estimated);
my $ids = $srch->mset_to_artnums($mset, $mo);
my $ctx = { ids => $ids };
my $i = 0;
@@ -156,6 +154,8 @@ sub query_mset { # non-parallel for non-"--thread" users
my $each_smsg = $lei->{ovv}->ovv_each_smsg_cb($lei, $self);
do {
$mset = $self->mset($mo->{qstr}, $mo);
+ pkt_do($lei->{pkt_op}, 'mset_progress', 'xsearch',
+ $mset->size, $mset->get_matches_estimated);
for my $mitem ($mset->items) {
my $smsg = smsg_for($self, $mitem) or next;
wait_startq($startq) if $startq;
@@ -174,6 +174,16 @@ sub each_eml { # callback for MboxReader->mboxrd
$smsg->{$_} //= '' for qw(from to cc ds subject references mid);
delete @$smsg{qw(From Subject -ds -ts)};
if (my $startq = delete($lei->{startq})) { wait_startq($startq) }
+ ++$lei->{-nr_remote_eml};
+ if (!$lei->{opt}->{quiet}) {
+ my $now = now();
+ my $next = $lei->{-next_progress} //= ($now + 1);
+ if ($now > $next) {
+ $lei->{-next_progress} = $now + 1;
+ my $nr = $lei->{-nr_remote_eml};
+ $lei->err("# $lei->{-current_url} $nr/?");
+ }
+ }
$each_smsg->($smsg, undef, $eml);
}
@@ -223,6 +233,8 @@ sub query_remote_mboxrd {
my $tor = $opt->{torsocks} //= 'auto';
my $each_smsg = $lei->{ovv}->ovv_each_smsg_cb($lei);
for my $uri (@$uris) {
+ $lei->{-current_url} = $uri->as_string;
+ $lei->{-nr_remote_eml} = 0;
$uri->query_form(@qform);
my $cmd = [ @cmd, $uri->as_string ];
if ($tor eq 'auto' && substr($uri->host, -6) eq '.onion' &&
@@ -246,7 +258,12 @@ sub query_remote_mboxrd {
$lei, $each_smsg);
};
return $lei->fail("E: @$cmd: $@") if $@;
- next unless $?;
+ if ($? == 0) {
+ my $nr = $lei->{-nr_remote_eml};
+ pkt_do($lei->{pkt_op}, 'mset_progress',
+ $lei->{-current_url}, $nr, $nr);
+ next;
+ }
seek($cerr, $coff, SEEK_SET) or warn "seek(curl stderr): $!\n";
my $e = do { local $/; <$cerr> } //
die "read(curl stderr): $!\n";
@@ -299,9 +316,19 @@ Error closing $lei->{ovv}->{dst}: $!
}
$lei->start_mua;
}
+ $lei->{opt}->{quiet} or
+ $lei->err('# ', $lei->{-mset_total} // 0, " matches");
$lei->dclose;
}
+sub mset_progress { # called via pkt_op/pkt_do from workers
+ my ($lei, $pargs) = @_;
+ my ($desc, $mset_size, $mset_total_est) = @$pargs;
+ return if $lei->{opt}->{quiet};
+ $lei->{-mset_total} += $mset_size;
+ $lei->err("# $desc $mset_size/$mset_total_est");
+}
+
sub do_post_augment {
my ($lei, $zpipe, $au_done) = @_;
my $l2m = $lei->{l2m} or die 'BUG: no {l2m}';
@@ -354,8 +381,7 @@ sub query_prepare { # called by wq_do
delete $lei->{l2m}->{-wq_s1};
eval { $lei->{l2m}->do_augment($lei) };
$lei->fail($@) if $@;
- send($lei->{pkt_op}, '.', MSG_EOR) == 1 or
- die "do_post_augment trigger: $!"
+ pkt_do($lei->{pkt_op}, '.') == 1 or die "do_post_augment trigger: $!"
}
sub fail_handler ($;$$) {
@@ -388,6 +414,7 @@ sub do_query {
'!' => [ \&fail_handler, $lei ],
'.' => [ \&do_post_augment, $lei, $zpipe, $au_done ],
'' => [ \&query_done, $lei ],
+ 'mset_progress' => [ \&mset_progress, $lei ],
};
(my $op, $lei->{pkt_op}) = PublicInbox::PktOp->pair($ops, $in_loop);
my ($lei_ipc, @io) = $lei->atfork_parent_wq($self);
diff --git a/lib/PublicInbox/PktOp.pm b/lib/PublicInbox/PktOp.pm
index d5b95a73..12839e71 100644
--- a/lib/PublicInbox/PktOp.pm
+++ b/lib/PublicInbox/PktOp.pm
@@ -9,25 +9,16 @@
package PublicInbox::PktOp;
use strict;
use v5.10.1;
-use parent qw(PublicInbox::DS);
+use parent qw(PublicInbox::DS Exporter);
use Errno qw(EAGAIN EINTR);
use PublicInbox::Syscall qw(EPOLLIN EPOLLET);
use Socket qw(AF_UNIX MSG_EOR SOCK_SEQPACKET);
+use PublicInbox::IPC qw(ipc_freeze ipc_thaw);
+our @EXPORT_OK = qw(pkt_do);
sub new {
my ($cls, $r, $ops, $in_loop) = @_;
my $self = bless { sock => $r, ops => $ops, re => [] }, $cls;
- if (ref($ops) eq 'ARRAY') {
- my %ops;
- for my $op (@$ops) {
- if (ref($op->[0])) {
- push @{$self->{re}}, $op;
- } else {
- $ops{$op->[0]} = $op->[1];
- }
- }
- $self->{ops} = \%ops;
- }
if ($in_loop) { # iff using DS->EventLoop
$r->blocking(0);
$self->SUPER::new($r, EPOLLIN|EPOLLET);
@@ -43,6 +34,11 @@ sub pair {
(new($cls, $c, $ops, $in_loop), $p);
}
+sub pkt_do { # for the producer to trigger event_step in consumer
+ my ($producer, $cmd, @args) = @_;
+ send($producer, @args ? "$cmd\0".ipc_freeze(\@args) : $cmd, MSG_EOR);
+}
+
sub close {
my ($self) = @_;
my $c = $self->{sock} or return;
@@ -54,24 +50,18 @@ sub event_step {
my $c = $self->{sock};
my $msg;
do {
- my $n = recv($c, $msg, 128, 0);
+ my $n = recv($c, $msg, 4096, 0);
unless (defined $n) {
return if $! == EAGAIN;
next if $! == EINTR;
$self->close;
die "recv: $!";
}
- my $op = $self->{ops}->{$msg};
- unless ($op) {
- for my $re_op (@{$self->{re}}) {
- $msg =~ $re_op->[0] or next;
- $op = $re_op->[1];
- last;
- }
- }
- die "BUG: unknown message: `$msg'" unless $op;
+ my ($cmd, $pargs) = split(/\0/, $msg, 2);
+ my $op = $self->{ops}->{$cmd // $msg};
+ die "BUG: unknown message: `$cmd'" unless $op;
my ($sub, @args) = @$op;
- $sub->(@args);
+ $sub->(@args, $pargs ? ipc_thaw($pargs) : ());
return $self->close if $msg eq ''; # close on EOF
} while (1);
}
diff --git a/t/lei.t b/t/lei.t
index 3f6702e6..a46e46f2 100644
--- a/t/lei.t
+++ b/t/lei.t
@@ -174,11 +174,11 @@ SKIP: {
}
$lei->('add-external', $url);
my $mid = '20140421094015.GA8962@dcvr.yhbt.net';
- ok($lei->('q', "m:$mid"), "query $url");
+ ok($lei->('q', '-q', "m:$mid"), "query $url");
is($err, '', "no errors on $url");
my $res = $json->decode($out);
is($res->[0]->{'m'}, "<$mid>", "got expected mid from $url");
- ok($lei->('q', "m:$mid", 'd:..20101002'), 'no results, no error');
+ ok($lei->('q', '-q', "m:$mid", 'd:..20101002'), 'no results, no error');
is($err, '', 'no output on 404, matching local FS behavior');
is($out, "[null]\n", 'got null results');
$lei->('forget-external', $url);
@@ -291,12 +291,12 @@ my $test_external = sub {
my @s = grep(/^Subject:/, $cat->());
is(scalar(@s), 1, "1 result in mbox$sfx");
$lei->('q', '-a', '-o', "mboxcl2:$f", 's:see attachment');
- is($err, '', 'no errors from augment');
+ is(grep(!/^#/, $err), 0, 'no errors from augment');
@s = grep(/^Subject:/, my @wtf = $cat->());
is(scalar(@s), 2, "2 results in mbox$sfx");
$lei->('q', '-a', '-o', "mboxcl2:$f", 's:nonexistent');
- is($err, '', "no errors on no results ($sfx)");
+ is(grep(!/^#/, $err), 0, "no errors on no results ($sfx)");
my @s2 = grep(/^Subject:/, $cat->());
is_deeply(\@s2, \@s,
diff --git a/xt/lei-sigpipe.t b/xt/lei-sigpipe.t
index 448bd7db..1aa9ed07 100644
--- a/xt/lei-sigpipe.t
+++ b/xt/lei-sigpipe.t
@@ -15,7 +15,7 @@ my $do_test = sub {
pipe(my ($r, $w)) or BAIL_OUT $!;
open my $err, '+>', undef or BAIL_OUT $!;
my $opt = { run_mode => 0, 1 => $w, 2 => $err };
- my $cmd = [qw(lei q -t), @$out, 'bytes:1..'];
+ my $cmd = [qw(lei q -q -t), @$out, 'bytes:1..'];
my $tp = start_script($cmd, $env, $opt);
close $w;
sysread($r, my $buf, 1);
^ permalink raw reply related [flat|nested] 17+ messages in thread
* [PATCH 04/16] lei q: support --only, --include and --exclude
2021-02-02 11:46 [PATCH 00/16] lei: -I/--include and more Eric Wong
` (2 preceding siblings ...)
2021-02-02 11:46 ` [PATCH 03/16] lei q: emit progress and counting via PktOp Eric Wong
@ 2021-02-02 11:46 ` Eric Wong
2021-02-02 11:46 ` [PATCH 05/16] lei: complete: do not complete non-arg options w/ help text Eric Wong
` (11 subsequent siblings)
15 siblings, 0 replies; 17+ messages in thread
From: Eric Wong @ 2021-02-02 11:46 UTC (permalink / raw)
To: meta
-I is short for --include since it's standard for C compilers
(along with Perl and Ruby). There are no single-character
shortcuts for --exclude or --only, since I don't expect
--exclude to be used very often and --only is already short (and
will support shell completion).
---
lib/PublicInbox/LEI.pm | 1 +
lib/PublicInbox/LeiExternal.pm | 12 +++++-----
lib/PublicInbox/LeiQuery.pm | 42 ++++++++++++++++++++++++----------
t/lei_external.t | 2 +-
4 files changed, 38 insertions(+), 19 deletions(-)
diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index 6c2515dc..ffbc2503 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -104,6 +104,7 @@ our %CMD = ( # sorted in order of importance/use:
'q' => [ 'SEARCH_TERMS...', 'search for messages matching terms', qw(
save-as=s output|mfolder|o=s format|f=s dedupe|d=s thread|t augment|a
sort|s=s reverse|r offset=i remote! local! external! pretty
+ include|I=s@ exclude=s@ only=s@
mua-cmd|mua=s no-torsocks torsocks=s verbose|v quiet|q
received-after=s received-before=s sent-after=s sent-since=s),
PublicInbox::LeiQuery::curl_opt(), opt_dash('limit|n=i', '[0-9]+') ],
diff --git a/lib/PublicInbox/LeiExternal.pm b/lib/PublicInbox/LeiExternal.pm
index b1176824..3853cfc1 100644
--- a/lib/PublicInbox/LeiExternal.pm
+++ b/lib/PublicInbox/LeiExternal.pm
@@ -9,7 +9,7 @@ use parent qw(Exporter);
our @EXPORT = qw(lei_ls_external lei_add_external lei_forget_external);
use PublicInbox::Config;
-sub _externals_each {
+sub externals_each {
my ($self, $cb, @arg) = @_;
my $cfg = $self->_lei_cfg(0);
my %boost;
@@ -32,14 +32,14 @@ sub _externals_each {
sub lei_ls_external {
my ($self, @argv) = @_;
my ($OFS, $ORS) = $self->{opt}->{z} ? ("\0", "\0\0") : (" ", "\n");
- $self->_externals_each(sub {
+ externals_each($self, sub {
my ($loc, $boost_val) = @_;
$self->out($loc, $OFS, 'boost=', $boost_val, $ORS);
});
}
-sub _canonicalize {
- my ($location) = @_;
+sub ext_canonicalize {
+ my ($location) = $_[-1];
if ($location !~ m!\Ahttps?://!) {
PublicInbox::Config::rel2abs_collapsed($location);
} else {
@@ -56,7 +56,7 @@ sub lei_add_external {
my ($self, $location) = @_;
my $cfg = $self->_lei_cfg(1);
my $new_boost = $self->{opt}->{boost} // 0;
- $location = _canonicalize($location);
+ $location = ext_canonicalize($location);
if ($location !~ m!\Ahttps?://! && !-d $location) {
return $self->fail("$location not a directory");
}
@@ -74,7 +74,7 @@ sub lei_forget_external {
my %seen;
for my $loc (@locations) {
my (@unset, @not_found);
- for my $l ($loc, _canonicalize($loc)) {
+ for my $l ($loc, ext_canonicalize($loc)) {
next if $seen{$l}++;
my $key = "external.$l.boost";
delete($cfg->{$key});
diff --git a/lib/PublicInbox/LeiQuery.pm b/lib/PublicInbox/LeiQuery.pm
index dea04c13..fd8a3bca 100644
--- a/lib/PublicInbox/LeiQuery.pm
+++ b/lib/PublicInbox/LeiQuery.pm
@@ -7,6 +7,11 @@ use strict;
use v5.10.1;
use PublicInbox::DS qw(dwaitpid);
+sub prep_ext { # externals_each callback
+ my ($lxs, $exclude, $loc) = @_;
+ $lxs->prepare_external($loc) unless $exclude->{$loc};
+}
+
# the main "lei q SEARCH_TERMS" method
sub lei_q {
my ($self, @argv) = @_;
@@ -14,22 +19,35 @@ sub lei_q {
require PublicInbox::LeiOverview;
PublicInbox::Config->json; # preload before forking
my $opt = $self->{opt};
+ # prepare any number of LeiXSearch || LeiSearch || Inbox || URL
my $lxs = $self->{lxs} = PublicInbox::LeiXSearch->new;
- # any number of LeiXSearch || LeiSearch || Inbox
- if ($opt->{'local'} //= 1) { # --local is enabled by default
+ my @only = @{$opt->{only} // []};
+ # --local is enabled by default unless --only is used
+ # we'll allow "--only $LOCATION --local"
+ if ($opt->{'local'} //= scalar(@only) ? 0 : 1) {
my $sto = $self->_lei_store(1);
$lxs->prepare_external($sto->search);
}
-
- # --external is enabled by default, but allow --no-external
- if ($opt->{external} //= 1) {
- my $cb = $lxs->can('prepare_external');
- my $ne = $self->_externals_each($cb, $lxs);
- $opt->{remote} //= $ne == $lxs->remotes;
- if ($opt->{'local'}) {
- delete($lxs->{remotes}) if !$opt->{remote};
- } else {
- delete($lxs->{locals});
+ if (@only) {
+ for my $loc (@only) {
+ $lxs->prepare_external($self->ext_canonicalize($loc));
+ }
+ } else {
+ for my $loc (@{$opt->{include} // []}) {
+ $lxs->prepare_external($self->ext_canonicalize($loc));
+ }
+ # --external is enabled by default, but allow --no-external
+ if ($opt->{external} //= 1) {
+ my %x = map {;
+ ($self->ext_canonicalize($_), 1)
+ } @{$self->{exclude} // []};
+ my $ne = $self->externals_each(\&prep_ext, $lxs, \%x);
+ $opt->{remote} //= !($lxs->locals - $opt->{'local'});
+ if ($opt->{'local'}) {
+ delete($lxs->{remotes}) if !$opt->{remote};
+ } else {
+ delete($lxs->{locals});
+ }
}
}
unless ($lxs->locals || $lxs->remotes) {
diff --git a/t/lei_external.t b/t/lei_external.t
index 1f0048a1..587990db 100644
--- a/t/lei_external.t
+++ b/t/lei_external.t
@@ -4,7 +4,7 @@ use v5.10.1;
use Test::More;
my $cls = 'PublicInbox::LeiExternal';
require_ok $cls;
-my $canon = $cls->can('_canonicalize');
+my $canon = $cls->can('ext_canonicalize');
my $exp = 'https://example.com/my-inbox/';
is($canon->('https://example.com/my-inbox'), $exp, 'trailing slash added');
is($canon->('https://example.com/my-inbox//'), $exp, 'trailing slash removed');
^ permalink raw reply related [flat|nested] 17+ messages in thread
* [PATCH 05/16] lei: complete: do not complete non-arg options w/ help text
2021-02-02 11:46 [PATCH 00/16] lei: -I/--include and more Eric Wong
` (3 preceding siblings ...)
2021-02-02 11:46 ` [PATCH 04/16] lei q: support --only, --include and --exclude Eric Wong
@ 2021-02-02 11:46 ` Eric Wong
2021-02-02 11:46 ` [PATCH 06/16] lei: q: shell completion for --(include|exclude|only) Eric Wong
` (10 subsequent siblings)
15 siblings, 0 replies; 17+ messages in thread
From: Eric Wong @ 2021-02-02 11:46 UTC (permalink / raw)
To: meta
Some of our command-line switches take no arguments, and need
no completion for those arguments.
---
lib/PublicInbox/LEI.pm | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index ffbc2503..b0a8358a 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -737,8 +737,7 @@ sub lei__complete {
my $opt = quotemeta $1;
puts $self, map {
my $v = $OPTDESC{$_};
- $v = $v->[0] if ref($v);
- my @v = split(/\|/, $v);
+ my @v = ref($v) ? split(/\|/, $v->[0]) : ();
# get rid of ALL CAPS placeholder (e.g "OUT")
# (TODO: completion for external paths)
shift(@v) if uc($v[0]) eq $v[0];
^ permalink raw reply related [flat|nested] 17+ messages in thread
* [PATCH 06/16] lei: q: shell completion for --(include|exclude|only)
2021-02-02 11:46 [PATCH 00/16] lei: -I/--include and more Eric Wong
` (4 preceding siblings ...)
2021-02-02 11:46 ` [PATCH 05/16] lei: complete: do not complete non-arg options w/ help text Eric Wong
@ 2021-02-02 11:46 ` Eric Wong
2021-02-02 11:46 ` [PATCH 07/16] lei_xsearch: truncate curl stderr after reading it Eric Wong
` (9 subsequent siblings)
15 siblings, 0 replies; 17+ messages in thread
From: Eric Wong @ 2021-02-02 11:46 UTC (permalink / raw)
To: meta
Because .onion URL names are long!
---
lib/PublicInbox/LEI.pm | 7 +++++++
lib/PublicInbox/LeiQuery.pm | 16 ++++++++++++++++
t/lei.t | 6 ++++++
3 files changed, 29 insertions(+)
diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index b0a8358a..bb7efd59 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -229,6 +229,13 @@ my %OPTDESC = (
'q format|f=s' => [
'OUT|maildir|mboxrd|mboxcl2|mboxcl|mboxo|html|json|jsonl|concatjson',
'specify output format, default depends on --output'],
+'q exclude=s@' => [ 'URL_OR_PATHNAME',
+ 'exclude specified external(s) from search' ],
+'q include|I=s@' => [ 'URL_OR_PATHNAME',
+ 'include specified external(s) in search' ],
+'q only=s@' => [ 'URL_OR_PATHNAME',
+ 'only use specified external(s) for search' ],
+
'ls-query format|f=s' => $ls_format,
'ls-external format|f=s' => $ls_format,
diff --git a/lib/PublicInbox/LeiQuery.pm b/lib/PublicInbox/LeiQuery.pm
index fd8a3bca..7c1e3606 100644
--- a/lib/PublicInbox/LeiQuery.pm
+++ b/lib/PublicInbox/LeiQuery.pm
@@ -94,6 +94,22 @@ sub lei_q {
$lxs->do_query($self);
}
+# shell completion helper called by lei__complete
+sub _complete_q {
+ my ($self, @argv) = @_;
+ my $ext = qr/\A(?:-I|(?:--(?:include|exclude|only)))\z/;
+ # $argv[-1] =~ $ext and return $self->_complete_forget_external;
+ my @cur;
+ while (@argv) {
+ if ($argv[-1] =~ $ext) {
+ my @c = $self->_complete_forget_external(@cur);
+ return @c if @c;
+ }
+ unshift(@cur, pop @argv);
+ }
+ ();
+}
+
# Stuff we may pass through to curl (as of 7.64.0), see curl manpage for
# details, so most options which make sense for HTTP/HTTPS (including proxy
# support for Tor and other methods of getting past weird networks).
diff --git a/t/lei.t b/t/lei.t
index a46e46f2..33f47ae4 100644
--- a/t/lei.t
+++ b/t/lei.t
@@ -232,6 +232,12 @@ my $test_external = sub {
"partial completion for URL $u");
is($out, "https://example.com/ibx/\n",
"completed partial URL $u");
+ for my $qo (qw(-I --include --exclude --only)) {
+ ok($lei->(qw(_complete lei q), $qo, $u),
+ "partial completion for URL q $qo $u");
+ is($out, "https://example.com/ibx/\n",
+ "completed partial URL $u on q $qo");
+ }
}
$lei->('ls-external');
^ permalink raw reply related [flat|nested] 17+ messages in thread
* [PATCH 07/16] lei_xsearch: truncate curl stderr after reading it
2021-02-02 11:46 [PATCH 00/16] lei: -I/--include and more Eric Wong
` (5 preceding siblings ...)
2021-02-02 11:46 ` [PATCH 06/16] lei: q: shell completion for --(include|exclude|only) Eric Wong
@ 2021-02-02 11:46 ` Eric Wong
2021-02-02 11:46 ` [PATCH 08/16] lib: explicitly distinguish oneshot use Eric Wong
` (8 subsequent siblings)
15 siblings, 0 replies; 17+ messages in thread
From: Eric Wong @ 2021-02-02 11:46 UTC (permalink / raw)
To: meta
We may have further URLs to read in that process, so ensure
we don't end up having tail send stale data.
---
lib/PublicInbox/LeiXSearch.pm | 1 +
1 file changed, 1 insertion(+)
diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm
index 95862306..5cf02136 100644
--- a/lib/PublicInbox/LeiXSearch.pm
+++ b/lib/PublicInbox/LeiXSearch.pm
@@ -268,6 +268,7 @@ sub query_remote_mboxrd {
my $e = do { local $/; <$cerr> } //
die "read(curl stderr): $!\n";
$coff += length($e);
+ truncate($cerr, 0);
next if (($? >> 8) == 22 && $e =~ /\b404\b/);
$lei->child_error($?);
$uri->query_form(q => $lei->{mset_opt}->{qstr});
^ permalink raw reply related [flat|nested] 17+ messages in thread
* [PATCH 08/16] lib: explicitly distinguish oneshot use
2021-02-02 11:46 [PATCH 00/16] lei: -I/--include and more Eric Wong
` (6 preceding siblings ...)
2021-02-02 11:46 ` [PATCH 07/16] lei_xsearch: truncate curl stderr after reading it Eric Wong
@ 2021-02-02 11:46 ` Eric Wong
2021-02-02 11:46 ` [PATCH 09/16] lei q: do not leave temporary files after oneshot exit Eric Wong
` (7 subsequent siblings)
15 siblings, 0 replies; 17+ messages in thread
From: Eric Wong @ 2021-02-02 11:46 UTC (permalink / raw)
To: meta
The daemon must not be fooled into thinking it's in oneshot
after a lei client disconnects and erases {sock}.
---
lib/PublicInbox/LEI.pm | 23 ++++++++++++++---------
lib/PublicInbox/LeiXSearch.pm | 5 ++---
2 files changed, 16 insertions(+), 12 deletions(-)
diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index bb7efd59..d6fa814c 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -284,11 +284,13 @@ sub x_it ($$) {
dump_and_clear_log();
if (my $sock = $self->{sock}) {
send($sock, "x_it $code", MSG_EOR);
- } elsif (my $signum = ($code & 127)) { # oneshot, usually SIGPIPE (13)
+ } elsif (!$self->{oneshot}) {
+ return; # client disconnected, noop
+ } elsif (my $signum = ($code & 127)) { # usually SIGPIPE (13)
$SIG{PIPE} = 'DEFAULT'; # $SIG{$signum} doesn't work
kill $signum, $$;
sleep; # wait for signal
- } else { # oneshot
+ } else {
# don't want to end up using $? from child processes
for my $f (qw(lxs l2m)) {
my $wq = delete $self->{$f} or next;
@@ -334,10 +336,9 @@ sub child_error { # passes non-fatal curl exit codes to user
my ($self, $child_error) = @_; # child_error is $?
if (my $sock = $self->{sock}) { # send to lei(1) client
send($sock, "child_error $child_error", MSG_EOR);
- } else { # oneshot
+ } elsif ($self->{oneshot}) {
$self->{child_error} = $child_error;
- }
- undef;
+ } # else noop if client disconnected
}
sub atfork_prepare_wq {
@@ -784,7 +785,7 @@ sub start_mua {
push @cmd, $mfolder unless defined($replaced);
if (my $sock = $self->{sock}) { # lei(1) client process runs it
send($sock, exec_buf(\@cmd, {}), MSG_EOR);
- } else { # oneshot
+ } elsif ($self->{oneshot}) {
$self->{"mua.pid.$self.$$"} = spawn(\@cmd);
}
}
@@ -802,13 +803,16 @@ sub start_pager {
$new_env->{MORE} = 'FRX' if $^O eq 'freebsd';
pipe(my ($r, $wpager)) or return warn "pipe: $!";
my $rdr = { 0 => $r, 1 => $self->{1}, 2 => $self->{2} };
- my $pgr = [ undef, @$rdr{1, 2}, $$ ];
+ my $pgr = [ undef, @$rdr{1, 2} ];
if (my $sock = $self->{sock}) { # lei(1) process runs it
delete @$new_env{keys %$env}; # only set iff unset
my $fds = [ map { fileno($_) } @$rdr{0..2} ];
$send_cmd->($sock, $fds, exec_buf([$pager], $new_env), MSG_EOR);
- } else {
+ } elsif ($self->{oneshot}) {
$pgr->[0] = spawn([$pager], $new_env, $rdr);
+ $pgr->[3] = $$; # we'll reap it
+ } else {
+ die 'BUG: start_pager w/o socket';
}
$self->{1} = $wpager;
$self->{2} = $wpager if -t $self->{2};
@@ -823,7 +827,7 @@ sub stop_pager {
# do not restore original stdout, just close it so we error out
close(delete($self->{1})) if $self->{1};
my $pid = $pgr->[0];
- dwaitpid($pid, undef, $self->{sock}) if $pid && $pgr->[3] == $$;
+ dwaitpid($pid) if $pid && ($pgr->[3] // 0) == $$;
}
sub accept_dispatch { # Listener {post_accept} callback
@@ -1056,6 +1060,7 @@ sub oneshot {
local %PATH2CFG;
umask(077) // die("umask(077): $!");
my $self = bless {
+ oneshot => 1,
0 => *STDIN{GLOB},
1 => *STDOUT{GLOB},
2 => *STDERR{GLOB},
diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm
index 5cf02136..e997431f 100644
--- a/lib/PublicInbox/LeiXSearch.pm
+++ b/lib/PublicInbox/LeiXSearch.pm
@@ -409,7 +409,6 @@ sub do_query {
fcntl($lei->{startq}, 1031, 4096) if $^O eq 'linux';
$zpipe = $l2m->pre_augment($lei);
}
- my $in_loop = exists $lei->{sock};
my $ops = {
'|' => [ \&sigpipe_handler, $lei ],
'!' => [ \&fail_handler, $lei ],
@@ -417,7 +416,7 @@ sub do_query {
'' => [ \&query_done, $lei ],
'mset_progress' => [ \&mset_progress, $lei ],
};
- (my $op, $lei->{pkt_op}) = PublicInbox::PktOp->pair($ops, $in_loop);
+ (my $op, $lei->{pkt_op}) = PublicInbox::PktOp->pair($ops, !$lei->{oneshot});
my ($lei_ipc, @io) = $lei->atfork_parent_wq($self);
delete($lei->{pkt_op});
@@ -428,7 +427,7 @@ sub do_query {
}
start_query($self, \@io, $lei_ipc);
$self->wq_close(1);
- unless ($in_loop) {
+ if ($lei->{oneshot}) {
# for the $lei_ipc->atfork_child_wq PIPE handler:
while ($op->{sock}) { $op->event_step }
}
^ permalink raw reply related [flat|nested] 17+ messages in thread
* [PATCH 09/16] lei q: do not leave temporary files after oneshot exit
2021-02-02 11:46 [PATCH 00/16] lei: -I/--include and more Eric Wong
` (7 preceding siblings ...)
2021-02-02 11:46 ` [PATCH 08/16] lib: explicitly distinguish oneshot use Eric Wong
@ 2021-02-02 11:46 ` Eric Wong
2021-02-02 11:46 ` [PATCH 10/16] cmd_ipc4: fix comments and formatting Eric Wong
` (6 subsequent siblings)
15 siblings, 0 replies; 17+ messages in thread
From: Eric Wong @ 2021-02-02 11:46 UTC (permalink / raw)
To: meta
Avoid on-stack shortcuts which may prevent destructors from
firing since we're not inside the event loop. We'll also tidy
up the unlink mechanism in LeiOverview while we're at it.
---
lib/PublicInbox/LEI.pm | 20 +++++++++++---------
lib/PublicInbox/LeiOverview.pm | 7 +++----
lib/PublicInbox/LeiQuery.pm | 4 ++--
lib/PublicInbox/LeiXSearch.pm | 5 +++--
xt/lei-sigpipe.t | 27 +++++++++++++++++++++++++--
5 files changed, 44 insertions(+), 19 deletions(-)
diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index d6fa814c..44afced3 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -284,20 +284,22 @@ sub x_it ($$) {
dump_and_clear_log();
if (my $sock = $self->{sock}) {
send($sock, "x_it $code", MSG_EOR);
- } elsif (!$self->{oneshot}) {
- return; # client disconnected, noop
- } elsif (my $signum = ($code & 127)) { # usually SIGPIPE (13)
- $SIG{PIPE} = 'DEFAULT'; # $SIG{$signum} doesn't work
- kill $signum, $$;
- sleep; # wait for signal
- } else {
+ } elsif ($self->{oneshot}) {
# don't want to end up using $? from child processes
for my $f (qw(lxs l2m)) {
my $wq = delete $self->{$f} or next;
$wq->DESTROY;
}
- $quit->($code >> 8);
- }
+ # cleanup anything that has tempfiles
+ delete @$self{qw(ovv dedupe)};
+ if (my $signum = ($code & 127)) { # usually SIGPIPE (13)
+ $SIG{PIPE} = 'DEFAULT'; # $SIG{$signum} doesn't work
+ kill $signum, $$;
+ sleep; # wait for signal
+ } else {
+ $quit->($code >> 8);
+ }
+ } # else ignore if client disconnected
}
sub err ($;@) {
diff --git a/lib/PublicInbox/LeiOverview.pm b/lib/PublicInbox/LeiOverview.pm
index 1d62ffe2..31cc67f1 100644
--- a/lib/PublicInbox/LeiOverview.pm
+++ b/lib/PublicInbox/LeiOverview.pm
@@ -26,16 +26,15 @@ sub _iso8601 ($) { strftime('%Y-%m-%dT%H:%M:%SZ', gmtime($_[0])) }
# we open this in the parent process before ->wq_do handoff
sub ovv_out_lk_init ($) {
my ($self) = @_;
- $self->{tmp_lk_id} = "$self.$$";
my $tmp = File::Temp->new("lei-ovv.dst.$$.lock-XXXXXX",
TMPDIR => 1, UNLINK => 0);
- $self->{lock_path} = $tmp->filename;
+ $self->{"lk_id.$self.$$"} = $self->{lock_path} = $tmp->filename;
}
sub ovv_out_lk_cancel ($) {
my ($self) = @_;
- ($self->{tmp_lk_id}//'') eq "$self.$$" and
- unlink(delete($self->{lock_path}));
+ my $lock_path = delete $self->{"lk_id.$self.$$"} or return;
+ unlink($lock_path);
}
sub detect_fmt ($$) {
diff --git a/lib/PublicInbox/LeiQuery.pm b/lib/PublicInbox/LeiQuery.pm
index 7c1e3606..ca214ca1 100644
--- a/lib/PublicInbox/LeiQuery.pm
+++ b/lib/PublicInbox/LeiQuery.pm
@@ -54,7 +54,7 @@ sub lei_q {
return $self->fail('no local or remote inboxes to search');
}
my $xj = $lxs->concurrency($opt);
- my $ovv = PublicInbox::LeiOverview->new($self) or return;
+ PublicInbox::LeiOverview->new($self) or return;
$self->atfork_prepare_wq($lxs);
$lxs->wq_workers_start('lei_xsearch', $xj, $self->oldset);
delete $lxs->{-ipc_atfork_child_close};
@@ -90,7 +90,7 @@ sub lei_q {
# descending docid order
$mset_opt{relevance} //= -2 if $opt->{thread};
$self->{mset_opt} = \%mset_opt;
- $ovv->ovv_begin($self);
+ $self->{ovv}->ovv_begin($self);
$lxs->do_query($self);
}
diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm
index e997431f..b3cace74 100644
--- a/lib/PublicInbox/LeiXSearch.pm
+++ b/lib/PublicInbox/LeiXSearch.pm
@@ -387,8 +387,9 @@ sub query_prepare { # called by wq_do
sub fail_handler ($;$$) {
my ($lei, $code, $io) = @_;
- if (my $lxs = delete $lei->{lxs}) {
- $lxs->wq_wait_old($lei) if $lxs->wq_kill_old; # lei-daemon
+ for my $f (qw(lxs l2m)) {
+ my $wq = delete $lei->{$f} or next;
+ $wq->wq_wait_old($lei) if $wq->wq_kill_old; # lei-daemon
}
close($io) if $io; # needed to avoid warnings on SIGPIPE
$lei->x_it($code // (1 >> 8));
diff --git a/xt/lei-sigpipe.t b/xt/lei-sigpipe.t
index 1aa9ed07..ba2d23c8 100644
--- a/xt/lei-sigpipe.t
+++ b/xt/lei-sigpipe.t
@@ -29,7 +29,30 @@ my $do_test = sub {
}
};
-$do_test->();
-$do_test->({XDG_RUNTIME_DIR => '/dev/null'});
+my ($tmp, $for_destroy) = tmpdir();
+my $pid;
+my $opt = { run_mode => 0, 1 => \(my $out = '') };
+if (run_script([qw(lei daemon-pid)], undef, $opt)) {
+ chomp($pid = $out);
+ mkdir "$tmp/d" or BAIL_OUT $!;
+ local $ENV{TMPDIR} = "$tmp/d";
+ $do_test->();
+ $out = '';
+ ok(run_script([qw(lei daemon-pid)], undef, $opt), 'daemon-pid again');
+ chomp($out);
+ is($out, $pid, 'daemon-pid unchanged');
+ ok(kill(0, $pid), 'daemon still running');
+ $out = '';
+}
+{
+ mkdir "$tmp/1" or BAIL_OUT $!;
+ local $ENV{TMPDIR} = "$tmp/1";
+ $do_test->({XDG_RUNTIME_DIR => '/dev/null'});
+ is(unlink(glob("$tmp/1/*")), 0, 'nothing left over w/ oneshot');
+}
+
+# the one-shot test should be slow enough that the daemon has cleaned
+# up in the background:
+is_deeply([glob("$tmp/d/*")], [], 'nothing left over with daemon');
done_testing;
^ permalink raw reply related [flat|nested] 17+ messages in thread
* [PATCH 10/16] cmd_ipc4: fix comments and formatting
2021-02-02 11:46 [PATCH 00/16] lei: -I/--include and more Eric Wong
` (8 preceding siblings ...)
2021-02-02 11:46 ` [PATCH 09/16] lei q: do not leave temporary files after oneshot exit Eric Wong
@ 2021-02-02 11:46 ` Eric Wong
2021-02-02 11:46 ` [PATCH 11/16] pktop: fix potential undefined var Eric Wong
` (5 subsequent siblings)
15 siblings, 0 replies; 17+ messages in thread
From: Eric Wong @ 2021-02-02 11:46 UTC (permalink / raw)
To: meta
---
lib/PublicInbox/CmdIPC4.pm | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/lib/PublicInbox/CmdIPC4.pm b/lib/PublicInbox/CmdIPC4.pm
index c244f6a1..74dbf8a1 100644
--- a/lib/PublicInbox/CmdIPC4.pm
+++ b/lib/PublicInbox/CmdIPC4.pm
@@ -3,7 +3,7 @@
# callers should use PublicInbox::CmdIPC4->can('send_cmd4') (or recv_cmd4)
# first choice for script/lei front-end and 2nd choice for lei backend
-# libsocket-msghdr-perl is in Debian but many other distros as of 2021.
+# libsocket-msghdr-perl is in Debian but not many other distros as of 2021.
package PublicInbox::CmdIPC4;
use strict;
use v5.10.1;
@@ -12,12 +12,11 @@ BEGIN { eval {
require Socket::MsgHdr; # XS
no warnings 'once';
-# 3 FDs per-sendmsg(2) + buffer
+# any number of FDs per-sendmsg(2) + buffer
*send_cmd4 = sub ($$$$) { # (sock, fds, buf, flags) = @_;
my ($sock, $fds, undef, $flags) = @_;
my $mh = Socket::MsgHdr->new(buf => $_[2]);
- $mh->cmsghdr(SOL_SOCKET, SCM_RIGHTS,
- pack('i' x scalar(@$fds), @$fds));
+ $mh->cmsghdr(SOL_SOCKET, SCM_RIGHTS, pack('i' x scalar(@$fds), @$fds));
Socket::MsgHdr::sendmsg($sock, $mh, $flags);
};
^ permalink raw reply related [flat|nested] 17+ messages in thread
* [PATCH 11/16] pktop: fix potential undefined var
2021-02-02 11:46 [PATCH 00/16] lei: -I/--include and more Eric Wong
` (9 preceding siblings ...)
2021-02-02 11:46 ` [PATCH 10/16] cmd_ipc4: fix comments and formatting Eric Wong
@ 2021-02-02 11:46 ` Eric Wong
2021-02-02 11:46 ` [PATCH 12/16] lei_xsearch: ensure curl.err and tail(1) cleanup happens Eric Wong
` (4 subsequent siblings)
15 siblings, 0 replies; 17+ messages in thread
From: Eric Wong @ 2021-02-02 11:46 UTC (permalink / raw)
To: meta
In case we have other bugs in our code.
---
lib/PublicInbox/PktOp.pm | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/lib/PublicInbox/PktOp.pm b/lib/PublicInbox/PktOp.pm
index 12839e71..59b37ff8 100644
--- a/lib/PublicInbox/PktOp.pm
+++ b/lib/PublicInbox/PktOp.pm
@@ -58,7 +58,7 @@ sub event_step {
die "recv: $!";
}
my ($cmd, $pargs) = split(/\0/, $msg, 2);
- my $op = $self->{ops}->{$cmd // $msg};
+ my $op = $self->{ops}->{$cmd //= $msg};
die "BUG: unknown message: `$cmd'" unless $op;
my ($sub, @args) = @$op;
$sub->(@args, $pargs ? ipc_thaw($pargs) : ());
^ permalink raw reply related [flat|nested] 17+ messages in thread
* [PATCH 12/16] lei_xsearch: ensure curl.err and tail(1) cleanup happens
2021-02-02 11:46 [PATCH 00/16] lei: -I/--include and more Eric Wong
` (10 preceding siblings ...)
2021-02-02 11:46 ` [PATCH 11/16] pktop: fix potential undefined var Eric Wong
@ 2021-02-02 11:46 ` Eric Wong
2021-02-02 11:46 ` [PATCH 13/16] doc: lei-q: note "-a" and link to Xapian QueryParser Eric Wong
` (3 subsequent siblings)
15 siblings, 0 replies; 17+ messages in thread
From: Eric Wong @ 2021-02-02 11:46 UTC (permalink / raw)
To: meta
We can safely rely on exit(0) here when interacting with curl(1)
and git(1), unlike query workers which hit Xapian directly,
where some badness happens when hit with a signal while
retrieving an mset.
---
lib/PublicInbox/LeiXSearch.pm | 1 +
1 file changed, 1 insertion(+)
diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm
index b3cace74..e207f0fc 100644
--- a/lib/PublicInbox/LeiXSearch.pm
+++ b/lib/PublicInbox/LeiXSearch.pm
@@ -198,6 +198,7 @@ sub query_remote_mboxrd {
my ($self, $lei, $uris) = @_;
local $0 = "$0 query_remote_mboxrd";
$lei->atfork_child_wq($self);
+ local $SIG{TERM} = sub { exit(0) }; # for DESTROY (File::Temp, $reap)
my ($opt, $env) = @$lei{qw(opt env)};
my @qform = (q => $lei->{mset_opt}->{qstr}, x => 'm');
push(@qform, t => 1) if $opt->{thread};
^ permalink raw reply related [flat|nested] 17+ messages in thread
* [PATCH 13/16] doc: lei-q: note "-a" and link to Xapian QueryParser
2021-02-02 11:46 [PATCH 00/16] lei: -I/--include and more Eric Wong
` (11 preceding siblings ...)
2021-02-02 11:46 ` [PATCH 12/16] lei_xsearch: ensure curl.err and tail(1) cleanup happens Eric Wong
@ 2021-02-02 11:46 ` Eric Wong
2021-02-02 11:47 ` [PATCH 14/16] lei_overview: avoid unnecessary {l2m} delete Eric Wong
` (2 subsequent siblings)
15 siblings, 0 replies; 17+ messages in thread
From: Eric Wong @ 2021-02-02 11:46 UTC (permalink / raw)
To: meta
"-a" is supported by mairix, too. We should also note somewhere
the query parsing features supported by Xapian.
---
Documentation/lei-q.pod | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/Documentation/lei-q.pod b/Documentation/lei-q.pod
index e307e020..5c0ca843 100644
--- a/Documentation/lei-q.pod
+++ b/Documentation/lei-q.pod
@@ -43,7 +43,7 @@ For a subset of MUAs known to accept a mailbox via C<-f>, COMMAND can
be abbreviated to the name of the program: C<mutt>, C<mailx>, C<mail>,
or C<neomutt>.
-=item --augment
+=item -a, --augment
Augment output destination instead of clobbering it.
@@ -124,4 +124,5 @@ License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt>
=head1 SEE ALSO
-L<lei-add-external(1)>
+L<lei-add-external(1)>,
+L<Xapian::QueryParser Syntax|https://xapian.org/docs/queryparser.html>
^ permalink raw reply related [flat|nested] 17+ messages in thread
* [PATCH 14/16] lei_overview: avoid unnecessary {l2m} delete
2021-02-02 11:46 [PATCH 00/16] lei: -I/--include and more Eric Wong
` (12 preceding siblings ...)
2021-02-02 11:46 ` [PATCH 13/16] doc: lei-q: note "-a" and link to Xapian QueryParser Eric Wong
@ 2021-02-02 11:47 ` Eric Wong
2021-02-02 11:47 ` [PATCH 15/16] lei q: tidy up progress reporting Eric Wong
2021-02-02 11:47 ` [PATCH 16/16] lei q: support --jobs [SEARCHERS],[WRITERS] Eric Wong
15 siblings, 0 replies; 17+ messages in thread
From: Eric Wong @ 2021-02-02 11:47 UTC (permalink / raw)
To: meta
We may reuse these objects in the non-worker code paths.
---
lib/PublicInbox/LeiOverview.pm | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/lib/PublicInbox/LeiOverview.pm b/lib/PublicInbox/LeiOverview.pm
index 31cc67f1..ff15d295 100644
--- a/lib/PublicInbox/LeiOverview.pm
+++ b/lib/PublicInbox/LeiOverview.pm
@@ -147,10 +147,8 @@ sub _unbless_smsg {
sub ovv_atexit_child {
my ($self, $lei) = @_;
- if (my $l2m = delete $lei->{l2m}) {
- # gracefully stop lei2mail processes after all
- # ->write_mail work is complete
- delete $l2m->{-wq_s1};
+ if (my $l2m = $lei->{l2m}) {
+ # wait for ->write_mail work we submitted to lei2mail
if (my $rd = delete $l2m->{each_smsg_done}) {
read($rd, my $buf, 1); # wait for EOF
}
^ permalink raw reply related [flat|nested] 17+ messages in thread
* [PATCH 15/16] lei q: tidy up progress reporting
2021-02-02 11:46 [PATCH 00/16] lei: -I/--include and more Eric Wong
` (13 preceding siblings ...)
2021-02-02 11:47 ` [PATCH 14/16] lei_overview: avoid unnecessary {l2m} delete Eric Wong
@ 2021-02-02 11:47 ` Eric Wong
2021-02-02 11:47 ` [PATCH 16/16] lei q: support --jobs [SEARCHERS],[WRITERS] Eric Wong
15 siblings, 0 replies; 17+ messages in thread
From: Eric Wong @ 2021-02-02 11:47 UTC (permalink / raw)
To: meta
We won't be reporting progress when output is going to stdout
since it can clutter up the terminal unless stderr != stdout,
which probably isn't worth checking.
We'll also use a more agnostic mset_progress which may
make it easier to support worker-less invocations.
---
lib/PublicInbox/LEI.pm | 1 +
lib/PublicInbox/LeiOverview.pm | 2 ++
lib/PublicInbox/LeiXSearch.pm | 34 +++++++++++++++++++---------------
3 files changed, 22 insertions(+), 15 deletions(-)
diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index 44afced3..2c512c5e 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -871,6 +871,7 @@ sub accept_dispatch { # Listener {post_accept} callback
sub dclose {
my ($self) = @_;
+ delete $self->{-progress};
for my $f (qw(lxs l2m)) {
my $wq = delete $self->{$f} or next;
if ($wq->wq_kill) {
diff --git a/lib/PublicInbox/LeiOverview.pm b/lib/PublicInbox/LeiOverview.pm
index ff15d295..52da225d 100644
--- a/lib/PublicInbox/LeiOverview.pm
+++ b/lib/PublicInbox/LeiOverview.pm
@@ -90,6 +90,8 @@ sub new {
} else {
ovv_out_lk_init($self);
}
+ } elsif (!$opt->{quiet}) {
+ $lei->{-progress} = 1;
}
if ($json) {
$lei->{dedupe} //= PublicInbox::LeiDedupe->new($lei);
diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm
index e207f0fc..57a18075 100644
--- a/lib/PublicInbox/LeiXSearch.pm
+++ b/lib/PublicInbox/LeiXSearch.pm
@@ -107,6 +107,19 @@ sub wait_startq ($) {
read($startq, my $query_prepare_done, 1);
}
+sub mset_progress {
+ my $lei = shift;
+ return unless $lei->{-progress};
+ if ($lei->{pkt_op}) { # called via pkt_op/pkt_do from workers
+ pkt_do($lei->{pkt_op}, 'mset_progress', @_);
+ } else { # single lei-daemon consumer
+ my @args = ref($_[-1]) eq 'ARRAY' ? @{$_[-1]} : @_;
+ my ($desc, $mset_size, $mset_total_est) = @args;
+ $lei->{-mset_total} += $mset_size;
+ $lei->err("# $desc $mset_size/$mset_total_est");
+ }
+}
+
sub query_thread_mset { # for --thread
my ($self, $lei, $ibxish) = @_;
local $0 = "$0 query_thread_mset";
@@ -121,7 +134,7 @@ sub query_thread_mset { # for --thread
my $each_smsg = $lei->{ovv}->ovv_each_smsg_cb($lei, $ibxish);
do {
$mset = $srch->mset($mo->{qstr}, $mo);
- pkt_do($lei->{pkt_op}, 'mset_progress', $desc, $mset->size,
+ mset_progress($lei, $desc, $mset->size,
$mset->get_matches_estimated);
my $ids = $srch->mset_to_artnums($mset, $mo);
my $ctx = { ids => $ids };
@@ -154,7 +167,7 @@ sub query_mset { # non-parallel for non-"--thread" users
my $each_smsg = $lei->{ovv}->ovv_each_smsg_cb($lei, $self);
do {
$mset = $self->mset($mo->{qstr}, $mo);
- pkt_do($lei->{pkt_op}, 'mset_progress', 'xsearch',
+ mset_progress($lei, 'xsearch', $mset->size,
$mset->size, $mset->get_matches_estimated);
for my $mitem ($mset->items) {
my $smsg = smsg_for($self, $mitem) or next;
@@ -174,8 +187,8 @@ sub each_eml { # callback for MboxReader->mboxrd
$smsg->{$_} //= '' for qw(from to cc ds subject references mid);
delete @$smsg{qw(From Subject -ds -ts)};
if (my $startq = delete($lei->{startq})) { wait_startq($startq) }
- ++$lei->{-nr_remote_eml};
- if (!$lei->{opt}->{quiet}) {
+ if ($lei->{-progress}) {
+ ++$lei->{-nr_remote_eml};
my $now = now();
my $next = $lei->{-next_progress} //= ($now + 1);
if ($now > $next) {
@@ -261,8 +274,7 @@ sub query_remote_mboxrd {
return $lei->fail("E: @$cmd: $@") if $@;
if ($? == 0) {
my $nr = $lei->{-nr_remote_eml};
- pkt_do($lei->{pkt_op}, 'mset_progress',
- $lei->{-current_url}, $nr, $nr);
+ mset_progress($lei, $lei->{-current_url}, $nr, $nr);
next;
}
seek($cerr, $coff, SEEK_SET) or warn "seek(curl stderr): $!\n";
@@ -318,19 +330,11 @@ Error closing $lei->{ovv}->{dst}: $!
}
$lei->start_mua;
}
- $lei->{opt}->{quiet} or
+ $lei->{-progress} and
$lei->err('# ', $lei->{-mset_total} // 0, " matches");
$lei->dclose;
}
-sub mset_progress { # called via pkt_op/pkt_do from workers
- my ($lei, $pargs) = @_;
- my ($desc, $mset_size, $mset_total_est) = @$pargs;
- return if $lei->{opt}->{quiet};
- $lei->{-mset_total} += $mset_size;
- $lei->err("# $desc $mset_size/$mset_total_est");
-}
-
sub do_post_augment {
my ($lei, $zpipe, $au_done) = @_;
my $l2m = $lei->{l2m} or die 'BUG: no {l2m}';
^ permalink raw reply related [flat|nested] 17+ messages in thread
* [PATCH 16/16] lei q: support --jobs [SEARCHERS],[WRITERS]
2021-02-02 11:46 [PATCH 00/16] lei: -I/--include and more Eric Wong
` (14 preceding siblings ...)
2021-02-02 11:47 ` [PATCH 15/16] lei q: tidy up progress reporting Eric Wong
@ 2021-02-02 11:47 ` Eric Wong
15 siblings, 0 replies; 17+ messages in thread
From: Eric Wong @ 2021-02-02 11:47 UTC (permalink / raw)
To: meta
This comma-delimited parameter allows controlling the number of
lei_xsearch and lei2mail worker processes. With the change
to make IPC wq_* work use the event loop, it's now safe to
run fewer worker processes for searching with no risk of
deadlocks.
MAX_PER_HOST isn't configurable yet for remote hosts,
and maybe it shouldn't be due to potential for abuse.
---
lib/PublicInbox/IPC.pm | 19 +++++++++++++++++++
lib/PublicInbox/LEI.pm | 5 ++++-
lib/PublicInbox/LeiQuery.pm | 14 ++++++++++++--
lib/PublicInbox/LeiXSearch.pm | 1 -
lib/PublicInbox/V2Writable.pm | 22 ++--------------------
5 files changed, 37 insertions(+), 24 deletions(-)
diff --git a/lib/PublicInbox/IPC.pm b/lib/PublicInbox/IPC.pm
index 50de1bed..3873649b 100644
--- a/lib/PublicInbox/IPC.pm
+++ b/lib/PublicInbox/IPC.pm
@@ -466,4 +466,23 @@ sub DESTROY {
# Sereal doesn't have dclone
sub deep_clone { ipc_thaw(ipc_freeze($_[-1])) }
+sub detect_nproc () {
+ # _SC_NPROCESSORS_ONLN = 84 on both Linux glibc and musl
+ return POSIX::sysconf(84) if $^O eq 'linux';
+ return POSIX::sysconf(58) if $^O eq 'freebsd';
+ # TODO: more OSes
+
+ # getconf(1) is POSIX, but *NPROCESSORS* vars are not
+ for (qw(_NPROCESSORS_ONLN NPROCESSORS_ONLN)) {
+ `getconf $_ 2>/dev/null` =~ /^(\d+)$/ and return $1;
+ }
+ for my $nproc (qw(nproc gnproc)) { # GNU coreutils nproc
+ `$nproc 2>/dev/null` =~ /^(\d+)$/ and return $1;
+ }
+
+ # should we bother with `sysctl hw.ncpu`? Those only give
+ # us total processor count, not online processor count.
+ undef
+}
+
1;
diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index 2c512c5e..9afc90cf 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -104,7 +104,7 @@ our %CMD = ( # sorted in order of importance/use:
'q' => [ 'SEARCH_TERMS...', 'search for messages matching terms', qw(
save-as=s output|mfolder|o=s format|f=s dedupe|d=s thread|t augment|a
sort|s=s reverse|r offset=i remote! local! external! pretty
- include|I=s@ exclude=s@ only=s@
+ include|I=s@ exclude=s@ only=s@ jobs|j=s
mua-cmd|mua=s no-torsocks torsocks=s verbose|v quiet|q
received-after=s received-before=s sent-after=s sent-since=s),
PublicInbox::LeiQuery::curl_opt(), opt_dash('limit|n=i', '[0-9]+') ],
@@ -236,6 +236,9 @@ my %OPTDESC = (
'q only=s@' => [ 'URL_OR_PATHNAME',
'only use specified external(s) for search' ],
+'q jobs=s' => [ '[SEARCH_JOBS][,WRITER_JOBS]',
+ 'control number of search and writer jobs' ],
+
'ls-query format|f=s' => $ls_format,
'ls-external format|f=s' => $ls_format,
diff --git a/lib/PublicInbox/LeiQuery.pm b/lib/PublicInbox/LeiQuery.pm
index ca214ca1..72a67c24 100644
--- a/lib/PublicInbox/LeiQuery.pm
+++ b/lib/PublicInbox/LeiQuery.pm
@@ -17,6 +17,7 @@ sub lei_q {
my ($self, @argv) = @_;
require PublicInbox::LeiXSearch;
require PublicInbox::LeiOverview;
+ require PublicInbox::V2Writable;
PublicInbox::Config->json; # preload before forking
my $opt = $self->{opt};
# prepare any number of LeiXSearch || LeiSearch || Inbox || URL
@@ -53,13 +54,22 @@ sub lei_q {
unless ($lxs->locals || $lxs->remotes) {
return $self->fail('no local or remote inboxes to search');
}
- my $xj = $lxs->concurrency($opt);
+ my ($xj, $mj) = split(/,/, $opt->{jobs} // '');
+ if (defined($xj) && $xj ne '' && $xj !~ /\A[1-9][0-9]*\z/) {
+ return $self->fail("`$xj' search jobs must be >= 1");
+ }
+ $xj ||= $lxs->concurrency($opt); # allow: "--jobs ,$WRITER_ONLY"
+ my $nproc = $lxs->detect_nproc; # don't memoize, schedtool(1) exists
+ $xj = $nproc if $xj > $nproc;
PublicInbox::LeiOverview->new($self) or return;
$self->atfork_prepare_wq($lxs);
$lxs->wq_workers_start('lei_xsearch', $xj, $self->oldset);
delete $lxs->{-ipc_atfork_child_close};
if (my $l2m = $self->{l2m}) {
- my $mj = 4; # TODO: configurable
+ if (defined($mj) && $mj !~ /\A[1-9][0-9]*\z/) {
+ return $self->fail("`$mj' writer jobs must be >= 1");
+ }
+ $mj //= $nproc;
$self->atfork_prepare_wq($l2m);
$l2m->wq_workers_start('lei2mail', $mj, $self->oldset);
delete $l2m->{-ipc_atfork_child_close};
diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm
index 57a18075..37bd233e 100644
--- a/lib/PublicInbox/LeiXSearch.pm
+++ b/lib/PublicInbox/LeiXSearch.pm
@@ -350,7 +350,6 @@ sub do_post_augment {
}
my $MAX_PER_HOST = 4;
-sub MAX_PER_HOST { $MAX_PER_HOST }
sub concurrency {
my ($self, $opt) = @_;
diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm
index 35b7fe30..cbd4f003 100644
--- a/lib/PublicInbox/V2Writable.pm
+++ b/lib/PublicInbox/V2Writable.pm
@@ -8,6 +8,7 @@ use strict;
use v5.10.1;
use parent qw(PublicInbox::Lock);
use PublicInbox::SearchIdxShard;
+use PublicInbox::IPC;
use PublicInbox::Eml;
use PublicInbox::Git;
use PublicInbox::Import;
@@ -35,32 +36,13 @@ our $PACKING_FACTOR = 0.4;
# to increase Xapian shards
our $NPROC_MAX_DEFAULT = 4;
-sub detect_nproc () {
- # _SC_NPROCESSORS_ONLN = 84 on both Linux glibc and musl
- return POSIX::sysconf(84) if $^O eq 'linux';
- return POSIX::sysconf(58) if $^O eq 'freebsd';
- # TODO: more OSes
-
- # getconf(1) is POSIX, but *NPROCESSORS* vars are not
- for (qw(_NPROCESSORS_ONLN NPROCESSORS_ONLN)) {
- `getconf $_ 2>/dev/null` =~ /^(\d+)$/ and return $1;
- }
- for my $nproc (qw(nproc gnproc)) { # GNU coreutils nproc
- `$nproc 2>/dev/null` =~ /^(\d+)$/ and return $1;
- }
-
- # should we bother with `sysctl hw.ncpu`? Those only give
- # us total processor count, not online processor count.
- undef
-}
-
sub nproc_shards ($) {
my ($creat_opt) = @_;
my $n = $creat_opt->{nproc} if ref($creat_opt) eq 'HASH';
$n //= $ENV{NPROC};
if (!$n) {
# assume 2 cores if not detectable or zero
- state $NPROC_DETECTED = detect_nproc() || 2;
+ state $NPROC_DETECTED = PublicInbox::IPC::detect_nproc() || 2;
$n = $NPROC_DETECTED;
$n = $NPROC_MAX_DEFAULT if $n > $NPROC_MAX_DEFAULT;
}
^ permalink raw reply related [flat|nested] 17+ messages in thread