From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-Status: No, score=-3.4 required=3.0 tests=ALL_TRUSTED,AWL,BAYES_00, NUMERIC_HTTP_ADDR,WEIRD_PORT shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 9F56920068 for ; Wed, 3 Feb 2021 08:11:44 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 05/11] lei: propagate curl errors, improve internal consistency Date: Tue, 2 Feb 2021 22:11:37 -1000 Message-Id: <20210203081143.24424-6-e@80x24.org> In-Reply-To: <20210203081143.24424-1-e@80x24.org> References: <20210203081143.24424-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: IO::Uncompress::Gunzip seems to be losing $? when closing PublicInbox::ProcessPipe. To workaround this, do a synchronous waitpid ourselves to force proper $? reporting update tests to use the new --only feature for testing invalid URLs. This improves internal code consistency by having {pkt_op} parse the same ASCII-only protocol script/lei understands. We no longer pass {sock} to worker processes at all, further reducing FD pressure on per-user limits. --- lib/PublicInbox/LEI.pm | 15 ++++++++------- lib/PublicInbox/LeiOverview.pm | 2 -- lib/PublicInbox/LeiXSearch.pm | 16 +++++++--------- lib/PublicInbox/PktOp.pm | 15 +++++++++++---- t/lei.t | 29 ++++++++++++++++------------- 5 files changed, 42 insertions(+), 35 deletions(-) diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm index 9b4d4e0b..05a39cad 100644 --- a/lib/PublicInbox/LEI.pm +++ b/lib/PublicInbox/LEI.pm @@ -285,8 +285,8 @@ sub x_it ($$) { # make sure client sees stdout before exit $self->{1}->autoflush(1) if $self->{1}; dump_and_clear_log(); - if (my $sock = $self->{sock}) { - send($sock, "x_it $code", MSG_EOR); + if (my $s = $self->{pkt_op} // $self->{sock}) { + send($s, "x_it $code", MSG_EOR); } elsif ($self->{oneshot}) { # don't want to end up using $? from child processes for my $f (qw(lxs l2m)) { @@ -339,9 +339,10 @@ sub puts ($;@) { out(shift, map { "$_\n" } @_) } sub child_error { # passes non-fatal curl exit codes to user my ($self, $child_error) = @_; # child_error is $? - if (my $sock = $self->{sock}) { # send to lei(1) client - send($sock, "child_error $child_error", MSG_EOR); - } elsif ($self->{oneshot}) { + if (my $s = $self->{pkt_op} // $self->{sock}) { + # send to the parent lei-daemon or to lei(1) client + send($s, "child_error $child_error", MSG_EOR); + } elsif (!$PublicInbox::DS::in_loop) { $self->{child_error} = $child_error; } # else noop if client disconnected } @@ -420,9 +421,9 @@ sub atfork_parent_wq { $lei->{$f} = $wq->deep_clone($tmp); } $self->{env} = $env; - delete @$lei{qw(3 -lei_store cfg old_1 pgr lxs)}; # keep l2m + delete @$lei{qw(sock 3 -lei_store cfg old_1 pgr lxs)}; # keep l2m my @io = (delete(@$lei{qw(0 1 2)}), - io_extract($lei, qw(sock pkt_op startq))); + io_extract($lei, qw(pkt_op startq))); my $l2m = $lei->{l2m}; if ($l2m && $l2m != $wq) { # $wq == lxs if (my $wq_s1 = $l2m->{-wq_s1}) { diff --git a/lib/PublicInbox/LeiOverview.pm b/lib/PublicInbox/LeiOverview.pm index 366af8b2..88034ada 100644 --- a/lib/PublicInbox/LeiOverview.pm +++ b/lib/PublicInbox/LeiOverview.pm @@ -216,9 +216,7 @@ sub ovv_each_smsg_cb { # runs in wq worker usually $wcb->(undef, $smsg, $eml); }; } elsif ($l2m && $l2m->{-wq_s1}) { - my $sock = delete $lei->{sock}; # lei2mail doesn't need it my ($lei_ipc, @io) = $lei->atfork_parent_wq($l2m); - $lei->{sock} = $sock if $sock; # $io[0] becomes a notification pipe that triggers EOF # in this wq worker when all outstanding ->write_mail # calls are complete diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm index 23a9c020..d33064bb 100644 --- a/lib/PublicInbox/LeiXSearch.pm +++ b/lib/PublicInbox/LeiXSearch.pm @@ -113,8 +113,7 @@ sub mset_progress { if ($lei->{pkt_op}) { # called via pkt_op/pkt_do from workers pkt_do($lei->{pkt_op}, 'mset_progress', @_); } else { # single lei-daemon consumer - my @args = ref($_[-1]) eq 'ARRAY' ? @{$_[-1]} : @_; - my ($desc, $mset_size, $mset_total_est) = @args; + my ($desc, $mset_size, $mset_total_est) = @_; $lei->{-mset_total} += $mset_size; $lei->err("# $desc $mset_size/$mset_total_est"); } @@ -264,14 +263,11 @@ sub query_remote_mboxrd { shift(@$cmd) if !$cmd->[0]; $lei->err("# @$cmd") if $verbose; - $? = 0; - my $fh = popen_rd($cmd, $env, $rdr); + my ($fh, $pid) = popen_rd($cmd, $env, $rdr); $fh = IO::Uncompress::Gunzip->new($fh); - eval { - PublicInbox::MboxReader->mboxrd($fh, \&each_eml, $self, - $lei, $each_smsg); - }; - return $lei->fail("E: @$cmd: $@") if $@; + PublicInbox::MboxReader->mboxrd($fh, \&each_eml, $self, + $lei, $each_smsg); + waitpid($pid, 0) == $pid or die "BUG: waitpid (curl): $!"; if ($? == 0) { my $nr = $lei->{-nr_remote_eml}; mset_progress($lei, $lei->{-current_url}, $nr, $nr); @@ -420,6 +416,8 @@ sub do_query { '.' => [ \&do_post_augment, $lei, $zpipe, $au_done ], '' => [ \&query_done, $lei ], 'mset_progress' => [ \&mset_progress, $lei ], + 'x_it' => [ $lei->can('x_it'), $lei ], + 'child_error' => [ $lei->can('child_error'), $lei ], }; (my $op, $lei->{pkt_op}) = PublicInbox::PktOp->pair($ops); my ($lei_ipc, @io) = $lei->atfork_parent_wq($self); diff --git a/lib/PublicInbox/PktOp.pm b/lib/PublicInbox/PktOp.pm index 40c7262a..10d76da0 100644 --- a/lib/PublicInbox/PktOp.pm +++ b/lib/PublicInbox/PktOp.pm @@ -4,8 +4,7 @@ # op dispatch socket, reads a message, runs a sub # There may be multiple producers, but (for now) only one consumer # Used for lei_xsearch and maybe other things -# "literal" => [ sub, @operands ] -# /regexp/ => [ sub, @operands ] +# "command" => [ $sub, @fixed_operands ] package PublicInbox::PktOp; use strict; use v5.10.1; @@ -57,11 +56,19 @@ sub event_step { $self->close; die "recv: $!"; } - my ($cmd, $pargs) = split(/\0/, $msg, 2); + my ($cmd, @pargs); + if (index($msg, "\0") > 0) { + ($cmd, my $pargs) = split(/\0/, $msg, 2); + @pargs = @{ipc_thaw($pargs)}; + } else { + # for compatibility with the script/lei in client mode, + # it doesn't load Sereal||Storable for startup speed + ($cmd, @pargs) = split(/ /, $msg); + } my $op = $self->{ops}->{$cmd //= $msg}; die "BUG: unknown message: `$cmd'" unless $op; my ($sub, @args) = @$op; - $sub->(@args, $pargs ? ipc_thaw($pargs) : ()); + $sub->(@args, @pargs); return $self->close if $msg eq ''; # close on EOF } while (1); } diff --git a/t/lei.t b/t/lei.t index 33f47ae4..461669a8 100644 --- a/t/lei.t +++ b/t/lei.t @@ -14,6 +14,7 @@ require_mods(qw(json DBD::SQLite Search::Xapian)); my $opt = { 1 => \(my $out = ''), 2 => \(my $err = '') }; my ($home, $for_destroy) = tmpdir(); my $err_filter; +my $curl = which('curl'); my @onions = qw(http://hjrcffqmbrq6wope.onion/meta/ http://czquwvybam4bgbro.onion/meta/ http://ou63pmih66umazou.onion/meta/); @@ -39,7 +40,7 @@ local $ENV{XDG_RUNTIME_DIR} = "$home/xdg_run"; local $ENV{HOME} = $home; local $ENV{FOO} = 'BAR'; mkdir "$home/xdg_run", 0700 or BAIL_OUT "mkdir: $!"; -my $home_trash = [ "$home/.local", "$home/.config" ]; +my $home_trash = [ "$home/.local", "$home/.config", "$home/junk" ]; my $cleanup = sub { rmtree([@$home_trash, @_]) }; my $config_file = "$home/.config/lei/config"; my $store_dir = "$home/.local/share/lei"; @@ -162,26 +163,19 @@ my $setup_publicinboxes = sub { my $test_external_remote = sub { my ($url, $k) = @_; SKIP: { - my $nr = 4; + my $nr = 5; skip "$k unset", $nr if !$url; - which('curl') or skip 'no curl', $nr; + $curl or skip 'no curl', $nr; which('torsocks') or skip 'no torsocks', $nr if $url =~ m!\.onion/!; - $lei->('ls-external'); - for my $e (split(/^/ms, $out)) { - $e =~ s/\s+boost.*//s; - $lei->('forget-external', '-q', $e) or - fail "error forgetting $e: $err" - } - $lei->('add-external', $url); my $mid = '20140421094015.GA8962@dcvr.yhbt.net'; - ok($lei->('q', '-q', "m:$mid"), "query $url"); + my @cmd = ('q', '--only', $url, '-q', "m:$mid"); + ok($lei->(@cmd), "query $url"); is($err, '', "no errors on $url"); my $res = $json->decode($out); is($res->[0]->{'m'}, "<$mid>", "got expected mid from $url"); - ok($lei->('q', '-q', "m:$mid", 'd:..20101002'), 'no results, no error'); + ok($lei->(@cmd, 'd:..20101002'), 'no results, no error'); is($err, '', 'no output on 404, matching local FS behavior'); is($out, "[null]\n", 'got null results'); - $lei->('forget-external', $url); } # /SKIP }; # /sub @@ -355,12 +349,21 @@ my $test_completion = sub { } }; +my $test_fail = sub { + $lei->(qw(q --only http://127.0.0.1:99999/bogus/ t:m)); + is($? >> 8, 3, 'got curl exit for bogus URL'); + $lei->(qw(q --only http://127.0.0.1:99999/bogus/ t:m -o), "$home/junk"); + is($? >> 8, 3, 'got curl exit for bogus URL with Maildir'); + is($out, '', 'no output'); +}; + my $test_lei_common = sub { $test_help->(); $test_config->(); $test_init->(); $test_external->(); $test_completion->(); + $test_fail->(); }; if ($ENV{TEST_LEI_ONESHOT}) {