"lei add-external --mirror $URL $DESTDIR" works. Tests are more split out and hopefully easier to manage going forward (they are slowing down, though more use of a common setup_public_inboxes() may help). The curl(1) short options are gone to avoid conflicts. --help looks a bit nicer now. Eric Wong (17): lei_overview: drop unnecessary autoflush call lei: favor "keywords" over "flags", test --no-kw lei: fix completion of --no-kw / --no-keywords lei: abort lei_import worker on client abort init: lowercase -j for --jobs lei_query: trim curl options tests: add test_lei wrapper, split out t/lei-import.t t/lei-externals: split out into separate test t/tests: split out setup_public_inboxes sub tests: split out lei-daemon.t from lei.t treewide: replace confess with croak script/lei: avoid waitpid(-1, ...) to keep tests fast lei: add-external --mirror support lei help: split out into separate file lei add-external: reject index and remote opts w/o mirror lei_curl: replace -K/--config with --curl-config lei: remove short switch support for curl(1) options MANIFEST | 11 +- Makefile.PL | 3 + contrib/completion/lei-completion.bash | 2 +- lib/PublicInbox/Admin.pm | 7 +- lib/PublicInbox/DS.pm | 10 +- lib/PublicInbox/Eml.pm | 4 +- lib/PublicInbox/IPC.pm | 2 +- lib/PublicInbox/LEI.pm | 200 +++++------- lib/PublicInbox/LeiCurl.pm | 72 +++++ lib/PublicInbox/LeiExternal.pm | 46 ++- lib/PublicInbox/LeiHelp.pm | 100 ++++++ lib/PublicInbox/LeiImport.pm | 4 +- lib/PublicInbox/LeiMirror.pm | 288 +++++++++++++++++ lib/PublicInbox/LeiOverview.pm | 1 - lib/PublicInbox/LeiQuery.pm | 24 +- lib/PublicInbox/LeiXSearch.pm | 33 +- lib/PublicInbox/OverIdx.pm | 2 +- lib/PublicInbox/TestCommon.pm | 142 ++++++++- script/lei | 28 +- script/public-inbox-init | 2 +- t/home1/.gitignore | 5 + t/home1/Makefile | 7 + t/home1/README | 8 + t/lei-daemon.t | 63 ++++ t/lei-externals.t | 200 ++++++++++++ t/lei-import.t | 39 +++ t/lei-mirror.t | 30 ++ t/lei-oneshot.t | 8 - t/lei.t | 424 +++---------------------- 
29 files changed, 1180 insertions(+), 585 deletions(-) create mode 100644 lib/PublicInbox/LeiCurl.pm create mode 100644 lib/PublicInbox/LeiHelp.pm create mode 100644 lib/PublicInbox/LeiMirror.pm create mode 100644 t/home1/.gitignore create mode 100644 t/home1/Makefile create mode 100644 t/home1/README create mode 100644 t/lei-daemon.t create mode 100644 t/lei-externals.t create mode 100644 t/lei-import.t create mode 100644 t/lei-mirror.t delete mode 100644 t/lei-oneshot.t
This was actually causing xt/lei-sigpipe.t failures, presumably due to reused/recycled workers with many externals. --- lib/PublicInbox/LeiOverview.pm | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/PublicInbox/LeiOverview.pm b/lib/PublicInbox/LeiOverview.pm index d3df4faa..24e4c190 100644 --- a/lib/PublicInbox/LeiOverview.pm +++ b/lib/PublicInbox/LeiOverview.pm @@ -182,7 +182,6 @@ sub _json_pretty { sub ovv_each_smsg_cb { # runs in wq worker usually my ($self, $lei, $ibxish) = @_; my ($json, $dedupe); - $lei->{1}->autoflush(1); if (my $pkg = $self->{json}) { $json = $pkg->new; $json->utf8->canonical;
JMAP brain says "keywords", IMAP brain says "flags"; JMAP brain wins today. Since "keywords" is a bit long, support "kw" as a shortcut since there's no conflict and "kw:" will be our search prefix for looking up messages by keyword. --- lib/PublicInbox/LEI.pm | 7 ++++--- lib/PublicInbox/LeiImport.pm | 4 ++-- t/lei.t | 21 ++++++++++++++++++++- 3 files changed, 26 insertions(+), 6 deletions(-) diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm index 682d1bd1..b058b533 100644 --- a/lib/PublicInbox/LEI.pm +++ b/lib/PublicInbox/LEI.pm @@ -131,7 +131,7 @@ our %CMD = ( # sorted in order of importance/use: 'exclude mail matching From: or thread from non-Message-ID searches', qw(stdin| thread|t from|f=s mid=s oid=s) ], 'mark' => [ 'MESSAGE_FLAGS...', - 'set/unset flags on message(s) from stdin', + 'set/unset keywords on message(s) from stdin', qw(stdin| oid=s exact by-mid|mid:s) ], 'forget' => [ '[--stdin|--oid=OID|--by-mid=MID]', "exclude message(s) on stdin from `q' search results", @@ -152,7 +152,8 @@ our %CMD = ( # sorted in order of importance/use: 'add-watch' => [ '[URL_OR_PATHNAME]', 'watch for new messages and flag changes', - qw(import! flags! interval=s recursive|r exclude=s include=s) ], + qw(import! kw|keywords|flags! 
interval=s recursive|r + exclude=s include=s) ], 'ls-watch' => [ '[FILTER...]', 'list active watches with numbers and status', qw(format|f=s z) ], 'pause-watch' => [ '[WATCH_NUMBER_OR_FILTER]', qw(all local remote) ], @@ -163,7 +164,7 @@ our %CMD = ( # sorted in order of importance/use: 'import' => [ 'URLS_OR_PATHNAMES...|--stdin', 'one-time import/update from URL or filesystem', qw(stdin| offset=i recursive|r exclude=s include|I=s - format|f=s flags!), + format|f=s kw|keywords|flags!), ], 'config' => [ '[...]', sub { diff --git a/lib/PublicInbox/LeiImport.pm b/lib/PublicInbox/LeiImport.pm index 4a9af8a7..2c7cbf2b 100644 --- a/lib/PublicInbox/LeiImport.pm +++ b/lib/PublicInbox/LeiImport.pm @@ -26,7 +26,7 @@ sub call { # the main "lei import" method my ($cls, $lei, @argv) = @_; my $sto = $lei->_lei_store(1); $sto->write_prepare($lei); - $lei->{opt}->{flags} //= 1; + $lei->{opt}->{kw} //= 1; my $fmt = $lei->{opt}->{'format'}; my $self = $lei->{imp} = bless {}, $cls; return $lei->fail('--format unspecified') if !$fmt; @@ -63,7 +63,7 @@ sub ipc_atfork_child { sub _import_fh { my ($lei, $fh, $x) = @_; - my $set_kw = $lei->{opt}->{flags}; + my $set_kw = $lei->{opt}->{kw}; my $fmt = $lei->{opt}->{'format'}; eval { if ($fmt eq 'eml') { diff --git a/t/lei.t b/t/lei.t index eb824a30..41d854e8 100644 --- a/t/lei.t +++ b/t/lei.t @@ -400,7 +400,26 @@ my $test_import = sub { ok($lei->(qw(q s:boolean)), 'search hit after import'); ok($lei->(qw(import -f eml), 't/data/message_embed.eml'), 'import single file by path'); - $cleanup->(); + + my $str = <<''; +From: a@b +Message-ID: <x@y> +Status: RO + + ok($lei->([qw(import -f eml -)], undef, { %$opt, 0 => \$str }), + 'import single file with keywords from stdin'); + $lei->(qw(q m:x@y)); + my $res = $json->decode($out); + is($res->[1], undef, 'only one result'); + is_deeply($res->[0]->{kw}, ['seen'], "message `seen' keyword set"); + + $str =~ tr/x/v/; # v@y + ok($lei->([qw(import --no-kw -f eml -)], undef, { %$opt, 0 => \$str }), + 
'import single file with --no-kw from stdin'); + $lei->(qw(q m:v@y)); + $res = $json->decode($out); + is($res->[1], undef, 'only one result'); + is_deeply($res->[0]->{kw}, [], 'no keywords set'); }; my $test_lei_common = sub {
We did not complete --no-* flags properly when multiple options are allowed. --- lib/PublicInbox/LEI.pm | 9 ++++++--- t/lei.t | 8 +++++++- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm index b058b533..8d5a921e 100644 --- a/lib/PublicInbox/LEI.pm +++ b/lib/PublicInbox/LEI.pm @@ -274,6 +274,8 @@ my %OPTDESC = ( 'by-mid|mid:s' => [ 'MID', 'match only by Message-ID, ignoring contents' ], 'jobs:i' => 'set parallelism level', +'kw|keywords|flags!' => 'disable/enable importing flags', + # xargs, env, use "-0", git(1) uses "-z". We support z|0 everywhere 'z|0' => 'use NUL \\0 instead of newline (CR) to delimit lines', @@ -425,7 +427,7 @@ sub _help ($;$) { my (@vals, @s, @l); my $x = $sw; if ($x =~ s/!\z//) { # solve! => --no-solve - $x = "no-$x"; + $x =~ s/(\A|\|)/$1no-/g } elsif ($x =~ s/:.+//) { # optional args: $x = "mid:s" @vals = (' [', undef, ']'); } elsif ($x =~ s/=.+//) { # required arg: $x = "type=s" @@ -710,8 +712,9 @@ sub lei__complete { } puts $self, grep(/$re/, map { # generate short/long names if (s/[:=].+\z//) { # req/optional args, e.g output|o=i - } else { # negation: solve! => no-solve|solve - s/\A(.+)!\z/no-$1|$1/; + } elsif (s/!\z//) { + # negation: solve! => no-solve|solve + s/([\w\-]+)/$1|no-$1/g } map { my $x = length > 1 ? 
"--$_" : "-$_"; diff --git a/t/lei.t b/t/lei.t index 41d854e8..df333957 100644 --- a/t/lei.t +++ b/t/lei.t @@ -363,7 +363,7 @@ my $test_completion = sub { --mua --mua-cmd --no-local --local --verbose -v --save-as --no-remote --remote --torsocks --reverse -r )) { - ok($out{$sw}, "$sw offered as completion"); + ok($out{$sw}, "$sw offered as `lei q' completion"); } ok($lei->(qw(_complete lei q --form)), 'complete q --format'); @@ -376,6 +376,12 @@ my $test_completion = sub { ok($out{$f}, "got $sw $f as output format"); } } + ok($lei->(qw(_complete lei import)), 'complete import'); + %out = map { $_ => 1 } split(/\s+/s, $out); + for my $sw (qw(--flags --no-flags --no-kw --kw --no-keywords + --keywords)) { + ok($out{$sw}, "$sw offered as `lei import' completion"); + } }; my $test_fail = sub {
We'll stuff all the common wq key fields into the @WQ_KEYS array so it's easier to keep track of what to kill or reap. --- lib/PublicInbox/LEI.pm | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm index 8d5a921e..28ad88e7 100644 --- a/lib/PublicInbox/LEI.pm +++ b/lib/PublicInbox/LEI.pm @@ -286,6 +286,8 @@ my %CONFIG_KEYS = ( 'leistore.dir' => 'top-level storage location', ); +my @WQ_KEYS = qw(lxs l2m imp); # internal workers + # pronounced "exit": x_it(1 << 8) => exit(1); x_it(13) => SIGPIPE sub x_it ($$) { my ($self, $code) = @_; @@ -296,7 +298,7 @@ sub x_it ($$) { send($s, "x_it $code", MSG_EOR); } elsif ($self->{oneshot}) { # don't want to end up using $? from child processes - for my $f (qw(lxs l2m)) { + for my $f (@WQ_KEYS) { my $wq = delete $self->{$f} or next; $wq->DESTROY; } @@ -327,7 +329,7 @@ sub qerr ($;@) { $_[0]->{opt}->{quiet} or err(shift, @_) } sub fail_handler ($;$$) { my ($lei, $code, $io) = @_; - for my $f (qw(imp lxs l2m)) { + for my $f (@WQ_KEYS) { my $wq = delete $lei->{$f} or next; $wq->wq_wait_old($lei) if $wq->wq_kill_old; # lei-daemon } @@ -335,7 +337,7 @@ sub fail_handler ($;$$) { $lei->x_it($code // (1 >> 8)); } -sub sigpipe_handler { # handles SIGPIPE from l2m/lxs workers +sub sigpipe_handler { # handles SIGPIPE from @WQ_KEYS workers fail_handler($_[0], 13, delete $_[0]->{1}); } @@ -856,7 +858,7 @@ sub accept_dispatch { # Listener {post_accept} callback sub dclose { my ($self) = @_; delete $self->{-progress}; - for my $f (qw(lxs l2m)) { + for my $f (@WQ_KEYS) { my $wq = delete $self->{$f} or next; if ($wq->wq_kill) { $wq->wq_close
Lowercase -j for --jobs is taken from common implementations of make(1); the uppercase -J only affected people using the command-line help output. --- script/public-inbox-init | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/script/public-inbox-init b/script/public-inbox-init index 6a867a22..e93cab73 100755 --- a/script/public-inbox-init +++ b/script/public-inbox-init @@ -24,7 +24,7 @@ options: --ng NEWSGROUP set NNTP newsgroup name --skip-artnum=NUM NNTP article numbers to skip --skip-epoch=NUM epochs to skip (-V2 only) - -J JOBS number of indexing jobs (-V2 only), (default: 4) + -j JOBS number of indexing jobs (-V2 only), (default: 4) See public-inbox-init(1) man page for full documentation. EOF
Get rid of short options which will or may conflict with some of our own. We may switch over to "git -c http.*" options since we need to run "git clone" and "git fetch" anyways. --- lib/PublicInbox/LeiQuery.pm | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/lib/PublicInbox/LeiQuery.pm b/lib/PublicInbox/LeiQuery.pm index 56350386..7c856032 100644 --- a/lib/PublicInbox/LeiQuery.pm +++ b/lib/PublicInbox/LeiQuery.pm @@ -152,18 +152,21 @@ sub _complete_q { # with other "lei q" switches. # FIXME: Getopt::Long doesn't easily let us support support options with # '.' in them (e.g. --http1.1) +# TODO: should we depend on "-c http.*" options for things which have +# analogues in git(1)? that would reduce likelyhood of conflicts with +# our other CLI options sub curl_opt { qw( abstract-unix-socket=s anyauth basic cacert=s capath=s - cert-status cert-type cert|E=s ciphers=s config|K=s@ - connect-timeout=s connect-to=s cookie-jar|c=s cookie|b=s crlfile=s + cert-status cert-type cert=s ciphers=s config|K=s@ + connect-timeout=s connect-to=s cookie-jar=s cookie=s crlfile=s digest disable dns-interface=s dns-ipv4-addr=s dns-ipv6-addr=s dns-servers=s doh-url=s egd-file=s engine=s false-start happy-eyeballs-timeout-ms=s haproxy-protocol header|H=s@ - http2-prior-knowledge http2 insecure|k + http2-prior-knowledge http2 insecure interface=s ipv4 ipv6 junk-session-cookies - key-type=s key=s limit-rate=s local-port=s location-trusted location|L + key-type=s key=s limit-rate=s local-port=s location-trusted location max-redirs=i max-time=s negotiate netrc-file=s netrc-optional netrc - no-alpn no-buffer|N no-npn no-sessionid noproxy=s ntlm-wb ntlm + no-alpn no-buffer no-npn no-sessionid noproxy=s ntlm-wb ntlm pass=s pinnedpubkey=s post301 post302 post303 preproxy=s proxy-anyauth proxy-basic proxy-cacert=s proxy-capath=s proxy-cert-type=s proxy-cert=s proxy-ciphers=s proxy-crlfile=s @@ -176,7 +179,7 @@ sub curl_opt { qw( retry-connrefused retry-delay=s 
retry-max-time=s retry=i sasl-ir service-name=s socks4=s socks4a=s socks5-basic socks5-gssapi-service-name=s socks5-gssapi socks5-hostname=s socks5=s - speed-limit|Y speed-type|y ssl-allow-beast sslv2 sslv3 + speed-limit speed-type ssl-allow-beast sslv2 sslv3 suppress-connect-headers tcp-fastopen tls-max=s tls13-ciphers=s tlsauthtype=s tlspassword=s tlsuser=s tlsv1 trace-ascii=s trace-time trace=s
This will make it easier to maintain and test lei going forward; we need to be testing against existing read-only daemons. We'll also save ourselves some boilerplate by exporting all the Test::More methods directly in TestCommon. We'll start using this by splitting out the latest "lei import" tests into their own file. --- MANIFEST | 1 + lib/PublicInbox/TestCommon.pm | 93 ++++++++++++++++++++++++++++++++--- t/lei-import.t | 39 +++++++++++++++ t/lei.t | 35 ------------- 4 files changed, 127 insertions(+), 41 deletions(-) create mode 100644 t/lei-import.t diff --git a/MANIFEST b/MANIFEST index a11d4106..3bece258 100644 --- a/MANIFEST +++ b/MANIFEST @@ -351,6 +351,7 @@ t/init.t t/ipc.t t/iso-2202-jp.eml t/kqnotify.t +t/lei-import.t t/lei-oneshot.t t/lei.t t/lei_dedupe.t diff --git a/lib/PublicInbox/TestCommon.pm b/lib/PublicInbox/TestCommon.pm index 40c2dc9e..2b78731b 100644 --- a/lib/PublicInbox/TestCommon.pm +++ b/lib/PublicInbox/TestCommon.pm @@ -9,14 +9,17 @@ use v5.10.1; use Fcntl qw(FD_CLOEXEC F_SETFD F_GETFD :seek); use POSIX qw(dup2); use IO::Socket::INET; -our @EXPORT = qw(tmpdir tcp_server tcp_connect require_git require_mods - run_script start_script key2sub xsys xsys_e xqx eml_load tick - have_xapian_compact); +our @EXPORT; BEGIN { + @EXPORT = qw(tmpdir tcp_server tcp_connect require_git require_mods + run_script start_script key2sub xsys xsys_e xqx eml_load tick + have_xapian_compact json_utf8 + test_lei $lei $lei_out $lei_err $lei_opt); require Test::More; - *BAIL_OUT = \&Test::More::BAIL_OUT; - *plan = \&Test::More::plan; - *skip = \&Test::More::skip; + my @methods = grep(!/\W/, @Test::More::EXPORT); + eval(join('', map { "*$_=\\&Test::More::$_;" } @methods)); + die $@ if $@; + push @EXPORT, @methods; } sub eml_load ($) { @@ -419,6 +422,84 @@ sub have_xapian_compact () { PublicInbox::Spawn::which($ENV{XAPIAN_COMPACT} || 'xapian-compact'); } +our ($err_skip, $lei_opt, $lei_out, $lei_err); +our $lei = sub { + my ($cmd, $env, $xopt) = @_; + $lei_out = 
$lei_err = ''; + if (!ref($cmd)) { + ($env, $xopt) = grep { (!defined) || ref } @_; + $cmd = [ grep { defined && !ref } @_ ]; + } + my $res = run_script(['lei', @$cmd], $env, $xopt // $lei_opt); + $err_skip and + $lei_err = join('', grep(!/$err_skip/, split(/^/m, $lei_err))); + $res; +}; + +sub json_utf8 () { + state $x = ref(PublicInbox::Config->json)->new->utf8->canonical; +} + +sub test_lei { +SKIP: { + my ($cb) = pop @_; + my $test_opt = shift // {}; + require_git(2.6) or skip('git 2.6+ required for lei test', 2); + require_mods(qw(json DBD::SQLite Search::Xapian), 2); + require PublicInbox::Config; + delete local $ENV{XDG_DATA_HOME}; + delete local $ENV{XDG_CONFIG_HOME}; + local $ENV{GIT_COMMITTER_EMAIL} = 'lei@example.com'; + local $ENV{GIT_COMMITTER_NAME} = 'lei user'; + my (undef, $fn, $lineno) = caller(0); + my $t = "$fn:$lineno"; + require PublicInbox::Spawn; + state $lei_daemon = PublicInbox::Spawn->can('send_cmd4') || + eval { require Socket::MsgHdr; 1 }; + $lei_opt = { 1 => \$lei_out, 2 => \$lei_err }; + my $daemon_pid; + my ($tmpdir, $for_destroy) = tmpdir(); + SKIP: { + skip <<'EOM', 1 unless $lei_daemon; +Socket::MsgHdr missing or Inline::C is unconfigured/missing +EOM + my $home = "$tmpdir/lei-daemon"; + mkdir($home, 0700) or BAIL_OUT "mkdir: $!"; + local $ENV{HOME} = $home; + my $xrd = "$home/xdg_run"; + mkdir($xrd, 0700) or BAIL_OUT "mkdir: $!"; + local $ENV{XDG_RUNTIME_DIR} = $xrd; + $cb->(); + ok($lei->(qw(daemon-pid)), "daemon-pid after $t"); + chomp($daemon_pid = $lei_out); + if ($daemon_pid) { + ok(kill(0, $daemon_pid), "daemon running after $t"); + ok($lei->(qw(daemon-kill)), "daemon-kill after $t"); + } else { + fail("daemon not running after $t"); + } + }; # SKIP for lei_daemon + unless ($test_opt->{daemon_only}) { + require_ok 'PublicInbox::LEI'; + my $home = "$tmpdir/lei-oneshot"; + mkdir($home, 0700) or BAIL_OUT "mkdir: $!"; + local $ENV{HOME} = $home; + # force sun_path[108] overflow: + my $xrd = "$home/1shot-test".('.sun_path' x 
108); + local $err_skip = qr!\Q$xrd!; # for $lei->() filtering + local $ENV{XDG_RUNTIME_DIR} = $xrd; + $cb->(); + } + if ($daemon_pid) { + for (0..10) { + kill(0, $daemon_pid) or last; + tick; + } + ok(!kill(0, $daemon_pid), "$t daemon stopped after oneshot"); + } +}; # SKIP if missing git 2.6+ || Xapian || SQLite || json +} + package PublicInboxTestProcess; use strict; diff --git a/t/lei-import.t b/t/lei-import.t new file mode 100644 index 00000000..709d89fa --- /dev/null +++ b/t/lei-import.t @@ -0,0 +1,39 @@ +#!perl -w +# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> +use strict; use v5.10.1; use PublicInbox::TestCommon; +test_lei(sub { + +ok($lei->(qw(q s:boolean)), 'search miss before import'); +unlike($lei_out, qr/boolean/i, 'no results, yet'); +open my $fh, '<', 't/data/0001.patch' or BAIL_OUT $!; +ok($lei->([qw(import -f eml -)], undef, { %$lei_opt, 0 => $fh }), + 'import single file from stdin'); +close $fh; +ok($lei->(qw(q s:boolean)), 'search hit after import'); +ok($lei->(qw(import -f eml), 't/data/message_embed.eml'), + 'import single file by path'); + +my $str = <<''; +From: a@b +Message-ID: <x@y> +Status: RO + +my $opt = { %$lei_opt, 0 => \$str }; +ok($lei->([qw(import -f eml -)], undef, $opt), + 'import single file with keywords from stdin'); +$lei->(qw(q m:x@y)); +my $res = json_utf8->decode($lei_out); +is($res->[1], undef, 'only one result'); +is_deeply($res->[0]->{kw}, ['seen'], "message `seen' keyword set"); + +$str =~ tr/x/v/; # v@y +ok($lei->([qw(import --no-kw -f eml -)], undef, $opt), + 'import single file with --no-kw from stdin'); +$lei->(qw(q m:v@y)); +$res = json_utf8->decode($lei_out); +is($res->[1], undef, 'only one result'); +is_deeply($res->[0]->{kw}, [], 'no keywords set'); + +}); +done_testing; diff --git a/t/lei.t b/t/lei.t index df333957..9f92d895 100644 --- a/t/lei.t +++ b/t/lei.t @@ -41,7 +41,6 @@ local $ENV{GIT_COMMITTER_EMAIL} = 
'lei@example.com'; local $ENV{GIT_COMMITTER_NAME} = 'lei user'; local $ENV{XDG_RUNTIME_DIR} = "$home/xdg_run"; local $ENV{HOME} = $home; -local $ENV{FOO} = 'BAR'; mkdir "$home/xdg_run", 0700 or BAIL_OUT "mkdir: $!"; my $home_trash = [ "$home/.local", "$home/.config", "$home/junk" ]; my $cleanup = sub { rmtree([@$home_trash, @_]) }; @@ -395,39 +394,6 @@ SKIP: { }; # /SKIP }; -my $test_import = sub { - $cleanup->(); - ok($lei->(qw(q s:boolean)), 'search miss before import'); - unlike($out, qr/boolean/i, 'no results, yet'); - open my $fh, '<', 't/data/0001.patch' or BAIL_OUT $!; - ok($lei->([qw(import -f eml -)], undef, { %$opt, 0 => $fh }), - 'import single file from stdin'); - close $fh; - ok($lei->(qw(q s:boolean)), 'search hit after import'); - ok($lei->(qw(import -f eml), 't/data/message_embed.eml'), - 'import single file by path'); - - my $str = <<''; -From: a@b -Message-ID: <x@y> -Status: RO - - ok($lei->([qw(import -f eml -)], undef, { %$opt, 0 => \$str }), - 'import single file with keywords from stdin'); - $lei->(qw(q m:x@y)); - my $res = $json->decode($out); - is($res->[1], undef, 'only one result'); - is_deeply($res->[0]->{kw}, ['seen'], "message `seen' keyword set"); - - $str =~ tr/x/v/; # v@y - ok($lei->([qw(import --no-kw -f eml -)], undef, { %$opt, 0 => \$str }), - 'import single file with --no-kw from stdin'); - $lei->(qw(q m:v@y)); - $res = $json->decode($out); - is($res->[1], undef, 'only one result'); - is_deeply($res->[0]->{kw}, [], 'no keywords set'); -}; - my $test_lei_common = sub { $test_help->(); $test_config->(); @@ -435,7 +401,6 @@ my $test_lei_common = sub { $test_external->(); $test_completion->(); $test_fail->(); - $test_import->(); }; if ($ENV{TEST_LEI_ONESHOT}) {
This is still overloaded with "lei q" stuff, but that's somewhat inevitable. --- MANIFEST | 1 + t/lei-externals.t | 231 ++++++++++++++++++++++++++++++++++++++++++++++ t/lei.t | 225 -------------------------------------------- 3 files changed, 232 insertions(+), 225 deletions(-) create mode 100644 t/lei-externals.t diff --git a/MANIFEST b/MANIFEST index 3bece258..c7fe4fb5 100644 --- a/MANIFEST +++ b/MANIFEST @@ -351,6 +351,7 @@ t/init.t t/ipc.t t/iso-2202-jp.eml t/kqnotify.t +t/lei-externals.t t/lei-import.t t/lei-oneshot.t t/lei.t diff --git a/t/lei-externals.t b/t/lei-externals.t new file mode 100644 index 00000000..739f779d --- /dev/null +++ b/t/lei-externals.t @@ -0,0 +1,231 @@ +#!perl -w +# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> +use strict; use v5.10.1; use PublicInbox::TestCommon; +use Fcntl qw(SEEK_SET); +use PublicInbox::Spawn qw(which); + +my @onions = qw(http://hjrcffqmbrq6wope.onion/meta/ + http://czquwvybam4bgbro.onion/meta/ + http://ou63pmih66umazou.onion/meta/); + +# TODO share this across tests, it takes ~300ms +my $setup_publicinboxes = sub { + my ($home) = @_; + use PublicInbox::InboxWritable; + for my $V (1, 2) { + run_script([qw(-init), "-V$V", "t$V", + '--newsgroup', "t.$V", + "$home/t$V", "http://example.com/t$V", + "t$V\@example.com" ]) or BAIL_OUT "init v$V"; + } + my $cfg = PublicInbox::Config->new; + my $seen = 0; + $cfg->each_inbox(sub { + my ($ibx) = @_; + my $im = PublicInbox::InboxWritable->new($ibx)->importer(0); + my $V = $ibx->version; + my @eml = (glob('t/*.eml'), 't/data/0001.patch'); + for (@eml) { + next if $_ eq 't/psgi_v2-old.eml'; # dup mid + $im->add(eml_load($_)) or BAIL_OUT "v$V add $_"; + $seen++; + } + $im->done; + if ($V == 1) { + run_script(['-index', $ibx->{inboxdir}]) or + BAIL_OUT 'index v1'; + } + }); + $seen || BAIL_OUT 'no imports'; +}; + +my $test_external_remote = sub { + my ($url, $k) = @_; +SKIP: { + my $nr = 5; + skip 
"$k unset", $nr if !$url; + which('curl') or skip 'no curl', $nr; + which('torsocks') or skip 'no torsocks', $nr if $url =~ m!\.onion/!; + my $mid = '20140421094015.GA8962@dcvr.yhbt.net'; + my @cmd = ('q', '--only', $url, '-q', "m:$mid"); + ok($lei->(@cmd), "query $url"); + is($lei_err, '', "no errors on $url"); + my $res = json_utf8->decode($lei_out); + is($res->[0]->{'m'}, "<$mid>", "got expected mid from $url"); + ok($lei->(@cmd, 'd:..20101002'), 'no results, no error'); + is($lei_err, '', 'no output on 404, matching local FS behavior'); + is($lei_out, "[null]\n", 'got null results'); +} # /SKIP +}; # /sub + +test_lei(sub { + my $home = $ENV{HOME}; + $setup_publicinboxes->($home); + my $config_file = "$home/.config/lei/config"; + my $store_dir = "$home/.local/share/lei"; + ok($lei->('ls-external'), 'ls-external works'); + is($lei_out.$lei_err, '', 'ls-external no output, yet'); + ok(!-e $config_file && !-e $store_dir, + 'nothing created by ls-external'); + + ok(!$lei->('add-external', "$home/nonexistent"), + "fails on non-existent dir"); + ok($lei->('ls-external'), 'ls-external works after add failure'); + is($lei_out.$lei_err, '', 'ls-external still has no output'); + my $cfg = PublicInbox::Config->new; + $cfg->each_inbox(sub { + my ($ibx) = @_; + ok($lei->(qw(add-external -q), $ibx->{inboxdir}), + 'added external'); + is($lei_out.$lei_err, '', 'no output'); + }); + ok(-s $config_file && -e $store_dir, + 'add-external created config + store'); + my $lcfg = PublicInbox::Config->new($config_file); + $cfg->each_inbox(sub { + my ($ibx) = @_; + is($lcfg->{"external.$ibx->{inboxdir}.boost"}, 0, + "configured boost on $ibx->{name}"); + }); + $lei->('ls-external'); + like($lei_out, qr/boost=0\n/s, 'ls-external has output'); + ok($lei->(qw(add-external -q https://EXAMPLE.com/ibx)), 'add remote'); + is($lei_err, '', 'no warnings after add-external'); + + ok($lei->(qw(_complete lei forget-external)), 'complete for externals'); + my %comp = map { $_ => 1 } split(/\s+/, 
$lei_out); + ok($comp{'https://example.com/ibx/'}, 'forget external completion'); + $cfg->each_inbox(sub { + my ($ibx) = @_; + ok($comp{$ibx->{inboxdir}}, "local $ibx->{name} completion"); + }); + for my $u (qw(h http https https: https:/ https:// https://e + https://example https://example. https://example.co + https://example.com https://example.com/ + https://example.com/i https://example.com/ibx)) { + ok($lei->(qw(_complete lei forget-external), $u), + "partial completion for URL $u"); + is($lei_out, "https://example.com/ibx/\n", + "completed partial URL $u"); + for my $qo (qw(-I --include --exclude --only)) { + ok($lei->(qw(_complete lei q), $qo, $u), + "partial completion for URL q $qo $u"); + is($lei_out, "https://example.com/ibx/\n", + "completed partial URL $u on q $qo"); + } + } + ok($lei->(qw(_complete lei add-external), 'https://'), + 'add-external hostname completion'); + is($lei_out, "https://example.com/\n", 'completed up to hostname'); + + $lei->('ls-external'); + like($lei_out, qr!https://example\.com/ibx/!s, 'added canonical URL'); + is($lei_err, '', 'no warnings on ls-external'); + ok($lei->(qw(forget-external -q https://EXAMPLE.com/ibx)), + 'forget'); + $lei->('ls-external'); + unlike($lei_out, qr!https://example\.com/ibx/!s, + 'removed canonical URL'); +SKIP: { + ok(!$lei->(qw(q s:prefix -o /dev/null -f maildir)), 'bad maildir'); + like($lei_err, qr!/dev/null exists and is not a directory!, + 'error shown'); + is($? >> 8, 1, 'errored out with exit 1'); + + ok(!$lei->(qw(q s:prefix -f mboxcl2 -o), $home), 'bad mbox'); + like($lei_err, qr!\Q$home\E exists and is not a writable file!, + 'error shown'); + is($? >> 8, 1, 'errored out with exit 1'); + + ok(!$lei->(qw(q s:prefix -o /dev/stdout -f Mbox2)), 'bad format'); + like($lei_err, qr/bad mbox --format=mbox2/, 'error shown'); + is($? 
>> 8, 1, 'errored out with exit 1'); + + # note, on a Bourne shell users should be able to use either: + # s:"use boolean prefix" + # "s:use boolean prefix" + # or use single quotes, it should not matter. Users only need + # to know shell quoting rules, not Xapian quoting rules. + # No double-quoting should be imposed on users on the CLI + $lei->('q', 's:use boolean prefix'); + like($lei_out, qr/search: use boolean prefix/, + 'phrase search got result'); + my $res = json_utf8->decode($lei_out); + is(scalar(@$res), 2, 'only 2 element array (1 result)'); + is($res->[1], undef, 'final element is undef'); # XXX should this be? + is(ref($res->[0]), 'HASH', 'first element is hashref'); + $lei->('q', '--pretty', 's:use boolean prefix'); + my $pretty = json_utf8->decode($lei_out); + is_deeply($res, $pretty, '--pretty is identical after decode'); + + { + open my $fh, '+>', undef or BAIL_OUT $!; + $fh->autoflush(1); + print $fh 's:use' or BAIL_OUT $!; + seek($fh, 0, SEEK_SET) or BAIL_OUT $!; + ok($lei->([qw(q -q --stdin)], undef, { %$lei_opt, 0 => $fh }), + '--stdin on regular file works'); + like($lei_out, qr/use boolean/, '--stdin on regular file'); + } + { + pipe(my ($r, $w)) or BAIL_OUT $!; + print $w 's:use' or BAIL_OUT $!; + close $w or BAIL_OUT $!; + ok($lei->([qw(q -q --stdin)], undef, { %$lei_opt, 0 => $r }), + '--stdin on pipe file works'); + like($lei_out, qr/use boolean prefix/, '--stdin on pipe'); + } + ok(!$lei->(qw(q -q --stdin s:use)), "--stdin and argv don't mix"); + + for my $fmt (qw(ldjson ndjson jsonl)) { + $lei->('q', '-f', $fmt, 's:use boolean prefix'); + is($lei_out, json_utf8->encode($pretty->[0])."\n", "-f $fmt"); + } + + require IO::Uncompress::Gunzip; + for my $sfx ('', '.gz') { + my $f = "$home/mbox$sfx"; + $lei->('q', '-o', "mboxcl2:$f", 's:use boolean prefix'); + my $cat = $sfx eq '' ? 
sub { + open my $mb, '<', $f or fail "no mbox: $!"; + <$mb> + } : sub { + my $z = IO::Uncompress::Gunzip->new($f, MultiStream=>1); + <$z>; + }; + my @s = grep(/^Subject:/, $cat->()); + is(scalar(@s), 1, "1 result in mbox$sfx"); + $lei->('q', '-a', '-o', "mboxcl2:$f", 's:see attachment'); + is(grep(!/^#/, $lei_err), 0, 'no errors from augment'); + @s = grep(/^Subject:/, my @wtf = $cat->()); + is(scalar(@s), 2, "2 results in mbox$sfx"); + + $lei->('q', '-a', '-o', "mboxcl2:$f", 's:nonexistent'); + is(grep(!/^#/, $lei_err), 0, "no errors on no results ($sfx)"); + + my @s2 = grep(/^Subject:/, $cat->()); + is_deeply(\@s2, \@s, + "same 2 old results w/ --augment and bad search $sfx"); + + $lei->('q', '-o', "mboxcl2:$f", 's:nonexistent'); + my @res = $cat->(); + is_deeply(\@res, [], "clobber w/o --augment $sfx"); + } + ok(!$lei->('q', '-o', "$home/mbox", 's:nope'), + 'fails if mbox format unspecified'); + ok(!$lei->(qw(q --no-local s:see)), '--no-local'); + is($? >> 8, 1, 'proper exit code'); + like($lei_err, qr/no local or remote.+? 
to search/, 'no inbox'); + my %e = ( + TEST_LEI_EXTERNAL_HTTPS => 'https://public-inbox.org/meta/', + TEST_LEI_EXTERNAL_ONION => $onions[int(rand(scalar(@onions)))], + ); + for my $k (keys %e) { + my $url = $ENV{$k} // ''; + $url = $e{$k} if $url eq '1'; + $test_external_remote->($url, $k); + } + }; # /SKIP +}); # test_lei +done_testing; diff --git a/t/lei.t b/t/lei.t index 9f92d895..cfcdafb9 100644 --- a/t/lei.t +++ b/t/lei.t @@ -7,7 +7,6 @@ use Test::More; use PublicInbox::TestCommon; use PublicInbox::Config; use File::Path qw(rmtree); -use Fcntl qw(SEEK_SET); use PublicInbox::Spawn qw(which); my $req_sendcmd = 'Socket::MsgHdr or Inline::C missing or unconfigured'; undef($req_sendcmd) if PublicInbox::Spawn->can('send_cmd4'); @@ -18,9 +17,6 @@ my $opt = { 1 => \(my $out = ''), 2 => \(my $err = '') }; my ($home, $for_destroy) = tmpdir(); my $err_filter; my $curl = which('curl'); -my @onions = qw(http://hjrcffqmbrq6wope.onion/meta/ - http://czquwvybam4bgbro.onion/meta/ - http://ou63pmih66umazou.onion/meta/); my $json = ref(PublicInbox::Config->json)->new->utf8->canonical; my $lei = sub { my ($cmd, $env, $xopt) = @_; @@ -130,226 +126,6 @@ my $test_config = sub { ok(!-f "$home/config/f", 'no file created'); }; -my $setup_publicinboxes = sub { - state $done = ''; - return if $done eq $home; - use PublicInbox::InboxWritable; - for my $V (1, 2) { - run_script([qw(-init), "-V$V", "t$V", - '--newsgroup', "t.$V", - "$home/t$V", "http://example.com/t$V", - "t$V\@example.com" ]) or BAIL_OUT "init v$V"; - } - my $cfg = PublicInbox::Config->new; - my $seen = 0; - $cfg->each_inbox(sub { - my ($ibx) = @_; - my $im = PublicInbox::InboxWritable->new($ibx)->importer(0); - my $V = $ibx->version; - my @eml = (glob('t/*.eml'), 't/data/0001.patch'); - for (@eml) { - next if $_ eq 't/psgi_v2-old.eml'; # dup mid - $im->add(eml_load($_)) or BAIL_OUT "v$V add $_"; - $seen++; - } - $im->done; - if ($V == 1) { - run_script(['-index', $ibx->{inboxdir}]) or - BAIL_OUT 'index v1'; - } - }); - 
$done = $home; - $seen || BAIL_OUT 'no imports'; -}; - -my $test_external_remote = sub { - my ($url, $k) = @_; -SKIP: { - my $nr = 5; - skip "$k unset", $nr if !$url; - skip $req_sendcmd, $nr if $req_sendcmd; - $curl or skip 'no curl', $nr; - which('torsocks') or skip 'no torsocks', $nr if $url =~ m!\.onion/!; - my $mid = '20140421094015.GA8962@dcvr.yhbt.net'; - my @cmd = ('q', '--only', $url, '-q', "m:$mid"); - ok($lei->(@cmd), "query $url"); - is($err, '', "no errors on $url"); - my $res = $json->decode($out); - is($res->[0]->{'m'}, "<$mid>", "got expected mid from $url"); - ok($lei->(@cmd, 'd:..20101002'), 'no results, no error'); - is($err, '', 'no output on 404, matching local FS behavior'); - is($out, "[null]\n", 'got null results'); -} # /SKIP -}; # /sub - -my $test_external = sub { - $setup_publicinboxes->(); - $cleanup->(); - $lei->('ls-external'); - is($out.$err, '', 'ls-external no output, yet'); - ok(!-e $config_file && !-e $store_dir, - 'nothing created by ls-external'); - - ok(!$lei->('add-external', "$home/nonexistent"), - "fails on non-existent dir"); - $lei->('ls-external'); - is($out.$err, '', 'ls-external still has no output'); - my $cfg = PublicInbox::Config->new; - $cfg->each_inbox(sub { - my ($ibx) = @_; - ok($lei->(qw(add-external -q), $ibx->{inboxdir}), - 'added external'); - is($out.$err, '', 'no output'); - }); - ok(-s $config_file && -e $store_dir, - 'add-external created config + store'); - my $lcfg = PublicInbox::Config->new($config_file); - $cfg->each_inbox(sub { - my ($ibx) = @_; - is($lcfg->{"external.$ibx->{inboxdir}.boost"}, 0, - "configured boost on $ibx->{name}"); - }); - $lei->('ls-external'); - like($out, qr/boost=0\n/s, 'ls-external has output'); - ok($lei->(qw(add-external -q https://EXAMPLE.com/ibx)), 'add remote'); - is($err, '', 'no warnings after add-external'); - - ok($lei->(qw(_complete lei forget-external)), 'complete for externals'); - my %comp = map { $_ => 1 } split(/\s+/, $out); - 
ok($comp{'https://example.com/ibx/'}, 'forget external completion'); - $cfg->each_inbox(sub { - my ($ibx) = @_; - ok($comp{$ibx->{inboxdir}}, "local $ibx->{name} completion"); - }); - for my $u (qw(h http https https: https:/ https:// https://e - https://example https://example. https://example.co - https://example.com https://example.com/ - https://example.com/i https://example.com/ibx)) { - ok($lei->(qw(_complete lei forget-external), $u), - "partial completion for URL $u"); - is($out, "https://example.com/ibx/\n", - "completed partial URL $u"); - for my $qo (qw(-I --include --exclude --only)) { - ok($lei->(qw(_complete lei q), $qo, $u), - "partial completion for URL q $qo $u"); - is($out, "https://example.com/ibx/\n", - "completed partial URL $u on q $qo"); - } - } - ok($lei->(qw(_complete lei add-external), 'https://'), - 'add-external hostname completion'); - is($out, "https://example.com/\n", 'completed up to hostname'); - - $lei->('ls-external'); - like($out, qr!https://example\.com/ibx/!s, 'added canonical URL'); - is($err, '', 'no warnings on ls-external'); - ok($lei->(qw(forget-external -q https://EXAMPLE.com/ibx)), - 'forget'); - $lei->('ls-external'); - unlike($out, qr!https://example\.com/ibx/!s, 'removed canonical URL'); - -SKIP: { - skip $req_sendcmd, 52 if $req_sendcmd; - ok(!$lei->(qw(q s:prefix -o /dev/null -f maildir)), 'bad maildir'); - like($err, qr!/dev/null exists and is not a directory!, - 'error shown'); - is($? >> 8, 1, 'errored out with exit 1'); - - ok(!$lei->(qw(q s:prefix -f mboxcl2 -o), $home), 'bad mbox'); - like($err, qr!\Q$home\E exists and is not a writable file!, - 'error shown'); - is($? >> 8, 1, 'errored out with exit 1'); - - ok(!$lei->(qw(q s:prefix -o /dev/stdout -f Mbox2)), 'bad format'); - like($err, qr/bad mbox --format=mbox2/, 'error shown'); - is($? 
>> 8, 1, 'errored out with exit 1'); - - # note, on a Bourne shell users should be able to use either: - # s:"use boolean prefix" - # "s:use boolean prefix" - # or use single quotes, it should not matter. Users only need - # to know shell quoting rules, not Xapian quoting rules. - # No double-quoting should be imposed on users on the CLI - $lei->('q', 's:use boolean prefix'); - like($out, qr/search: use boolean prefix/, 'phrase search got result'); - my $res = $json->decode($out); - is(scalar(@$res), 2, 'only 2 element array (1 result)'); - is($res->[1], undef, 'final element is undef'); # XXX should this be? - is(ref($res->[0]), 'HASH', 'first element is hashref'); - $lei->('q', '--pretty', 's:use boolean prefix'); - my $pretty = $json->decode($out); - is_deeply($res, $pretty, '--pretty is identical after decode'); - - { - open my $fh, '+>', undef or BAIL_OUT $!; - $fh->autoflush(1); - print $fh 's:use' or BAIL_OUT $!; - seek($fh, 0, SEEK_SET) or BAIL_OUT $!; - ok($lei->([qw(q -q --stdin)], undef, { %$opt, 0 => $fh }), - '--stdin on regular file works'); - like($out, qr/use boolean prefix/, '--stdin on regular file'); - } - { - pipe(my ($r, $w)) or BAIL_OUT $!; - print $w 's:use' or BAIL_OUT $!; - close $w or BAIL_OUT $!; - ok($lei->([qw(q -q --stdin)], undef, { %$opt, 0 => $r }), - '--stdin on pipe file works'); - like($out, qr/use boolean prefix/, '--stdin on pipe'); - } - ok(!$lei->(qw(q -q --stdin s:use)), "--stdin and argv don't mix"); - - for my $fmt (qw(ldjson ndjson jsonl)) { - $lei->('q', '-f', $fmt, 's:use boolean prefix'); - is($out, $json->encode($pretty->[0])."\n", "-f $fmt"); - } - - require IO::Uncompress::Gunzip; - for my $sfx ('', '.gz') { - my $f = "$home/mbox$sfx"; - $lei->('q', '-o', "mboxcl2:$f", 's:use boolean prefix'); - my $cat = $sfx eq '' ? 
sub { - open my $mb, '<', $f or fail "no mbox: $!"; - <$mb> - } : sub { - my $z = IO::Uncompress::Gunzip->new($f, MultiStream=>1); - <$z>; - }; - my @s = grep(/^Subject:/, $cat->()); - is(scalar(@s), 1, "1 result in mbox$sfx"); - $lei->('q', '-a', '-o', "mboxcl2:$f", 's:see attachment'); - is(grep(!/^#/, $err), 0, 'no errors from augment'); - @s = grep(/^Subject:/, my @wtf = $cat->()); - is(scalar(@s), 2, "2 results in mbox$sfx"); - - $lei->('q', '-a', '-o', "mboxcl2:$f", 's:nonexistent'); - is(grep(!/^#/, $err), 0, "no errors on no results ($sfx)"); - - my @s2 = grep(/^Subject:/, $cat->()); - is_deeply(\@s2, \@s, - "same 2 old results w/ --augment and bad search $sfx"); - - $lei->('q', '-o', "mboxcl2:$f", 's:nonexistent'); - my @res = $cat->(); - is_deeply(\@res, [], "clobber w/o --augment $sfx"); - } - ok(!$lei->('q', '-o', "$home/mbox", 's:nope'), - 'fails if mbox format unspecified'); - ok(!$lei->(qw(q --no-local s:see)), '--no-local'); - is($? >> 8, 1, 'proper exit code'); - like($err, qr/no local or remote.+? to search/, 'no inbox'); - my %e = ( - TEST_LEI_EXTERNAL_HTTPS => 'https://public-inbox.org/meta/', - TEST_LEI_EXTERNAL_ONION => $onions[int(rand(scalar(@onions)))], - ); - for my $k (keys %e) { - my $url = $ENV{$k} // ''; - $url = $e{$k} if $url eq '1'; - $test_external_remote->($url, $k); - } - }; # /SKIP -}; - my $test_completion = sub { ok($lei->(qw(_complete lei)), 'no errors on complete'); my %out = map { $_ => 1 } split(/\s+/s, $out); @@ -398,7 +174,6 @@ my $test_lei_common = sub { $test_help->(); $test_config->(); $test_init->(); - $test_external->(); $test_completion->(); $test_fail->(); };
We'll probably use this in many more existing places and likely change non-lei tests to use it. --- MANIFEST | 3 +++ Makefile.PL | 3 +++ lib/PublicInbox/TestCommon.pm | 50 +++++++++++++++++++++++++++++++++-- t/home1/.gitignore | 5 ++++ t/home1/Makefile | 7 +++++ t/home1/README | 8 ++++++ t/lei-externals.t | 35 ++---------------------- 7 files changed, 76 insertions(+), 35 deletions(-) create mode 100644 t/home1/.gitignore create mode 100644 t/home1/Makefile create mode 100644 t/home1/README diff --git a/MANIFEST b/MANIFEST index c7fe4fb5..000834cc 100644 --- a/MANIFEST +++ b/MANIFEST @@ -328,6 +328,9 @@ t/git.fast-import-data t/git.t t/gzip_filter.t t/hl_mod.t +t/home1/.gitignore +t/home1/Makefile +t/home1/README t/html_index.t t/httpd-corner.psgi t/httpd-corner.t diff --git a/Makefile.PL b/Makefile.PL index b9e0a8cd..68545573 100644 --- a/Makefile.PL +++ b/Makefile.PL @@ -172,6 +172,9 @@ WriteMakefile( # ExtUtils::MakeMaker # this file won't run w/o it... }, MAN3PODS => \%man3, + clean => { + FILES => 't/home1/setup* t/home1/t* t/home1/.public-inbox' + }, ); sub MY::postamble { diff --git a/lib/PublicInbox/TestCommon.pm b/lib/PublicInbox/TestCommon.pm index 2b78731b..bb2cd7e6 100644 --- a/lib/PublicInbox/TestCommon.pm +++ b/lib/PublicInbox/TestCommon.pm @@ -13,7 +13,7 @@ our @EXPORT; BEGIN { @EXPORT = qw(tmpdir tcp_server tcp_connect require_git require_mods run_script start_script key2sub xsys xsys_e xqx eml_load tick - have_xapian_compact json_utf8 + have_xapian_compact json_utf8 setup_public_inboxes test_lei $lei $lei_out $lei_err $lei_opt); require Test::More; my @methods = grep(!/\W/, @Test::More::EXPORT); @@ -498,7 +498,53 @@ EOM ok(!kill(0, $daemon_pid), "$t daemon stopped after oneshot"); } }; # SKIP if missing git 2.6+ || Xapian || SQLite || json -} +} # /test_lei + +# returns the pathname to a ~/.public-inbox/config in scalar context, +# ($test_home, $pi_config_pathname) in list context +sub setup_public_inboxes () { + my $test_home = "t/home1"; + my 
$pi_config = "$test_home/.public-inbox/config"; + my $stamp = "$test_home/setup-stamp"; + my @ret = ($test_home, $pi_config); + return @ret if -f $stamp; + + require PublicInbox::Lock; + my $lk = bless { lock_path => "$test_home/setup.lock" }, + 'PublicInbox::Lock'; + my $end = $lk->lock_for_scope; + return @ret if -f $stamp; + + require PublicInbox::InboxWritable; + local $ENV{PI_CONFIG} = $pi_config; + for my $V (1, 2) { + run_script([qw(-init), "-V$V", "t$V", + '--newsgroup', "t.$V", + "$test_home/t$V", "http://example.com/t$V", + "t$V\@example.com" ]) or BAIL_OUT "init v$V"; + } + my $cfg = PublicInbox::Config->new; + my $seen = 0; + $cfg->each_inbox(sub { + my ($ibx) = @_; + my $im = PublicInbox::InboxWritable->new($ibx)->importer(0); + my $V = $ibx->version; + my @eml = (glob('t/*.eml'), 't/data/0001.patch'); + for (@eml) { + next if $_ eq 't/psgi_v2-old.eml'; # dup mid + $im->add(eml_load($_)) or BAIL_OUT "v$V add $_"; + $seen++; + } + $im->done; + if ($V == 1) { + run_script(['-index', $ibx->{inboxdir}]) or + BAIL_OUT 'index v1'; + } + }); + $seen or BAIL_OUT 'no imports'; + open my $fh, '>', $stamp or BAIL_OUT "open $stamp: $!"; + @ret; +}; package PublicInboxTestProcess; use strict; diff --git a/t/home1/.gitignore b/t/home1/.gitignore new file mode 100644 index 00000000..b97d81e6 --- /dev/null +++ b/t/home1/.gitignore @@ -0,0 +1,5 @@ +/.public-inbox +/t1 +/t2 +/setup.lock +/setup-stamp diff --git a/t/home1/Makefile b/t/home1/Makefile new file mode 100644 index 00000000..9d4895dc --- /dev/null +++ b/t/home1/Makefile @@ -0,0 +1,7 @@ +all :: + +help :: + @cat README + +clean :: + $(RM) -rf t1 t2 .public-inbox setup-stamp setup-lock diff --git a/t/home1/README b/t/home1/README new file mode 100644 index 00000000..1ba87891 --- /dev/null +++ b/t/home1/README @@ -0,0 +1,8 @@ +This directory is for read-only test inboxes and will be shared +between various tests. + +See setup_public_inboxes() in lib/PublicInbox/TestCommon.pm.
+ +It is versioned (currently "1" in "home1") and will be renamed +"home2" and so forth if the data created by setup_public_inboxes() +changes. diff --git a/t/lei-externals.t b/t/lei-externals.t index 739f779d..f2cb09b4 100644 --- a/t/lei-externals.t +++ b/t/lei-externals.t @@ -9,37 +9,6 @@ my @onions = qw(http://hjrcffqmbrq6wope.onion/meta/ http://czquwvybam4bgbro.onion/meta/ http://ou63pmih66umazou.onion/meta/); -# TODO share this across tests, it takes ~300ms -my $setup_publicinboxes = sub { - my ($home) = @_; - use PublicInbox::InboxWritable; - for my $V (1, 2) { - run_script([qw(-init), "-V$V", "t$V", - '--newsgroup', "t.$V", - "$home/t$V", "http://example.com/t$V", - "t$V\@example.com" ]) or BAIL_OUT "init v$V"; - } - my $cfg = PublicInbox::Config->new; - my $seen = 0; - $cfg->each_inbox(sub { - my ($ibx) = @_; - my $im = PublicInbox::InboxWritable->new($ibx)->importer(0); - my $V = $ibx->version; - my @eml = (glob('t/*.eml'), 't/data/0001.patch'); - for (@eml) { - next if $_ eq 't/psgi_v2-old.eml'; # dup mid - $im->add(eml_load($_)) or BAIL_OUT "v$V add $_"; - $seen++; - } - $im->done; - if ($V == 1) { - run_script(['-index', $ibx->{inboxdir}]) or - BAIL_OUT 'index v1'; - } - }); - $seen || BAIL_OUT 'no imports'; -}; - my $test_external_remote = sub { my ($url, $k) = @_; SKIP: { @@ -59,9 +28,9 @@ SKIP: { } # /SKIP }; # /sub +my ($ro_home, $cfg_path) = setup_public_inboxes; test_lei(sub { my $home = $ENV{HOME}; - $setup_publicinboxes->($home); my $config_file = "$home/.config/lei/config"; my $store_dir = "$home/.local/share/lei"; ok($lei->('ls-external'), 'ls-external works'); @@ -73,7 +42,7 @@ test_lei(sub { "fails on non-existent dir"); ok($lei->('ls-external'), 'ls-external works after add failure'); is($lei_out.$lei_err, '', 'ls-external still has no output'); - my $cfg = PublicInbox::Config->new; + my $cfg = PublicInbox::Config->new($cfg_path); $cfg->each_inbox(sub { my ($ibx) = @_; ok($lei->(qw(add-external -q), $ibx->{inboxdir}),
This makes it easier for hackers to find daemon-specific tests and forces us to always test both daemon and oneshot mode. --- MANIFEST | 2 +- lib/PublicInbox/TestCommon.pm | 8 +- t/lei-daemon.t | 63 ++++++++++++ t/lei-oneshot.t | 8 -- t/lei.t | 177 ++++++++-------------------------- 5 files changed, 107 insertions(+), 151 deletions(-) create mode 100644 t/lei-daemon.t delete mode 100644 t/lei-oneshot.t diff --git a/MANIFEST b/MANIFEST index 000834cc..52dea385 100644 --- a/MANIFEST +++ b/MANIFEST @@ -354,9 +354,9 @@ t/init.t t/ipc.t t/iso-2202-jp.eml t/kqnotify.t +t/lei-daemon.t t/lei-externals.t t/lei-import.t -t/lei-oneshot.t t/lei.t t/lei_dedupe.t t/lei_external.t diff --git a/lib/PublicInbox/TestCommon.pm b/lib/PublicInbox/TestCommon.pm index bb2cd7e6..c861dc5d 100644 --- a/lib/PublicInbox/TestCommon.pm +++ b/lib/PublicInbox/TestCommon.pm @@ -456,13 +456,15 @@ SKIP: { require PublicInbox::Spawn; state $lei_daemon = PublicInbox::Spawn->can('send_cmd4') || eval { require Socket::MsgHdr; 1 }; + # XXX fix and move this inside daemon-only before 1.7 release + skip <<'EOM', 1 unless $lei_daemon; +Socket::MsgHdr missing or Inline::C is unconfigured/missing +EOM $lei_opt = { 1 => \$lei_out, 2 => \$lei_err }; my $daemon_pid; my ($tmpdir, $for_destroy) = tmpdir(); SKIP: { - skip <<'EOM', 1 unless $lei_daemon; -Socket::MsgHdr missing or Inline::C is unconfigured/missing -EOM + skip 'TEST_LEI_ONESHOT set', 1 if $ENV{TEST_LEI_ONESHOT}; my $home = "$tmpdir/lei-daemon"; mkdir($home, 0700) or BAIL_OUT "mkdir: $!"; local $ENV{HOME} = $home; diff --git a/t/lei-daemon.t b/t/lei-daemon.t new file mode 100644 index 00000000..c55ba86c --- /dev/null +++ b/t/lei-daemon.t @@ -0,0 +1,63 @@ +#!perl -w +# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> +use strict; use v5.10.1; use PublicInbox::TestCommon; + +test_lei({ daemon_only => 1 }, sub { + my $sock = "$ENV{XDG_RUNTIME_DIR}/lei/5.seq.sock"; + my 
$err_log = "$ENV{XDG_RUNTIME_DIR}/lei/errors.log"; + ok($lei->('daemon-pid'), 'daemon-pid'); + is($lei_err, '', 'no error from daemon-pid'); + like($lei_out, qr/\A[0-9]+\n\z/s, 'pid returned') or BAIL_OUT; + chomp(my $pid = $lei_out); + ok(kill(0, $pid), 'pid is valid'); + ok(-S $sock, 'sock created'); + is(-s $err_log, 0, 'nothing in errors.log'); + open my $efh, '>>', $err_log or BAIL_OUT $!; + print $efh "phail\n" or BAIL_OUT $!; + close $efh or BAIL_OUT $!; + + ok($lei->('daemon-pid'), 'daemon-pid'); + chomp(my $pid_again = $lei_out); + is($pid, $pid_again, 'daemon-pid idempotent'); + like($lei_err, qr/phail/, 'got mock "phail" error previous run'); + + ok($lei->(qw(daemon-kill)), 'daemon-kill'); + is($lei_out, '', 'no output from daemon-kill'); + is($lei_err, '', 'no error from daemon-kill'); + for (0..100) { + kill(0, $pid) or last; + tick(); + } + ok(-S $sock, 'sock still exists'); + ok(!kill(0, $pid), 'pid gone after stop'); + + ok($lei->(qw(daemon-pid)), 'daemon-pid'); + chomp(my $new_pid = $lei_out); + ok(kill(0, $new_pid), 'new pid is running'); + ok(-S $sock, 'sock still exists'); + + for my $sig (qw(-0 -CHLD)) { + ok($lei->('daemon-kill', $sig), "handles $sig"); + } + is($lei_out.$lei_err, '', 'no output on innocuous signals'); + ok($lei->('daemon-pid'), 'daemon-pid'); + chomp $lei_out; + is($lei_out, $new_pid, 'PID unchanged after -0/-CHLD'); + + if ('socket inaccessible') { + chmod 0000, $sock or BAIL_OUT "chmod 0000: $!"; + ok($lei->('help'), 'connect fail, one-shot fallback works'); + like($lei_err, qr/\bconnect\(/, 'connect error noted'); + like($lei_out, qr/^usage: /, 'help output works'); + chmod 0700, $sock or BAIL_OUT "chmod 0700: $!"; + } + unlink $sock or BAIL_OUT "unlink($sock) $!"; + for (0..100) { + kill('CHLD', $new_pid) or last; + tick(); + } + ok(!kill(0, $new_pid), 'daemon exits after unlink'); +}); + +done_testing; diff --git a/t/lei-oneshot.t b/t/lei-oneshot.t deleted file mode 100644 index 7688da5b..00000000 --- a/t/lei-oneshot.t 
+++ /dev/null @@ -1,8 +0,0 @@ -#!perl -w -# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org> -# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> -use strict; -use v5.10.1; -use PublicInbox::TestCommon; -local $ENV{TEST_LEI_ONESHOT} = '1'; -require './t/lei.t'; diff --git a/t/lei.t b/t/lei.t index cfcdafb9..f789f63a 100644 --- a/t/lei.t +++ b/t/lei.t @@ -1,87 +1,56 @@ #!perl -w # Copyright (C) 2020-2021 all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> -use strict; -use v5.10.1; -use Test::More; -use PublicInbox::TestCommon; -use PublicInbox::Config; +use strict; use v5.10.1; use PublicInbox::TestCommon; use File::Path qw(rmtree); use PublicInbox::Spawn qw(which); -my $req_sendcmd = 'Socket::MsgHdr or Inline::C missing or unconfigured'; -undef($req_sendcmd) if PublicInbox::Spawn->can('send_cmd4'); -eval { require Socket::MsgHdr; undef $req_sendcmd }; -require_git 2.6; -require_mods(qw(json DBD::SQLite Search::Xapian)); -my $opt = { 1 => \(my $out = ''), 2 => \(my $err = '') }; -my ($home, $for_destroy) = tmpdir(); -my $err_filter; -my $curl = which('curl'); -my $json = ref(PublicInbox::Config->json)->new->utf8->canonical; -my $lei = sub { - my ($cmd, $env, $xopt) = @_; - $out = $err = ''; - if (!ref($cmd)) { - ($env, $xopt) = grep { (!defined) || ref } @_; - $cmd = [ grep { defined && !ref } @_ ]; - } - my $res = run_script(['lei', @$cmd], $env, $xopt // $opt); - $err_filter and - $err = join('', grep(!/$err_filter/, split(/^/m, $err))); - $res; -}; -delete local $ENV{XDG_DATA_HOME}; -delete local $ENV{XDG_CONFIG_HOME}; -local $ENV{GIT_COMMITTER_EMAIL} = 'lei@example.com'; -local $ENV{GIT_COMMITTER_NAME} = 'lei user'; -local $ENV{XDG_RUNTIME_DIR} = "$home/xdg_run"; -local $ENV{HOME} = $home; -mkdir "$home/xdg_run", 0700 or BAIL_OUT "mkdir: $!"; -my $home_trash = [ "$home/.local", "$home/.config", "$home/junk" ]; +# this only tests the basic help/config/init/completion bits of lei; 
+# actual functionality is tested in other t/lei-*.t tests +my $curl = which('curl'); +my $home; +my $home_trash = []; my $cleanup = sub { rmtree([@$home_trash, @_]) }; -my $config_file = "$home/.config/lei/config"; -my $store_dir = "$home/.local/share/lei"; my $test_help = sub { ok(!$lei->(), 'no args fails'); is($? >> 8, 1, '$? is 1'); - is($out, '', 'nothing in stdout'); - like($err, qr/^usage:/sm, 'usage in stderr'); + is($lei_out, '', 'nothing in stdout'); + like($lei_err, qr/^usage:/sm, 'usage in stderr'); for my $arg (['-h'], ['--help'], ['help'], [qw(daemon-pid --help)]) { ok($lei->($arg), "lei @$arg"); - like($out, qr/^usage:/sm, "usage in stdout (@$arg)"); - is($err, '', "nothing in stderr (@$arg)"); + like($lei_out, qr/^usage:/sm, "usage in stdout (@$arg)"); + is($lei_err, '', "nothing in stderr (@$arg)"); } for my $arg ([''], ['--halp'], ['halp'], [qw(daemon-pid --halp)]) { ok(!$lei->($arg), "lei @$arg"); is($? >> 8, 1, '$? set correctly'); - isnt($err, '', 'something in stderr'); - is($out, '', 'nothing in stdout'); + isnt($lei_err, '', 'something in stderr'); + is($lei_out, '', 'nothing in stdout'); } ok($lei->(qw(init -h)), 'init -h'); - like($out, qr! \Q$home\E/\.local/share/lei/store\b!, + like($lei_out, qr! \Q$home\E/\.local/share/lei/store\b!, 'actual path shown in init -h'); ok($lei->(qw(init -h), { XDG_DATA_HOME => '/XDH' }), 'init with XDG_DATA_HOME'); - like($out, qr! /XDH/lei/store\b!, 'XDG_DATA_HOME in init -h'); - is($err, '', 'no errors from init -h'); + like($lei_out, qr! /XDH/lei/store\b!, 'XDG_DATA_HOME in init -h'); + is($lei_err, '', 'no errors from init -h'); ok($lei->(qw(config -h)), 'config-h'); - like($out, qr! \Q$home\E/\.config/lei/config\b!, + like($lei_out, qr! \Q$home\E/\.config/lei/config\b!, 'actual path shown in config -h'); ok($lei->(qw(config -h), { XDG_CONFIG_HOME => '/XDC' }), 'config with XDG_CONFIG_HOME'); - like($out, qr! 
/XDC/lei/config\b!, 'XDG_CONFIG_HOME in config -h'); - is($err, '', 'no errors from config -h'); + like($lei_out, qr! /XDC/lei/config\b!, 'XDG_CONFIG_HOME in config -h'); + is($lei_err, '', 'no errors from config -h'); }; my $ok_err_info = sub { my ($msg) = @_; - is(grep(!/^I:/, split(/^/, $err)), 0, $msg) or - diag "$msg: err=$err"; + is(grep(!/^I:/, split(/^/, $lei_err)), 0, $msg) or + diag "$msg: err=$lei_err"; }; my $test_init = sub { @@ -92,7 +61,7 @@ my $test_init = sub { $ok_err_info->('after idempotent init w/o args'); ok(!$lei->('init', "$home/x"), 'init conflict'); - is(grep(/^E:/, split(/^/, $err)), 1, 'got error on conflict'); + is(grep(/^E:/, split(/^/, $lei_err)), 1, 'got error on conflict'); ok(!-e "$home/x", 'nothing created on conflict'); $cleanup->(); @@ -104,36 +73,36 @@ my $test_init = sub { $cleanup->("$home/x"); ok(!$lei->('init', "$home/x", "$home/2"), 'too many args fails'); - like($err, qr/too many/, 'noted excessive'); + like($lei_err, qr/too many/, 'noted excessive'); ok(!-e "$home/x", 'x not created on excessive'); for my $d (@$home_trash) { my $base = (split(m!/!, $d))[-1]; ok(!-d $d, "$base not created"); } - is($out, '', 'nothing in stdout on init failure'); + is($lei_out, '', 'nothing in stdout on init failure'); }; my $test_config = sub { $cleanup->(); ok($lei->(qw(config a.b c)), 'config set var'); - is($out.$err, '', 'no output on var set'); + is($lei_out.$lei_err, '', 'no output on var set'); ok($lei->(qw(config -l)), 'config -l'); - is($err, '', 'no errors on listing'); - is($out, "a.b=c\n", 'got expected output'); + is($lei_err, '', 'no errors on listing'); + is($lei_out, "a.b=c\n", 'got expected output'); ok(!$lei->(qw(config -f), "$home/.config/f", qw(x.y z)), 'config set var with -f fails'); - like($err, qr/not supported/, 'not supported noted'); + like($lei_err, qr/not supported/, 'not supported noted'); ok(!-f "$home/config/f", 'no file created'); }; my $test_completion = sub { ok($lei->(qw(_complete lei)), 'no errors on 
complete'); - my %out = map { $_ => 1 } split(/\s+/s, $out); + my %out = map { $_ => 1 } split(/\s+/s, $lei_out); ok($out{'q'}, "`lei q' offered as completion"); ok($out{'add-external'}, "`lei add-external' offered as completion"); ok($lei->(qw(_complete lei q)), 'complete q (no args)'); - %out = map { $_ => 1 } split(/\s+/s, $out); + %out = map { $_ => 1 } split(/\s+/s, $lei_out); for my $sw (qw(-f --format -o --output --mfolder --augment -a --mua --mua-cmd --no-local --local --verbose -v --save-as --no-remote --remote --torsocks @@ -142,17 +111,17 @@ my $test_completion = sub { } ok($lei->(qw(_complete lei q --form)), 'complete q --format'); - is($out, "--format\n", 'complete lei q --format'); + is($lei_out, "--format\n", 'complete lei q --format'); for my $sw (qw(-f --format)) { ok($lei->(qw(_complete lei q), $sw), "complete q $sw ARG"); - %out = map { $_ => 1 } split(/\s+/s, $out); + %out = map { $_ => 1 } split(/\s+/s, $lei_out); for my $f (qw(mboxrd mboxcl2 mboxcl mboxo json jsonl concatjson maildir)) { ok($out{$f}, "got $sw $f as output format"); } } ok($lei->(qw(_complete lei import)), 'complete import'); - %out = map { $_ => 1 } split(/\s+/s, $out); + %out = map { $_ => 1 } split(/\s+/s, $lei_out); for my $sw (qw(--flags --no-flags --no-kw --kw --no-keywords --keywords)) { ok($out{$sw}, "$sw offered as `lei import' completion"); @@ -161,93 +130,23 @@ my $test_completion = sub { my $test_fail = sub { SKIP: { - skip $req_sendcmd, 3 if $req_sendcmd; + skip 'no curl', 3 unless which('curl'); $lei->(qw(q --only http://127.0.0.1:99999/bogus/ t:m)); is($? >> 8, 3, 'got curl exit for bogus URL'); $lei->(qw(q --only http://127.0.0.1:99999/bogus/ t:m -o), "$home/junk"); is($? 
>> 8, 3, 'got curl exit for bogus URL with Maildir'); - is($out, '', 'no output'); + is($lei_out, '', 'no output'); }; # /SKIP }; -my $test_lei_common = sub { +test_lei(sub { + $home = $ENV{HOME}; + $home_trash = [ "$home/.local", "$home/.config", "$home/junk" ]; $test_help->(); $test_config->(); $test_init->(); $test_completion->(); $test_fail->(); -}; - -if ($ENV{TEST_LEI_ONESHOT}) { - require_ok 'PublicInbox::LEI'; - # force sun_path[108] overflow, ($lei->() filters out this path) - my $xrd = "$home/1shot-test".('.sun_path' x 108); - local $ENV{XDG_RUNTIME_DIR} = $xrd; - $err_filter = qr!\Q$xrd!; - $test_lei_common->(); -} else { -SKIP: { # real socket - skip $req_sendcmd, 115 if $req_sendcmd; - local $ENV{XDG_RUNTIME_DIR} = "$home/xdg_run"; - my $sock = "$ENV{XDG_RUNTIME_DIR}/lei/5.seq.sock"; - my $err_log = "$ENV{XDG_RUNTIME_DIR}/lei/errors.log"; - - ok($lei->('daemon-pid'), 'daemon-pid'); - is($err, '', 'no error from daemon-pid'); - like($out, qr/\A[0-9]+\n\z/s, 'pid returned') or BAIL_OUT; - chomp(my $pid = $out); - ok(kill(0, $pid), 'pid is valid'); - ok(-S $sock, 'sock created'); - - $test_lei_common->(); - is(-s $err_log, 0, 'nothing in errors.log'); - open my $efh, '>>', $err_log or BAIL_OUT $!; - print $efh "phail\n" or BAIL_OUT $!; - close $efh or BAIL_OUT $!; - - ok($lei->('daemon-pid'), 'daemon-pid'); - chomp(my $pid_again = $out); - is($pid, $pid_again, 'daemon-pid idempotent'); - like($err, qr/phail/, 'got mock "phail" error previous run'); - - ok($lei->(qw(daemon-kill)), 'daemon-kill'); - is($out, '', 'no output from daemon-kill'); - is($err, '', 'no error from daemon-kill'); - for (0..100) { - kill(0, $pid) or last; - tick(); - } - ok(-S $sock, 'sock still exists'); - ok(!kill(0, $pid), 'pid gone after stop'); - - ok($lei->(qw(daemon-pid)), 'daemon-pid'); - chomp(my $new_pid = $out); - ok(kill(0, $new_pid), 'new pid is running'); - ok(-S $sock, 'sock still exists'); - - for my $sig (qw(-0 -CHLD)) { - ok($lei->('daemon-kill', $sig), "handles 
$sig"); - } - is($out.$err, '', 'no output on innocuous signals'); - ok($lei->('daemon-pid'), 'daemon-pid'); - chomp $out; - is($out, $new_pid, 'PID unchanged after -0/-CHLD'); - - if ('socket inaccessible') { - chmod 0000, $sock or BAIL_OUT "chmod 0000: $!"; - ok($lei->('help'), 'connect fail, one-shot fallback works'); - like($err, qr/\bconnect\(/, 'connect error noted'); - like($out, qr/^usage: /, 'help output works'); - chmod 0700, $sock or BAIL_OUT "chmod 0700: $!"; - } - unlink $sock or BAIL_OUT "unlink($sock) $!"; - for (0..100) { - kill('CHLD', $new_pid) or last; - tick(); - } - ok(!kill(0, $new_pid), 'daemon exits after unlink'); - # success over socket, can't test without -}; # SKIP -} # else +}); done_testing;
The PublicInbox::Eml (and previously Email::MIME) use of confess was the primary (or only) culprit behind the lei2mail segfaults fixed by commit 0795b0906cc81f40. ("ds: guard against stack-not-refcounted quirk of Perl 5"). We never care about a backtrace when dealing with Eml objects anyways, so it was just a worthless waste of CPU cycles. We can also drop confess in a few other places. Since we only use Perl and Inline::C, users will never be without source and can replace s/croak/Carp::confess/ on a per-callsite basis to help report problems. It's also possible to use PERL5OPT=-MCarp=verbose in the environment though still potentially risky. Link: https://public-inbox.org/meta/20210201082833.3293-1-e@80x24.org/ --- lib/PublicInbox/DS.pm | 10 +++++----- lib/PublicInbox/Eml.pm | 4 ++-- lib/PublicInbox/IPC.pm | 2 +- lib/PublicInbox/OverIdx.pm | 2 +- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/lib/PublicInbox/DS.pm b/lib/PublicInbox/DS.pm index 263c3458..ec965abe 100644 --- a/lib/PublicInbox/DS.pm +++ b/lib/PublicInbox/DS.pm @@ -32,7 +32,7 @@ use Scalar::Util qw(blessed); use PublicInbox::Syscall qw(:epoll); use PublicInbox::Tmpfile; use Errno qw(EAGAIN EINVAL); -use Carp qw(confess carp); +use Carp qw(carp); our @EXPORT_OK = qw(now msg_more dwaitpid); my $nextq; # queue for next_tick @@ -335,9 +335,9 @@ retry: $ev &= ~EPOLLEXCLUSIVE; goto retry; } - die "couldn't add epoll watch for $fd: $!\n"; + die "EPOLL_CTL_ADD $self/$sock/$fd: $!"; } - confess("DescriptorMap{$fd} defined ($DescriptorMap{$fd})") + croak("FD:$fd in use by $DescriptorMap{$fd} (for $self/$sock)") if defined($DescriptorMap{$fd}); $DescriptorMap{$fd} = $self; @@ -368,7 +368,7 @@ sub close { # notifications about it my $fd = fileno($sock); epoll_ctl($Epoll, EPOLL_CTL_DEL, $fd, 0) and - confess("EPOLL_CTL_DEL: $!"); + croak("EPOLL_CTL_DEL($self/$sock): $!"); # we explicitly don't delete from DescriptorMap here until we # actually close the socket, as we might be in the middle of @@ 
-587,7 +587,7 @@ sub msg_more ($$) { sub epwait ($$) { my ($sock, $ev) = @_; epoll_ctl($Epoll, EPOLL_CTL_MOD, fileno($sock), $ev) and - confess("EPOLL_CTL_MOD $!"); + croak("EPOLL_CTL_MOD($sock): $!"); } # return true if complete, false if incomplete (or failure) diff --git a/lib/PublicInbox/Eml.pm b/lib/PublicInbox/Eml.pm index f7f62e7b..81a6632b 100644 --- a/lib/PublicInbox/Eml.pm +++ b/lib/PublicInbox/Eml.pm @@ -332,7 +332,7 @@ sub body_set { sub body_str_set { my ($self, $body_str) = @_; my $charset = ct($self)->{attributes}->{charset} or - Carp::confess('body_str was given, but no charset is defined'); + croak('body_str was given, but no charset is defined'); body_set($self, \(encode($charset, $body_str, Encode::FB_CROAK))); } @@ -454,7 +454,7 @@ sub body_str { if ($STR_TYPE{$ct->{type}} && $STR_SUBTYPE{$ct->{subtype}}) { return body($self); } - Carp::confess("can't get body as a string for ", + croak("can't get body as a string for ", join("\n\t", header_raw($self, 'Content-Type'))); } decode($charset, body($self), Encode::FB_CROAK); diff --git a/lib/PublicInbox/IPC.pm b/lib/PublicInbox/IPC.pm index a0e6bfee..0dee2a92 100644 --- a/lib/PublicInbox/IPC.pm +++ b/lib/PublicInbox/IPC.pm @@ -11,7 +11,7 @@ package PublicInbox::IPC; use strict; use v5.10.1; use parent qw(Exporter); -use Carp qw(confess croak); +use Carp qw(croak); use PublicInbox::DS qw(dwaitpid); use PublicInbox::Spawn; use PublicInbox::OnDestroy; diff --git a/lib/PublicInbox/OverIdx.pm b/lib/PublicInbox/OverIdx.pm index 985c5473..9013ae23 100644 --- a/lib/PublicInbox/OverIdx.pm +++ b/lib/PublicInbox/OverIdx.pm @@ -456,7 +456,7 @@ sub dbh_close { sub create { my ($self) = @_; my $fn = $self->{filename} // do { - Carp::confess('BUG: no {filename}') unless $self->{dbh}; + croak('BUG: no {filename}') unless $self->{dbh}; return; }; unless (-r $fn) {
We only spawn one process to be reaped at the moment. Tests will run the contents of script/* in the same process if possible, so any test scripts which spawn -httpd or other read-only daemons can cause us to stall with waitpid(-1, ...) --- script/lei | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/script/lei b/script/lei index 40c21ad8..b7f21f14 100755 --- a/script/lei +++ b/script/lei @@ -14,13 +14,15 @@ my $send_cmd = PublicInbox::CmdIPC4->can('send_cmd4') // do { PublicInbox::Spawn->can('send_cmd4'); }; -sub sigchld { - my ($sig) = @_; - my $flags = $sig ? POSIX::WNOHANG() : 0; - while (waitpid(-1, $flags) > 0) {} -} +my %pids; +my $sigchld = sub { + my $flags = scalar(@_) ? POSIX::WNOHANG() : 0; + for my $pid (keys %pids) { + delete($pids{$pid}) if waitpid($pid, $flags) == $pid; + } +}; -sub exec_cmd { +my $exec_cmd = sub { my ($fds, $argc, @argv) = @_; my @old = (*STDIN{IO}, *STDOUT{IO}, *STDERR{IO}); my @rdr; @@ -29,7 +31,7 @@ sub exec_cmd { push @rdr, shift(@old), $tmpfh; } require POSIX; # WNOHANG - $SIG{CHLD} = \&sigchld; + $SIG{CHLD} = $sigchld; my $pid = fork // die "fork: $!"; if ($pid == 0) { my %env = map { split(/=/, $_, 2) } splice(@argv, $argc); @@ -38,9 +40,11 @@ sub exec_cmd { } %ENV = (%ENV, %env); exec(@argv); - die "exec: @argv: $!"; + warn "exec: @argv: $!\n"; + POSIX::_exit(1); } -} + $pids{$pid} = 1; +}; if ($send_cmd && eval { my $path = do { @@ -107,13 +111,13 @@ Falling back to (slow) one-shot mode } elsif ($buf =~ /\Achild_error ([0-9]+)\z/) { $x_it_code = $1 + 0; } elsif ($buf =~ /\Aexec (.+)\z/) { - exec_cmd(\@fds, split(/\0/, $1)); + $exec_cmd->(\@fds, split(/\0/, $1)); } else { - sigchld(); + $sigchld->(); die $buf; } } - sigchld(); + $sigchld->(); if (my $sig = ($x_it_code & 127)) { kill $sig, $$; sleep(1) while 1;
This can be useful for users who want to clone and mirror an existing public-inbox. This doesn't have update support, yet, so users will need to run "git fetch && public-inbox-index" for now. --- MANIFEST | 3 + contrib/completion/lei-completion.bash | 2 +- lib/PublicInbox/Admin.pm | 7 +- lib/PublicInbox/LEI.pm | 17 +- lib/PublicInbox/LeiCurl.pm | 65 ++++++ lib/PublicInbox/LeiExternal.pm | 28 ++- lib/PublicInbox/LeiMirror.pm | 288 +++++++++++++++++++++++++ lib/PublicInbox/LeiXSearch.pm | 33 +-- lib/PublicInbox/TestCommon.pm | 5 +- t/lei-mirror.t | 24 +++ 10 files changed, 427 insertions(+), 45 deletions(-) create mode 100644 lib/PublicInbox/LeiCurl.pm create mode 100644 lib/PublicInbox/LeiMirror.pm create mode 100644 t/lei-mirror.t diff --git a/MANIFEST b/MANIFEST index 52dea385..4236f87c 100644 --- a/MANIFEST +++ b/MANIFEST @@ -177,9 +177,11 @@ lib/PublicInbox/InputPipe.pm lib/PublicInbox/Isearch.pm lib/PublicInbox/KQNotify.pm lib/PublicInbox/LEI.pm +lib/PublicInbox/LeiCurl.pm lib/PublicInbox/LeiDedupe.pm lib/PublicInbox/LeiExternal.pm lib/PublicInbox/LeiImport.pm +lib/PublicInbox/LeiMirror.pm lib/PublicInbox/LeiOverview.pm lib/PublicInbox/LeiQuery.pm lib/PublicInbox/LeiSearch.pm @@ -357,6 +359,7 @@ t/kqnotify.t t/lei-daemon.t t/lei-externals.t t/lei-import.t +t/lei-mirror.t t/lei.t t/lei_dedupe.t t/lei_external.t diff --git a/contrib/completion/lei-completion.bash b/contrib/completion/lei-completion.bash index fbda474c..619805fb 100644 --- a/contrib/completion/lei-completion.bash +++ b/contrib/completion/lei-completion.bash @@ -5,7 +5,7 @@ # Needs a lot of work, see `lei__complete' in lib/PublicInbox::LEI.pm _lei() { case ${COMP_WORDS[@]} in - *' add-external http'*) + *' add-external h'* | *' --mirror h'*) compopt -o nospace ;; *) compopt +o nospace ;; # the default diff --git a/lib/PublicInbox/Admin.pm b/lib/PublicInbox/Admin.pm index 3b38a5a3..b21fb241 100644 --- a/lib/PublicInbox/Admin.pm +++ b/lib/PublicInbox/Admin.pm @@ -273,8 +273,8 @@ EOM $idx->{nidx} // 
0; # returns number processed } -sub progress_prepare ($) { - my ($opt) = @_; +sub progress_prepare ($;$) { + my ($opt, $dst) = @_; # public-inbox-index defaults to quiet, -xcpdb and -compact do not if (defined($opt->{quiet}) && $opt->{quiet} < 0) { @@ -286,7 +286,8 @@ sub progress_prepare ($) { $opt->{1} = $null; # suitable for spawn() redirect } else { $opt->{verbose} ||= 1; - $opt->{-progress} = sub { print STDERR @_ }; + $dst //= *STDERR{GLOB}; + $opt->{-progress} = sub { print $dst @_ }; } } diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm index 28ad88e7..bdeab7e3 100644 --- a/lib/PublicInbox/LEI.pm +++ b/lib/PublicInbox/LEI.pm @@ -98,6 +98,13 @@ sub _config_path ($) { .'/lei/config'); } +sub index_opt { + # TODO: drop underscore variants everywhere, they're undocumented + qw(fsync|sync! jobs|j=i indexlevel|index-level|L=s compact+ + max_size|max-size=s sequential_shard|sequential-shard + batch_size|batch-size=s skip-docdata quiet|q verbose|v+) +} + # TODO: generate shell completion + help using %CMD and %OPTDESC # command => [ positional_args, 1-line description, Getopt::Long option spec ] our %CMD = ( # sorted in order of importance/use: @@ -105,7 +112,7 @@ our %CMD = ( # sorted in order of importance/use: save-as=s output|mfolder|o=s format|f=s dedupe|d=s thread|t augment|a sort|s=s reverse|r offset=i remote! local! external! 
pretty include|I=s@ exclude=s@ only=s@ jobs|j=s globoff|g stdin| - mua-cmd|mua=s no-torsocks torsocks=s verbose|v quiet|q + mua-cmd|mua=s no-torsocks torsocks=s verbose|v+ quiet|q received-after=s received-before=s sent-after=s sent-since=s), PublicInbox::LeiQuery::curl_opt(), opt_dash('limit|n=i', '[0-9]+') ], @@ -115,7 +122,8 @@ our %CMD = ( # sorted in order of importance/use: 'add-external' => [ 'URL_OR_PATHNAME', 'add/set priority of a publicinbox|extindex for extra matches', - qw(boost=i quiet|q) ], + qw(boost=i c=s@ mirror=s no-torsocks torsocks=s inbox-version=i), + index_opt(), PublicInbox::LeiQuery::curl_opt() ], 'ls-external' => [ '[FILTER...]', 'list publicinbox|extindex locations', qw(format|f=s z|0 local remote quiet|q) ], 'forget-external' => [ 'URL_OR_PATHNAME...|--prune', @@ -204,7 +212,7 @@ my %OPTDESC = ( 'help|h' => 'show this built-in help', 'quiet|q' => 'be quiet', 'globoff|g' => "do not match locations using '*?' wildcards and '[]' ranges", -'verbose|v' => 'be more verbose', +'verbose|v+' => 'be more verbose', 'solve!' => 'do not attempt to reconstruct blobs from emails', 'torsocks=s' => ['auto|no|yes', 'whether or not to wrap git and curl commands with torsocks'], @@ -286,7 +294,7 @@ my %CONFIG_KEYS = ( 'leistore.dir' => 'top-level storage location', ); -my @WQ_KEYS = qw(lxs l2m imp); # internal workers +my @WQ_KEYS = qw(lxs l2m imp mrr); # internal workers # pronounced "exit": x_it(1 << 8) => exit(1); x_it(13) => SIGPIPE sub x_it ($$) { @@ -714,6 +722,7 @@ sub lei__complete { } puts $self, grep(/$re/, map { # generate short/long names if (s/[:=].+\z//) { # req/optional args, e.g output|o=i + } elsif (s/\+\z//) { # verbose|v+ } elsif (s/!\z//) { # negation: solve! 
=> no-solve|solve s/([\w\-]+)/$1|no-$1/g diff --git a/lib/PublicInbox/LeiCurl.pm b/lib/PublicInbox/LeiCurl.pm new file mode 100644 index 00000000..c8747d4f --- /dev/null +++ b/lib/PublicInbox/LeiCurl.pm @@ -0,0 +1,65 @@ +# Copyright (C) 2021 all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> + +# common option and torsocks(1) wrapping for curl(1) +package PublicInbox::LeiCurl; +use strict; +use v5.10.1; +use PublicInbox::Spawn qw(which); +use PublicInbox::Config; + +# prepares a common command for curl(1) based on $lei command +sub new { + my ($cls, $lei, $curl) = @_; + $curl //= which('curl') // return $lei->fail('curl not found'); + my $opt = $lei->{opt}; + my @cmd = ($curl, qw(-Sf)); + $cmd[-1] .= 's' if $opt->{quiet}; # already the default for "lei q" + $cmd[-1] .= 'v' if $opt->{verbose}; # we use ourselves, too + for my $o ($lei->curl_opt) { + $o =~ s/\|[a-z0-9]\b//i; # remove single char short option + if ($o =~ s/=[is]@\z//) { + my $ary = $opt->{$o} or next; + push @cmd, map { ("--$o", $_) } @$ary; + } elsif ($o =~ s/=[is]\z//) { + my $val = $opt->{$o} // next; + push @cmd, "--$o", $val; + } elsif ($opt->{$o}) { + push @cmd, "--$o"; + } + } + push @cmd, '-v' if $opt->{verbose}; # lei uses this itself + bless \@cmd, $cls; +} + +sub torsocks { # useful for "git clone" and "git fetch", too + my ($self, $lei, $uri)= @_; + my $opt = $lei->{opt}; + $opt->{torsocks} = 'false' if $opt->{'no-torsocks'}; + my $torsocks = $opt->{torsocks} //= 'auto'; + if ($torsocks eq 'auto' && substr($uri->host, -6) eq '.onion' && + (($lei->{env}->{LD_PRELOAD}//'') !~ /torsocks/)) { + # "auto" continues anyways if torsocks is missing; + # a proxy may be specified via CLI, curlrc, + # environment variable, or even firewall rule + [ ($lei->{torsocks} //= which('torsocks')) // () ] + } elsif (PublicInbox::Config::git_bool($torsocks)) { + my $x = $lei->{torsocks} //= which('torsocks'); + $x or return $lei->fail(<<EOM); 
+--torsocks=yes specified but torsocks not found in PATH=$ENV{PATH} +EOM + [ $x ]; + } else { # the common case for current Internet :< + []; + } +} + +# completes the result of cmd() for $uri +sub for_uri { + my ($self, $lei, $uri) = @_; + my $pfx = torsocks($self, $lei, $uri) or return; # error + [ @$pfx, @$self, substr($uri->path, -3) eq '.gz' ? () : '--compressed', + $uri->as_string ] +} + +1; diff --git a/lib/PublicInbox/LeiExternal.pm b/lib/PublicInbox/LeiExternal.pm index accacf1a..6a5c2517 100644 --- a/lib/PublicInbox/LeiExternal.pm +++ b/lib/PublicInbox/LeiExternal.pm @@ -88,19 +88,35 @@ sub get_externals { (); } -sub lei_add_external { +sub add_external_finish { my ($self, $location) = @_; my $cfg = $self->_lei_cfg(1); my $new_boost = $self->{opt}->{boost} // 0; - $location = ext_canonicalize($location); - if ($location !~ m!\Ahttps?://! && !-d $location) { - return $self->fail("$location not a directory"); - } my $key = "external.$location.boost"; my $cur_boost = $cfg->{$key}; return if defined($cur_boost) && $cur_boost == $new_boost; # idempotent $self->lei_config($key, $new_boost); - $self->_lei_store(1)->done; # just create the store +} + +sub lei_add_external { + my ($self, $location) = @_; + $self->_lei_store(1)->write_prepare($self); + my $new_boost = $self->{opt}->{boost} // 0; + $location = ext_canonicalize($location); + my $mirror = $self->{opt}->{mirror}; + if (defined($mirror) && -d $location) { + $self->fail(<<""); # TODO: did you mean "update-external?" +--mirror destination `$location' already exists + + } + if ($location !~ m!\Ahttps?://! 
&& !-d $location) { + $mirror // return $self->fail("$location not a directory"); + $mirror = ext_canonicalize($mirror); + require PublicInbox::LeiMirror; + PublicInbox::LeiMirror->start($self, $mirror => $location); + } else { + add_external_finish($self, $location); + } } sub lei_forget_external { diff --git a/lib/PublicInbox/LeiMirror.pm b/lib/PublicInbox/LeiMirror.pm new file mode 100644 index 00000000..bb172e6a --- /dev/null +++ b/lib/PublicInbox/LeiMirror.pm @@ -0,0 +1,288 @@ +# Copyright (C) 2021 all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> + +# "lei add-external --mirror" support +package PublicInbox::LeiMirror; +use strict; +use v5.10.1; +use parent qw(PublicInbox::IPC); +use IO::Uncompress::Gunzip qw(gunzip $GunzipError); +use PublicInbox::Spawn qw(popen_rd spawn); +use PublicInbox::PktOp; + +sub mirror_done { # EOF callback for main daemon + my ($lei) = @_; + my $mrr = delete $lei->{mrr}; + $mrr->wq_wait_old($lei) if $mrr; + # FIXME: check $? 
before finish + $lei->add_external_finish($mrr->{dst}); + $lei->dclose; +} + +# for old installations without manifest.js.gz +sub try_scrape { + my ($self) = @_; + my $uri = URI->new($self->{src}); + my $lei = $self->{lei}; + my $curl = $self->{curl} //= PublicInbox::LeiCurl->new($lei) or return; + my $cmd = $curl->for_uri($lei, $uri); + my $opt = { 0 => $lei->{0}, 2 => $lei->{2} }; + my $fh = popen_rd($cmd, $lei->{env}, $opt); + my $html = do { local $/; <$fh> } // die "read(curl $uri): $!"; + close($fh) or return $lei->child_error($?, "@$cmd failed"); + + # we grep with URL below, we don't want Subject/From headers + # making us clone random URLs + my @urls = ($html =~ m!\bgit clone --mirror ([a-z\+]+://\S+)!g); + my $url = $uri->as_string; + chop($url) eq '/' or die "BUG: $uri not canonicalized"; + + # since this is for old instances w/o manifest.js.gz, try v1 first + return clone_v1($self) if grep(m!\A\Q$url\E/*\z!, @urls); + if (my @v2_urls = grep(m!\A\Q$url\E/[0-9]+\z!, @urls)) { + my %v2_uris = map { $_ => URI->new($_) } @v2_urls; # uniq + return clone_v2($self, [ values %v2_uris ]); + } + + # filter out common URLs served by WWW (e.g /$MSGID/T/) + if (@urls && $url =~ s!/+[^/]+\@[^/]+/.*\z!! && + grep(m!\A\Q$url\E/*\z!, @urls)) { + die <<""; +E: confused by scraping <$uri>, did you mean <$url>? + + } + @urls and die <<""; +E: confused by scraping <$uri>, got ambiguous results: +@urls + + die "E: scraping <$uri> revealed nothing\n"; +} + +sub clone_cmd { + my ($lei) = @_; + my @cmd = qw(git); + # we support "-c $key=$val" for arbitrary git config options + # e.g.: git -c http.proxy=socks5h://127.0.0.1:9050 + push(@cmd, '-c', $_) for @{$lei->{opt}->{c} // []}; + push @cmd, qw(clone --mirror); + push @cmd, '-q' if $lei->{opt}->{quiet}; + push @cmd, '-v' if $lei->{opt}->{verbose}; + # XXX any other options to support? + # --reference is tricky with multiple epochs... 
+ @cmd; +} + +# tries the relatively new /$INBOX/_/text/config/raw endpoint +sub _try_config { + my ($self) = @_; + my $dst = $self->{dst}; + if (!-d $dst || !mkdir($dst)) { + require File::Path; + File::Path::mkpath($dst); + -d $dst or die "mkpath($dst): $!\n"; + } + my $uri = URI->new($self->{src}); + my $lei = $self->{lei}; + my $path = $uri->path; + chop($path) eq '/' or die "BUG: $uri not canonicalized"; + $uri->path($path . '/_/text/config/raw'); + my $cmd = $self->{curl}->for_uri($lei, $uri); + push @$cmd, '--compressed'; # curl decompresses for us + my $ce = "$dst/inbox.config.example"; + my $f = "$ce-$$.tmp"; + open(my $fh, '+>', $f) or return $lei->err("open $f: $! (non-fatal)"); + my $opt = { 0 => $lei->{0}, 1 => $fh, 2 => $lei->{2} }; + $lei->qerr("# @$cmd"); + my $pid = spawn($cmd, $lei->{env}, $opt); + waitpid($pid, 0) == $pid or return $lei->err("waitpid @$cmd: $!"); + if (($? >> 8) == 22) { # 404 missing + unlink($f) if -s $fh == 0; + return; + } + return $lei->err("# @$cmd failed (non-fatal)") if $?; + rename($f, $ce) or return $lei->err("link($f, $ce): $! 
(non-fatal)"); + my $cfg = PublicInbox::Config::git_config_dump($f); + my $ibx = $self->{ibx} = {}; + for my $sec (grep(/\Apublicinbox\./, @{$cfg->{-section_order}})) { + for (qw(address newsgroup nntpmirror)) { + $ibx->{$_} = $cfg->{"$sec.$_"}; + } + } +} + +sub index_cloned_inbox { + my ($self, $iv) = @_; + my $ibx = delete($self->{ibx}) // { + address => [ 'lei@example.com' ], + version => $iv, + }; + $ibx->{inboxdir} = $self->{dst}; + PublicInbox::Inbox->new($ibx); + PublicInbox::InboxWritable->new($ibx); + my $opt = {}; + my $lei = $self->{lei}; + for my $sw ($lei->index_opt) { + my ($k) = ($sw =~ /\A([\w-]+)/); + $opt->{$k} = $lei->{opt}->{$k}; + } + # force synchronous dwaitpid for v2: + local $PublicInbox::DS::in_loop = 0; + my $cfg = PublicInbox::Config->new; + my $env = PublicInbox::Admin::index_prepare($opt, $cfg); + local %ENV = (%ENV, %$env) if $env; + PublicInbox::Admin::progress_prepare($opt, $lei->{2}); + PublicInbox::Admin::index_inbox($ibx, undef, $opt); +} + +sub clone_v1 { + my ($self) = @_; + my $lei = $self->{lei}; + my $curl = $self->{curl} //= PublicInbox::LeiCurl->new($lei) or return; + my $uri = URI->new($self->{src}); + my $pfx = $curl->torsocks($lei, $uri) or return; + my $cmd = [ @$pfx, clone_cmd($lei), $uri->as_string, $self->{dst} ]; + $lei->qerr("# @$cmd"); + my $pid = spawn($cmd, $lei->{env}, $lei); + waitpid($pid, 0) == $pid or die "BUG: waitpid @$cmd: $!"; + $? == 0 or return $lei->child_error($?, "@$cmd failed"); + _try_config($self); + index_cloned_inbox($self, 1); +} + +sub clone_v2 { + my ($self, $v2_uris) = @_; + my $lei = $self->{lei}; + my $curl = $self->{curl} //= PublicInbox::LeiCurl->new($lei) or return; + my $pfx //= $curl->torsocks($lei, $v2_uris->[0]) or return; + my @epochs; + my $dst = $self->{dst}; + my @src_edst; + for my $uri (@$v2_uris) { + my $src = $uri->as_string; + my $edst = $dst; + $src =~ m!/([0-9]+)(?:\.git)?\z! 
or die <<""; +failed to extract epoch number from $src + + my $nr = $1 + 0; + $edst .= "/git/$nr.git"; + push @src_edst, [ $src, $edst ]; + } + my $lk = bless { lock_path => "$dst/inbox.lock" }, 'PublicInbox::Lock'; + _try_config($self); + my $on_destroy = $lk->lock_for_scope($$); + my @cmd = clone_cmd($lei); + while (my $pair = shift(@src_edst)) { + my $cmd = [ @$pfx, @cmd, @$pair ]; + $lei->qerr("# @$cmd"); + my $pid = spawn($cmd, $lei->{env}, $lei); + waitpid($pid, 0) == $pid or die "BUG: waitpid @$cmd: $!"; + $? == 0 or return $lei->child_error($?, "@$cmd failed"); + } + undef $on_destroy; # unlock + index_cloned_inbox($self, 2); +} + +sub try_manifest { + my ($self) = @_; + my $uri = URI->new($self->{src}); + my $lei = $self->{lei}; + my $curl = $self->{curl} //= PublicInbox::LeiCurl->new($lei) or return; + my $path = $uri->path; + chop($path) eq '/' or die "BUG: $uri not canonicalized"; + $uri->path($path . '/manifest.js.gz'); + my $cmd = $curl->for_uri($lei, $uri); + $lei->qerr("# @$cmd"); + my $opt = { 0 => $lei->{0}, 2 => $lei->{2} }; + my $fh = popen_rd($cmd, $lei->{env}, $opt); + my $gz = do { local $/; <$fh> } // die "read(curl $uri): $!"; + unless (close $fh) { + return try_scrape($self) if ($? 
>> 8) == 22; # 404 missing + return $lei->child_error($?, "@$cmd failed"); + } + my $js; + gunzip(\$gz => \$js, MultiStream => 1) or + die "gunzip($uri): $GunzipError"; + my $m = eval { PublicInbox::Config->json->decode($js) }; + die "$uri: error decoding `$js': $@" if $@; + ref($m) eq 'HASH' or die "$uri unknown type: ".ref($m); + + my $v1_bare = $m->{$path}; + my @v2_epochs = grep(m!\A\Q$path\E/git/[0-9]+\.git\z!, keys %$m); + if (@v2_epochs) { + # It may be possible to have v1 + v2 in parallel someday: + $lei->err(<<EOM) if defined $v1_bare; +# `$v1_bare' appears to be a v1 inbox while v2 epochs exist: +# @v2_epochs +# ignoring $v1_bare (use --inbox-version=1 to force v1 instead) +EOM + @v2_epochs = map { $uri->path($_); $uri->clone } @v2_epochs; + clone_v2($self, \@v2_epochs); + } elsif ($v1_bare) { + clone_v1($self); + } elsif (my @maybe = grep(m!\Q$path\E!, keys %$m)) { + die "E: confused by <$uri>, possible matches:\n@maybe"; + } else { + die "E: confused by <$uri>"; + } +} + +sub start_clone_url { + my ($self) = @_; + return try_manifest($self) if $self->{src} =~ m!\Ahttps?://!; + die "TODO: non-HTTP/HTTPS clone of $self->{src} not supported, yet"; +} + +sub do_mirror { # via wq_do + my ($self) = @_; + my $lei = $self->{lei}; + eval { + my $iv = $lei->{opt}->{'inbox-version'}; + if (defined $iv) { + return clone_v1($self) if $iv == 1; + return try_scrape($self) if $iv == 2; + die "bad --inbox-version=$iv\n"; + } + return start_clone_url($self) if $self->{src} =~ m!://!; + die "TODO: cloning local directories not supported, yet"; + }; + return $lei->fail($@) if $@; + $lei->qerr("# mirrored $self->{src} => $self->{dst}"); +} + +sub start { + my ($cls, $lei, $src, $dst) = @_; + my $self = bless { lei => $lei, src => $src, dst => $dst }, $cls; + $lei->{mrr} = $self; + if ($src =~ m!https?://!) 
{ + require URI; + require PublicInbox::LeiCurl; + } + require PublicInbox::Lock; + require PublicInbox::Inbox; + require PublicInbox::Admin; + require PublicInbox::InboxWritable; + my $ops = { + '!' => [ $lei->can('fail_handler'), $lei ], + 'x_it' => [ $lei->can('x_it'), $lei ], + 'child_error' => [ $lei->can('child_error'), $lei ], + '' => [ \&mirror_done, $lei ], + }; + ($lei->{pkt_op_c}, $lei->{pkt_op_p}) = PublicInbox::PktOp->pair($ops); + $self->wq_workers_start('lei_mirror', 1, $lei->oldset, {lei => $lei}); + my $op = delete $lei->{pkt_op_c}; + delete $lei->{pkt_op_p}; + $self->wq_do('do_mirror', []); + $self->wq_close(1); + $lei->event_step_init; # wait for shutdowns + if ($lei->{oneshot}) { + while ($op->{sock}) { $op->event_step } + } +} + +sub ipc_atfork_child { + my ($self) = @_; + $self->{lei}->lei_atfork_child; + $self->SUPER::ipc_atfork_child; +} + +1; diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm index f8068362..1e5d7ca6 100644 --- a/lib/PublicInbox/LeiXSearch.pm +++ b/lib/PublicInbox/LeiXSearch.pm @@ -212,7 +212,6 @@ sub query_remote_mboxrd { my ($opt, $env) = @$lei{qw(opt env)}; my @qform = (q => $lei->{mset_opt}->{qstr}, x => 'm'); push(@qform, t => 1) if $opt->{thread}; - my @cmd = ($self->{curl}, qw(-sSf -d), ''); my $verbose = $opt->{verbose}; my $reap; my $cerr = File::Temp->new(TEMPLATE => 'curl.err-XXXX', TMPDIR => 1); @@ -223,43 +222,18 @@ sub query_remote_mboxrd { # spawn a process to force line-buffering, otherwise curl # will write 1 character at-a-time and parallel outputs # mmmaaayyy llloookkk llliiikkkeee ttthhhiiisss - push @cmd, '-v'; my $o = { 1 => $lei->{2}, 2 => $lei->{2} }; my $pid = spawn(['tail', '-f', $cerr->filename], undef, $o); $reap = PublicInbox::OnDestroy->new(\&kill_reap, $pid); } - for my $o ($lei->curl_opt) { - $o =~ s/\|[a-z0-9]\b//i; # remove single char short option - if ($o =~ s/=[is]@\z//) { - my $ary = $opt->{$o} or next; - push @cmd, map { ("--$o", $_) } @$ary; - } elsif ($o =~ 
s/=[is]\z//) { - my $val = $opt->{$o} // next; - push @cmd, "--$o", $val; - } elsif ($opt->{$o}) { - push @cmd, "--$o"; - } - } - $opt->{torsocks} = 'false' if $opt->{'no-torsocks'}; - my $tor = $opt->{torsocks} //= 'auto'; + my $curl = PublicInbox::LeiCurl->new($lei, $self->{curl}) or return; + push @$curl, '-s', '-d', ''; my $each_smsg = $lei->{ovv}->ovv_each_smsg_cb($lei); for my $uri (@$uris) { $lei->{-current_url} = $uri->as_string; $lei->{-nr_remote_eml} = 0; $uri->query_form(@qform); - my $cmd = [ @cmd, $uri->as_string ]; - if ($tor eq 'auto' && substr($uri->host, -6) eq '.onion' && - (($env->{LD_PRELOAD}//'') !~ /torsocks/)) { - unshift @$cmd, which('torsocks'); - } elsif (PublicInbox::Config::git_bool($tor)) { - unshift @$cmd, which('torsocks'); - } - - # continue anyways if torsocks is missing; a proxy may be - # specified via CLI, curlrc, environment variable, or even - # firewall rule - shift(@$cmd) if !$cmd->[0]; - + my $cmd = $curl->for_uri($lei, $uri); $lei->err("# @$cmd") if $verbose; my ($fh, $pid) = popen_rd($cmd, $env, $rdr); $fh = IO::Uncompress::Gunzip->new($fh); @@ -440,6 +414,7 @@ sub add_uri { if (my $curl = $self->{curl} //= which('curl') // 0) { require PublicInbox::MboxReader; require IO::Uncompress::Gunzip; + require PublicInbox::LeiCurl; push @{$self->{remotes}}, $uri; } else { warn "curl missing, ignoring $uri\n"; diff --git a/lib/PublicInbox/TestCommon.pm b/lib/PublicInbox/TestCommon.pm index c861dc5d..5cce44e4 100644 --- a/lib/PublicInbox/TestCommon.pm +++ b/lib/PublicInbox/TestCommon.pm @@ -461,8 +461,9 @@ SKIP: { Socket::MsgHdr missing or Inline::C is unconfigured/missing EOM $lei_opt = { 1 => \$lei_out, 2 => \$lei_err }; - my $daemon_pid; - my ($tmpdir, $for_destroy) = tmpdir(); + my ($daemon_pid, $for_destroy); + my $tmpdir = $test_opt->{tmpdir}; + ($tmpdir, $for_destroy) = tmpdir unless $tmpdir; SKIP: { skip 'TEST_LEI_ONESHOT set', 1 if $ENV{TEST_LEI_ONESHOT}; my $home = "$tmpdir/lei-daemon"; diff --git a/t/lei-mirror.t 
b/t/lei-mirror.t new file mode 100644 index 00000000..cf34c7ae --- /dev/null +++ b/t/lei-mirror.t @@ -0,0 +1,24 @@ +#!perl -w +# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> +use strict; use v5.10.1; use PublicInbox::TestCommon; +my $sock = tcp_server(); +my ($tmpdir, $for_destroy) = tmpdir(); +my $http = 'http://'.$sock->sockhost.':'.$sock->sockport.'/'; +my ($ro_home, $cfg_path) = setup_public_inboxes; +my $cmd = [ qw(-httpd -W0), "--stdout=$tmpdir/out", "--stderr=$tmpdir/err" ]; +my $td = start_script($cmd, { PI_CONFIG => $cfg_path }, { 3 => $sock }); +test_lei({ tmpdir => $tmpdir }, sub { + my $home = $ENV{HOME}; + my $t1 = "$home/t1-mirror"; + ok($lei->('add-external', $t1, '--mirror', "$http/t1/"), '--mirror v1'); + ok(-f "$t1/public-inbox/msgmap.sqlite3", 't1-mirror indexed'); + my $t2 = "$home/t2-mirror"; + ok($lei->('add-external', $t2, '--mirror', "$http/t2/"), '--mirror v2'); + ok(-f "$t2/msgmap.sqlite3", 't2-mirror indexed'); +}); + +ok($td->kill, 'killed -httpd'); +$td->join; + +done_testing;
We'll reword and improve formatting with non-breaking spaces ("\xa0") which is only replaced with SP after wrapping. Some terminology is shortened (e.g. "URL_OR_PATHNAME" => "LOCATION") to improve formatting. This also enables completion for -h/--help and lets us prioritize favored switch names while attempting to satisfy users relying on muscle memory from other tools. --- MANIFEST | 1 + lib/PublicInbox/LEI.pm | 167 +++++++++++++------------------------ lib/PublicInbox/LeiHelp.pm | 100 ++++++++++++++++++++++ 3 files changed, 160 insertions(+), 108 deletions(-) create mode 100644 lib/PublicInbox/LeiHelp.pm diff --git a/MANIFEST b/MANIFEST index 4236f87c..521f1f68 100644 --- a/MANIFEST +++ b/MANIFEST @@ -180,6 +180,7 @@ lib/PublicInbox/LEI.pm lib/PublicInbox/LeiCurl.pm lib/PublicInbox/LeiDedupe.pm lib/PublicInbox/LeiExternal.pm +lib/PublicInbox/LeiHelp.pm lib/PublicInbox/LeiImport.pm lib/PublicInbox/LeiMirror.pm lib/PublicInbox/LeiOverview.pm diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm index bdeab7e3..3098ade7 100644 --- a/lib/PublicInbox/LEI.pm +++ b/lib/PublicInbox/LEI.pm @@ -23,7 +23,6 @@ use PublicInbox::Sigfd; use PublicInbox::DS qw(now dwaitpid); use PublicInbox::Spawn qw(spawn popen_rd); use PublicInbox::OnDestroy; -use Text::Wrap qw(wrap); use Time::HiRes qw(stat); # ctime comparisons for config cache use File::Path qw(mkpath); use File::Spec; @@ -100,33 +99,34 @@ sub _config_path ($) { sub index_opt { # TODO: drop underscore variants everywhere, they're undocumented - qw(fsync|sync! jobs|j=i indexlevel|index-level|L=s compact+ + qw(fsync|sync! 
jobs|j=i indexlevel|L=s compact max_size|max-size=s sequential_shard|sequential-shard - batch_size|batch-size=s skip-docdata quiet|q verbose|v+) + batch_size|batch-size=s skip-docdata) } -# TODO: generate shell completion + help using %CMD and %OPTDESC +# we generate shell completion + help using %CMD and %OPTDESC, +# see lei__complete() and PublicInbox::LeiHelp # command => [ positional_args, 1-line description, Getopt::Long option spec ] our %CMD = ( # sorted in order of importance/use: 'q' => [ '--stdin|SEARCH_TERMS...', 'search for messages matching terms', qw( save-as=s output|mfolder|o=s format|f=s dedupe|d=s thread|t augment|a sort|s=s reverse|r offset=i remote! local! external! pretty include|I=s@ exclude=s@ only=s@ jobs|j=s globoff|g stdin| - mua-cmd|mua=s no-torsocks torsocks=s verbose|v+ quiet|q - received-after=s received-before=s sent-after=s sent-since=s), + mua-cmd|mua=s no-torsocks torsocks=s verbose|v+ quiet|q), PublicInbox::LeiQuery::curl_opt(), opt_dash('limit|n=i', '[0-9]+') ], 'show' => [ 'MID|OID', 'show a given object (Message-ID or object ID)', qw(type=s solve! 
format|f=s dedupe|d=s thread|t remote local!), pass_through('git show') ], -'add-external' => [ 'URL_OR_PATHNAME', +'add-external' => [ 'LOCATION', 'add/set priority of a publicinbox|extindex for extra matches', qw(boost=i c=s@ mirror=s no-torsocks torsocks=s inbox-version=i), + qw(quiet|q verbose|v+), index_opt(), PublicInbox::LeiQuery::curl_opt() ], 'ls-external' => [ '[FILTER...]', 'list publicinbox|extindex locations', qw(format|f=s z|0 local remote quiet|q) ], -'forget-external' => [ 'URL_OR_PATHNAME...|--prune', +'forget-external' => [ 'LOCATION...|--prune', 'exclude further results from a publicinbox|extindex', qw(prune quiet|q) ], @@ -145,21 +145,20 @@ our %CMD = ( # sorted in order of importance/use: "exclude message(s) on stdin from `q' search results", qw(stdin| oid=s exact by-mid|mid:s quiet|q) ], -'purge-mailsource' => [ 'URL_OR_PATHNAME|--all', +'purge-mailsource' => [ 'LOCATION|--all', 'remove imported messages from IMAP, Maildirs, and MH', qw(exact! all jobs:i indexed) ], # code repos are used for `show' to solve blobs from patch mails -'add-coderepo' => [ 'PATHNAME', 'add or set priority of a git code repo', +'add-coderepo' => [ 'DIRNAME', 'add or set priority of a git code repo', qw(boost=i) ], 'ls-coderepo' => [ '[FILTER_TERMS...]', 'list known code repos', qw(format|f=s z) ], -'forget-coderepo' => [ 'PATHNAME', +'forget-coderepo' => [ 'DIRNAME', 'stop using repo to solve blobs from patches', qw(prune) ], -'add-watch' => [ '[URL_OR_PATHNAME]', - 'watch for new messages and flag changes', +'add-watch' => [ 'LOCATION', 'watch for new messages and flag changes', qw(import! kw|keywords|flags! 
interval=s recursive|r exclude=s include=s) ], 'ls-watch' => [ '[FILTER...]', 'list active watches with numbers and status', @@ -169,7 +168,7 @@ our %CMD = ( # sorted in order of importance/use: 'forget-watch' => [ '{WATCH_NUMBER|--prune}', 'stop and forget a watch', qw(prune) ], -'import' => [ 'URLS_OR_PATHNAMES...|--stdin', +'import' => [ 'LOCATION...|--stdin', 'one-time import/update from URL or filesystem', qw(stdin| offset=i recursive|r exclude=s include|I=s format|f=s kw|keywords|flags!), @@ -179,8 +178,8 @@ our %CMD = ( # sorted in order of importance/use: 'git-config(1) wrapper for '._config_path($_[0]); }, qw(config-file|system|global|file|f=s), # for conflict detection pass_through('git config') ], -'init' => [ '[PATHNAME]', sub { - 'initialize storage, default: '._store_path($_[0]); +'init' => [ '[DIRNAME]', sub { + "initialize storage, default: "._store_path($_[0]); }, qw(quiet|q) ], 'daemon-kill' => [ '[-SIGNAL]', 'signal the lei-daemon', opt_dash('signal|s=s', '[0-9]+|(?:[A-Z][A-Z0-9]+)') ], @@ -208,43 +207,66 @@ my $stdin_formats = [ 'MAIL_FORMAT|eml|mboxrd|mboxcl2|mboxcl|mboxo', 'specify message input format' ]; my $ls_format = [ 'OUT|plain|json|null', 'listing output format' ]; +# we use \x{a0} (non-breaking SP) to avoid wrapping in PublicInbox::LeiHelp my %OPTDESC = ( 'help|h' => 'show this built-in help', 'quiet|q' => 'be quiet', -'globoff|g' => "do not match locations using '*?' wildcards and '[]' ranges", +'globoff|g' => "do not match locations using '*?' wildcards ". + "and\xa0'[]'\x{a0}ranges", 'verbose|v+' => 'be more verbose', 'solve!' 
=> 'do not attempt to reconstruct blobs from emails', -'torsocks=s' => ['auto|no|yes', +'torsocks=s' => ['VAL|auto|no|yes', 'whether or not to wrap git and curl commands with torsocks'], 'no-torsocks' => 'alias for --torsocks=no', 'save-as=s' => ['NAME', 'save a search terms by given name'], 'type=s' => [ 'any|mid|git', 'disambiguate type' ], -'dedupe|d=s' => ['STRAT|content|oid|mid|none', +'dedupe|d=s' => ['STRATEGY|content|oid|mid|none', 'deduplication strategy'], 'show thread|t' => 'display entire thread a message belongs to', 'q thread|t' => 'return all messages in the same thread as the actual match(es)', 'augment|a' => 'augment --output destination instead of clobbering', -'output|mfolder|o=s' => [ 'DEST', - "destination (e.g. `/path/to/Maildir', or `-' for stdout)" ], -'mua-cmd|mua=s' => [ 'COMMAND', - "MUA to run on --output Maildir or mbox (e.g. `mutt -f %f'" ], +'output|mfolder|o=s' => [ 'MFOLDER', + "destination (e.g.\xa0`/path/to/Maildir', ". + "or\xa0`-'\x{a0}for\x{a0}stdout)" ], +'mua-cmd|mua=s' => [ 'CMD', + "MUA to run on --output Maildir or mbox (e.g.\xa0`mutt\xa0-f\xa0%f')" ], 'show format|f=s' => [ 'OUT|plain|raw|html|mboxrd|mboxcl2|mboxcl', 'message/object output format' ], 'mark format|f=s' => $stdin_formats, 'forget format|f=s' => $stdin_formats, + +'add-external inbox-version=i' => [ 'NUM|1|2', + 'force a public-inbox version with --mirror'], +'add-external mirror=s' => [ 'URL', 'mirror a public-inbox'], + +# public-inbox-index options +'add-external jobs|j=i' => 'set parallelism when indexing after --mirror', +'fsync!' 
=> 'speed up indexing after --mirror, risk index corruption', +'compact' => 'run compact index after mirroring', +'indexlevel|L=s' => [ 'LEVEL|full|medium|basic', + "indexlevel with --mirror (default: full)" ], +'max_size|max-size=s' => [ 'SIZE', + 'do not index messages larger than SIZE (default: infinity)' ], +'batch_size|batch-size=s' => [ 'SIZE', + 'flush changes to OS after given number of bytes (default: 1m)' ], +'sequential_shard|sequential-shard' => + 'index Xapian shards sequentially for slow storage', +'skip-docdata' => + 'drop compatibility w/ public-inbox <1.6 to save ~1.5% space', + 'q format|f=s' => [ 'OUT|maildir|mboxrd|mboxcl2|mboxcl|mboxo|html|json|jsonl|concatjson', 'specify output format, default depends on --output'], -'q exclude=s@' => [ 'URL_OR_PATHNAME', +'q exclude=s@' => [ 'LOCATION', 'exclude specified external(s) from search' ], -'q include|I=s@' => [ 'URL_OR_PATHNAME', +'q include|I=s@' => [ 'LOCATION', 'include specified external(s) in search' ], -'q only=s@' => [ 'URL_OR_PATHNAME', +'q only=s@' => [ 'LOCATION', 'only use specified external(s) for search' ], 'q jobs=s' => [ '[SEARCH_JOBS][,WRITER_JOBS]', @@ -258,9 +280,9 @@ my %OPTDESC = ( 'limit|n=i@' => ['NUM', 'limit on number of matches (default: 10000)' ], 'offset=i' => ['OFF', 'search result offset (default: 0)'], -'sort|s=s' => [ 'VAL|received,relevance,docid', - "order of results `--output'-dependent"], -'reverse|r' => [ 'reverse search results' ], # like sort(1) +'sort|s=s' => [ 'VAL|received|relevance|docid', + "order of results is `--output'-dependent"], +'reverse|r' => 'reverse search results', # like sort(1) 'boost=i' => 'increase/decrease priority of results (default: 0)', @@ -280,7 +302,6 @@ my %OPTDESC = ( 'exact!' => 'rely on content match instead of exact header matches', 'by-mid|mid:s' => [ 'MID', 'match only by Message-ID, ignoring contents' ], -'jobs:i' => 'set parallelism level', 'kw|keywords|flags!' 
=> 'disable/enable importing flags', @@ -415,86 +436,15 @@ sub lei_atfork_child { $current_lei = $persist ? undef : $self; # for SIG{__WARN__} } -sub _help ($;$) { - my ($self, $errmsg) = @_; - my $cmd = $self->{cmd} // 'COMMAND'; - my @info = @{$CMD{$cmd} // [ '...', '...' ]}; - my @top = ($cmd, shift(@info) // ()); - my $cmd_desc = shift(@info); - $cmd_desc = $cmd_desc->($self) if ref($cmd_desc) eq 'CODE'; - my @opt_desc; - my $lpad = 2; - for my $sw (grep { !ref } @info) { # ("prio=s", "z", $GLP_PASS) - my $desc = $OPTDESC{"$cmd\t$sw"} // $OPTDESC{$sw} // next; - my $arg_vals = ''; - ($arg_vals, $desc) = @$desc if ref($desc) eq 'ARRAY'; - - # lower-case is a keyword (e.g. `content', `oid'), - # ALL_CAPS is a string description (e.g. `PATH') - if ($desc !~ /default/ && $arg_vals =~ /\b([a-z]+)[,\|]/) { - $desc .= "\ndefault: `$1'"; - } - my (@vals, @s, @l); - my $x = $sw; - if ($x =~ s/!\z//) { # solve! => --no-solve - $x =~ s/(\A|\|)/$1no-/g - } elsif ($x =~ s/:.+//) { # optional args: $x = "mid:s" - @vals = (' [', undef, ']'); - } elsif ($x =~ s/=.+//) { # required arg: $x = "type=s" - @vals = (' ', undef); - } # else: no args $x = 'thread|t' - for (split(/\|/, $x)) { # help|h - length($_) > 1 ? push(@l, "--$_") : push(@s, "-$_"); - } - if (!scalar(@vals)) { # no args 'thread|t' - } elsif ($arg_vals =~ s/\A([A-Z_]+)\b//) { # "NAME" - $vals[1] = $1; - } else { - $vals[1] = uc(substr($l[0], 2)); # "--type" => "TYPE" - } - if ($arg_vals =~ /([,\|])/) { - my $sep = $1; - my @allow = split(/\Q$sep\E/, $arg_vals); - my $must = $sep eq '|' ? 'Must' : 'Can'; - @allow = map { "`$_'" } @allow; - my $last = pop @allow; - $desc .= "\n$must be one of: " . - join(', ', @allow) . " or $last"; - } - my $lhs = join(', ', @s, @l) . 
join('', @vals); - if ($x =~ /\|\z/) { # "stdin|" or "clear|" - $lhs =~ s/\A--/- , --/; - } else { - $lhs =~ s/\A--/ --/; # pad if no short options - } - $lpad = length($lhs) if length($lhs) > $lpad; - push @opt_desc, $lhs, $desc; - } - my $msg = $errmsg ? "E: $errmsg\n" : ''; - $msg .= <<EOF; -usage: lei @top - $cmd_desc - -EOF - $lpad += 2; - local $Text::Wrap::columns = 78 - $lpad; - my $padding = ' ' x ($lpad + 2); - while (my ($lhs, $rhs) = splice(@opt_desc, 0, 2)) { - $msg .= ' '.pack("A$lpad", $lhs); - $rhs = wrap('', '', $rhs); - $rhs =~ s/\n/\n$padding/sg; # LHS pad continuation lines - $msg .= $rhs; - $msg .= "\n"; - } - my $out = $self->{$errmsg ? 2 : 1}; - start_pager($self) if -t $out; - print $out $msg; - x_it($self, $errmsg ? 1 << 8 : 0); # stderr => failure - undef; +sub _help { + require PublicInbox::LeiHelp; + PublicInbox::LeiHelp::call($_[0], $_[1], \%CMD, \%OPTDESC); } sub optparse ($$$) { my ($self, $cmd, $argv) = @_; + # allow _complete --help to complete, not show help + return 1 if substr($cmd, 0, 1) eq '_'; $self->{cmd} = $cmd; $OPT = $self->{opt} = {}; my $info = $CMD{$cmd} // [ '[...]' ]; @@ -720,7 +670,8 @@ sub lei__complete { get-color-name get-colorbool); # fall-through } - puts $self, grep(/$re/, map { # generate short/long names + # generate short/long names from Getopt::Long specs + puts $self, grep(/$re/, qw(--help -h), map { if (s/[:=].+\z//) { # req/optional args, e.g output|o=i } elsif (s/\+\z//) { # verbose|v+ } elsif (s/!\z//) { @@ -730,7 +681,7 @@ sub lei__complete { map { my $x = length > 1 ? "--$_" : "-$_"; $x eq $cur ? 
() : $x; - } split(/\|/, $_, -1) # help|h + } grep(!/_/, split(/\|/, $_, -1)) # help|h } grep { $OPTDESC{"$cmd\t$_"} || $OPTDESC{$_} } @spec); } elsif ($cmd eq 'config' && !@argv && !$CONFIG_KEYS{$cur}) { puts $self, grep(/$re/, keys %CONFIG_KEYS); diff --git a/lib/PublicInbox/LeiHelp.pm b/lib/PublicInbox/LeiHelp.pm new file mode 100644 index 00000000..43414ab4 --- /dev/null +++ b/lib/PublicInbox/LeiHelp.pm @@ -0,0 +1,100 @@ +# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> + +# -h/--help support for lei +package PublicInbox::LeiHelp; +use strict; +use v5.10.1; +use Text::Wrap qw(wrap); + +my %NOHELP = map { $_ => 1 } qw(mua-cmd mfolder); + +sub call { + my ($self, $errmsg, $CMD, $OPTDESC) = @_; + my $cmd = $self->{cmd} // 'COMMAND'; + my @info = @{$CMD->{$cmd} // [ '...', '...' ]}; + my @top = ($cmd, shift(@info) // ()); + my $cmd_desc = shift(@info); + $cmd_desc = $cmd_desc->($self) if ref($cmd_desc) eq 'CODE'; + $cmd_desc =~ s/default: /default:\xa0/; + my @opt_desc; + my $lpad = 2; + for my $sw (grep { !ref } @info) { # ("prio=s", "z", $GLP_PASS) + my $desc = $OPTDESC->{"$cmd\t$sw"} // $OPTDESC->{$sw} // next; + my $arg_vals = ''; + ($arg_vals, $desc) = @$desc if ref($desc) eq 'ARRAY'; + + # lower-case is a keyword (e.g. `content', `oid'), + # ALL_CAPS is a string description (e.g. `PATH') + if ($desc !~ /default/ && $arg_vals =~ /\b([a-z]+)[,\|]/) { + $desc .= " (default:\xa0`$1')"; + } else { + $desc =~ s/default: /default:\xa0/; + } + my (@vals, @s, @l); + my $x = $sw; + if ($x =~ s/!\z//) { # solve! 
=> --no-solve + $x =~ s/(\A|\|)/$1no-/g + } elsif ($x =~ s/\+\z//) { # verbose|v+ + } elsif ($x =~ s/:.+//) { # optional args: $x = "mid:s" + @vals = (' [', undef, ']'); + } elsif ($x =~ s/=.+//) { # required arg: $x = "type=s" + @vals = (' ', undef); + } # else: no args $x = 'thread|t' + + # we support underscore options from public-inbox-* commands; + # but they've never been documented and will likely go away. + # $x = help|h + for (grep { !/_/ && !$NOHELP{$_} } split(/\|/, $x)) { + length($_) > 1 ? push(@l, "--$_") : push(@s, "-$_"); + } + if (!scalar(@vals)) { # no args 'thread|t' + } elsif ($arg_vals =~ s/\A([A-Z_]+)\b//) { # "NAME" + $vals[1] = $1; + } else { + $vals[1] = uc(substr($l[0], 2)); # "--type" => "TYPE" + } + if ($arg_vals =~ /([,\|])/) { + my $sep = $1; + my @allow = split(/\Q$sep\E/, $arg_vals); + my $must = $sep eq '|' ? 'Must' : 'Can'; + @allow = map { length $_ ? "`$_'" : () } @allow; + my $last = pop @allow; + $desc .= "\n$must be one of: " . + join(', ', @allow) . " or $last"; + } + my $lhs = join(', ', @s, @l) . join('', @vals); + if ($x =~ /\|\z/) { # "stdin|" or "clear|" + $lhs =~ s/\A--/- , --/; + } else { + $lhs =~ s/\A--/ --/; # pad if no short options + } + $lpad = length($lhs) if length($lhs) > $lpad; + push @opt_desc, $lhs, $desc; + } + my $msg = $errmsg ? "E: $errmsg\n" : ''; + $msg .= <<EOF; +usage: lei @top +$cmd_desc + +EOF + $lpad += 2; + local $Text::Wrap::columns = 78 - $lpad; + # local $Text::Wrap::break = ; # don't break on nbsp (\xa0) + my $padding = ' ' x ($lpad + 2); + while (my ($lhs, $rhs) = splice(@opt_desc, 0, 2)) { + $msg .= ' '.pack("A$lpad", $lhs); + $rhs = wrap('', '', $rhs); + $rhs =~ s/\n/\n$padding/sg; # LHS pad continuation lines + $msg .= $rhs; + $msg .= "\n"; + } + my $fd = $errmsg ? 2 : 1; + $self->start_pager if -t $self->{$fd}; + $msg =~ s/\xa0/ /gs; # convert NBSP to SP + print { $self->{$fd} } $msg; + $self->x_it($errmsg ? (1 << 8) : 0); # stderr => failure + undef; +} + +1;
Option combinations which make no sense should fail to prevent misunderstandings and avoid surprises. --- lib/PublicInbox/LeiExternal.pm | 22 ++++++++++++++++++++-- t/lei-mirror.t | 6 ++++++ 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/lib/PublicInbox/LeiExternal.pm b/lib/PublicInbox/LeiExternal.pm index 6a5c2517..b65dc87c 100644 --- a/lib/PublicInbox/LeiExternal.pm +++ b/lib/PublicInbox/LeiExternal.pm @@ -101,9 +101,27 @@ sub add_external_finish { sub lei_add_external { my ($self, $location) = @_; $self->_lei_store(1)->write_prepare($self); - my $new_boost = $self->{opt}->{boost} // 0; + my $opt = $self->{opt}; + my $mirror = $opt->{mirror} // do { + my @fail; + for my $sw ($self->index_opt, $self->curl_opt, + qw(c no-torsocks torsocks inbox-version)) { + (my $f = $sw) =~ s/[|=:!+].*\z//s; # keep only the primary option name + next unless defined $opt->{$f}; + $f = length($f) == 1 ? "-$f" : "--$f"; + push @fail, $f; + } + if (scalar(@fail) == 1) { + return $self->fail("@fail requires --mirror"); + } elsif (@fail) { + my $last = pop @fail; + my $fail = join(', ', @fail); + return $self->fail("$fail and $last require --mirror"); + } + undef; + }; + my $new_boost = $opt->{boost} // 0; $location = ext_canonicalize($location); - my $mirror = $self->{opt}->{mirror}; if (defined($mirror) && -d $location) { $self->fail(<<""); # TODO: did you mean "update-external?" --mirror destination `$location' already exists diff --git a/t/lei-mirror.t b/t/lei-mirror.t index cf34c7ae..6af49678 100644 --- a/t/lei-mirror.t +++ b/t/lei-mirror.t @@ -16,6 +16,12 @@ test_lei({ tmpdir => $tmpdir }, sub { my $t2 = "$home/t2-mirror"; ok($lei->('add-external', $t2, '--mirror', "$http/t2/"), '--mirror v2'); ok(-f "$t2/msgmap.sqlite3", 't2-mirror indexed'); + + ok(!$lei->('add-external', $t2, '--mirror', "$http/t2/"), + '--mirror fails if reused'); + + ok(!$lei->('add-external', "$t2-fail", '-Lmedium'), '-L without --mirror fails'); + ok(!-d "$t2-fail", 'destination not created on failure'); }); ok($td->kill, 'killed -httpd');
Seeing --config in the command-line for lei may mislead users into thinking we support config file overrides that way. Rename the option to --curl-config and drop the short switch for now. --- lib/PublicInbox/LeiCurl.pm | 7 +++++++ lib/PublicInbox/LeiQuery.pm | 5 ++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/lib/PublicInbox/LeiCurl.pm b/lib/PublicInbox/LeiCurl.pm index c8747d4f..38b17c78 100644 --- a/lib/PublicInbox/LeiCurl.pm +++ b/lib/PublicInbox/LeiCurl.pm @@ -8,6 +8,10 @@ use v5.10.1; use PublicInbox::Spawn qw(which); use PublicInbox::Config; +my %lei2curl = ( + 'curl-config=s@' => 'config|K=s@', +); + # prepares a common command for curl(1) based on $lei command sub new { my ($cls, $lei, $curl) = @_; @@ -17,6 +21,9 @@ sub new { $cmd[-1] .= 's' if $opt->{quiet}; # already the default for "lei q" $cmd[-1] .= 'v' if $opt->{verbose}; # we use ourselves, too for my $o ($lei->curl_opt) { + if (my $lei_spec = $lei2curl{$o}) { + $o = $lei_spec; + } $o =~ s/\|[a-z0-9]\b//i; # remove single char short option if ($o =~ s/=[is]@\z//) { my $ary = $opt->{$o} or next; diff --git a/lib/PublicInbox/LeiQuery.pm b/lib/PublicInbox/LeiQuery.pm index 7c856032..63945d53 100644 --- a/lib/PublicInbox/LeiQuery.pm +++ b/lib/PublicInbox/LeiQuery.pm @@ -155,9 +155,12 @@ sub _complete_q { # TODO: should we depend on "-c http.*" options for things which have # analogues in git(1)? that would reduce likelyhood of conflicts with # our other CLI options +# Note: some names are renamed to avoid potential conflicts, +# see %lei2curl in lib/PublicInbox/LeiCurl.pm sub curl_opt { qw( + curl-config=s@ abstract-unix-socket=s anyauth basic cacert=s capath=s - cert-status cert-type cert=s ciphers=s config|K=s@ + cert-status cert-type cert=s ciphers=s connect-timeout=s connect-to=s cookie-jar=s cookie=s crlfile=s digest disable dns-interface=s dns-ipv4-addr=s dns-ipv6-addr=s dns-servers=s doh-url=s egd-file=s engine=s false-start
In particular, -U and -u switches may conflict with diff(1) options we may need for "lei show" which will use solver remotely or locally. --- lib/PublicInbox/LeiQuery.pm | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/PublicInbox/LeiQuery.pm b/lib/PublicInbox/LeiQuery.pm index 63945d53..0346498f 100644 --- a/lib/PublicInbox/LeiQuery.pm +++ b/lib/PublicInbox/LeiQuery.pm @@ -164,7 +164,7 @@ sub curl_opt { qw( connect-timeout=s connect-to=s cookie-jar=s cookie=s crlfile=s digest disable dns-interface=s dns-ipv4-addr=s dns-ipv6-addr=s dns-servers=s doh-url=s egd-file=s engine=s false-start - happy-eyeballs-timeout-ms=s haproxy-protocol header|H=s@ + happy-eyeballs-timeout-ms=s haproxy-protocol header=s@ http2-prior-knowledge http2 insecure interface=s ipv4 ipv6 junk-session-cookies key-type=s key=s limit-rate=s local-port=s location-trusted location @@ -177,7 +177,7 @@ sub curl_opt { qw( proxy-key-type=s proxy-key proxy-negotiate proxy-ntlm proxy-pass=s proxy-pinnedpubkey=s proxy-service-name=s proxy-ssl-allow-beast proxy-tls13-ciphers=s proxy-tlsauthtype=s proxy-tlspassword=s - proxy-tlsuser=s proxy-tlsv1 proxy-user|U=s proxy=s + proxy-tlsuser=s proxy-tlsv1 proxy-user=s proxy=s proxytunnel=s pubkey=s random-file=s referer=s resolve=s retry-connrefused retry-delay=s retry-max-time=s retry=i sasl-ir service-name=s socks4=s socks4a=s socks5-basic @@ -186,7 +186,7 @@ sub curl_opt { qw( suppress-connect-headers tcp-fastopen tls-max=s tls13-ciphers=s tlsauthtype=s tlspassword=s tlsuser=s tlsv1 trace-ascii=s trace-time trace=s - unix-socket=s user-agent|A=s user|u=s + unix-socket=s user-agent=s user=s ) }